From 895e46bca0967df8790f004d72d3bd2206a73901 Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Fri, 8 Nov 2019 10:16:23 +0100
Subject: [PATCH] If a field's value is empty, it gets replaced with a wildcard
 (*); removed all logging except for the scan pattern; the scan pattern now
 gets trimmed automatically after loading
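
Empty field values previously ended up as empty terms in the index pattern;
the placeholder replacement now takes an optional default value, and the scan
passes "*" so that empty fields match anything. _loadIndexPattern additionally
trims the loaded SCAN_PATTERN. Roughly how the new default behaves (the
pattern and field values below are illustrative only, not taken from a real
scanner configuration):

    // hypothetical scan pattern with two placeholders
    let pattern = "(+firstname:{FIRSTNAME}) (+lastname:{LASTNAME})";
    let fieldValueRays = [["FIRSTNAME", ""], ["LASTNAME", "Huber"]];

    // the empty FIRSTNAME value falls back to the wildcard default
    _DuplicateScannerUtils._replacePlaceholderForValuesInPattern(pattern, fieldValueRays, "*");
    // => "(+firstname:*) (+lastname:Huber)"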

---
 process/DuplicateScanner_lib/process.js | 129 +++++++++++++-----------
 1 file changed, 68 insertions(+), 61 deletions(-)

diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index 3759c2a2ad..4bd19f78e4 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -278,7 +278,7 @@ DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
  *          {
  *              let entityFieldName = resultFields[b];
  *              let indexFieldName = indexResultFields[entityFieldName];
- *              //logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
+ *              ////logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
  *              //format values
  *          }
  *      }
@@ -303,19 +303,19 @@ DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEnti
     let entityFields = _DuplicateScannerUtils._loadEntityFieldsFromPattern(indexPattern);
     let entityIdField = _DuplicateScannerUtils._loadEntityIdField(pFilterName, pTargetEntity);
     
-    logging.log("indexPattern -> " +indexPattern );
+    //logging.log("indexPattern -> " +indexPattern );
     
-    logging.log("entityIdField -> " + entityIdField);
+    //logging.log("entityIdField -> " + entityIdField);
     
     //Add the id field to ensure that it's in the first position
     entityFields = [entityIdField].concat(entityFields);
     
-    logging.log("entityFields -> " + entityFields);
+    //logging.log("entityFields -> " + entityFields);
     
     let alreadyIdentifiedIds = [];
 
     let targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFields, 0, pRecordsBlockSize);
-    //logging.log("Initialer Block geladen targetRecords-> " + JSON.stringify(targetRecords));
+    ////logging.log("Initialer Block geladen targetRecords-> " + JSON.stringify(targetRecords));
 
     let currentRecordIndex = pRecordsBlockSize;
     while(targetRecords.length > 0)
@@ -323,29 +323,29 @@ DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEnti
         foundDuplicateIds = DuplicateScannerUtils.ScanRecords(pTargetEntity, targetRecords,
             entityFields, resultFields, useExternalWebservice,
             pFormatValuesConsumeWebserviceCallback, alreadyIdentifiedIds, indexPattern);
-        //logging.log("gefundene ids zum ignorieren foundDuplicateIds -> " + foundDuplicateIds);
+        ////logging.log("gefundene ids zum ignorieren foundDuplicateIds -> " + foundDuplicateIds);
         alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
-        //logging.log("Gesamte ignorierListe -> " + alreadyIdentifiedIds);
+        ////logging.log("Gesamte ignorierListe -> " + alreadyIdentifiedIds);
         if(targetRecords.length < pRecordsBlockSize)
         {
-            logging.log("weniger records geladen als möglich => Ende der verfügbaren Records -> Abbrechen");
+            //logging.log("weniger records geladen als möglich => Ende der verfügbaren Records -> Abbrechen");
             break;
         }
 
         //logging.log("Nächster Block wird geladen mit startRow -> " + currentRecordIndex);
 
-        targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFieldsToLoad,
+        targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFields,
             currentRecordIndex, pRecordsBlockSize);
 
         currentRecordIndex += pRecordsBlockSize;
-    //logging.log("Nächster Block geladen mit targetRecords -> " + JSON.stringify(targetRecords));
+    ////logging.log("Nächster Block geladen mit targetRecords -> " + JSON.stringify(targetRecords));
     }
 }
 
 DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData,
     pEntityFields, pResultFields, pUseExternalWebservice, pFormatValuesConsumeWebserviceCallback, pAlreadyIdentifiedIds, pIndexPattern)
 {
-    //logging.log("in ScanRecords -> ");
+    ////logging.log("in ScanRecords -> ");
     let foundDuplicateIds = [];
 
     //If the contact id loading query has no results, stop.
@@ -355,27 +355,27 @@ DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData,
 
     //First it gets checked if the current id has already been identified. If that's the case, it'll continue with the next.
     //Otherwise an object gets built in the form of ["FilterFieldName" = "FilterFieldValueFromQuery"] with which a scan for possible duplicates gets started
-    //logging.log("configured pResultFields -> " + pResultFields);
+    ////logging.log("configured pResultFields -> " + pResultFields);
     var duplicatesToInsertQueries = [];
     for (b = 0; b < pTargetRecordsData.length; b++)
     {
-        //logging.log("Nächster Datensatz in RebuildDuplicatesCache -> " + b);
-        //        logging.log("Neuer Record -> " + pTargetRecordsData[b]);
+        ////logging.log("Nächster Datensatz in RebuildDuplicatesCache -> " + b);
+        //        //logging.log("Neuer Record -> " + pTargetRecordsData[b]);
 
-        //logging.log("pTargetRecordsData[b] -> " + JSON.stringify(pTargetRecordsData[b]));
+        ////logging.log("pTargetRecordsData[b] -> " + JSON.stringify(pTargetRecordsData[b]));
         let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(pEntityFields, pTargetRecordsData[b]);
-        logging.log("Werte des Datensatzes entityFieldValuesRay -> " + JSON.stringify(entityFieldValuesRay));
+        //logging.log("Werte des Datensatzes entityFieldValuesRay -> " + JSON.stringify(entityFieldValuesRay));
 
         //The first field in this Array must always be the configured id field. This is ensured using onValidation-logic
         let idField = entityFieldValuesRay[0][0];
         let idValue = entityFieldValuesRay[0][1];
-                logging.log("idField -> " + idField);
-                logging.log("idValue -> " + idValue);
-        //logging.log("pTargetRecordsData[b][idField] -> " + pTargetRecordsData[b][idField]);
+                //logging.log("idField -> " + idField);
+                //logging.log("idValue -> " + idValue);
+        ////logging.log("pTargetRecordsData[b][idField] -> " + pTargetRecordsData[b][idField]);
         //If the current Id has already been identified, continue
         if(pAlreadyIdentifiedIds.indexOf(pTargetRecordsData[b][idField]) > -1)
         {
-            //logging.log("Id schon behandelt, continue; -> ");
+            ////logging.log("Id schon behandelt, continue; -> ");
             continue;
         }
 
@@ -384,17 +384,17 @@ DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData,
 
         if(foundDuplicates == null || foundDuplicates.length == 0)
         {
-            logging.log("Keine Datensätze gefunden  continue;-> ");
+            //logging.log("Keine Datensätze gefunden  continue;-> ");
             continue;
         }
-        logging.log("foundDuplicates.length nach _scanForDuplicates -> " + foundDuplicates.length);
+        //logging.log("foundDuplicates.length nach _scanForDuplicates -> " + foundDuplicates.length);
 
         //Insert all found duplicate ids into a cache array because those ids don't have to be checked again later on.
 
         for (let i = 0; i < foundDuplicates.length; i++)
         {
             let localId = foundDuplicates[i][indexsearch.FIELD_ID];
-            logging.log("foundDuplicates[i] -> " + foundDuplicates[i]);
+            //logging.log("foundDuplicates[i] -> " + foundDuplicates[i]);
             foundDuplicateIds.push(localId);
         }
 
@@ -402,14 +402,14 @@ DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData,
 
         //The duplicates list contains only the duplicates found for the original id, therefore the original id gets added manually
         foundDuplicateIds.push(pTargetRecordsData[b][idField]);
-        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
-        logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
+        //logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
+        //logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
 
         let insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(foundDuplicateIds, pTargetEntity)
         duplicatesToInsertQueries = duplicatesToInsertQueries.concat(insertQueriesRay);
         foundDuplicateIds = [];
     }
-    logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
+    //logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
     db.inserts(duplicatesToInsertQueries, db.getCurrentAlias(), 10 * datetime.ONE_MINUTE);
     return foundDuplicateIds;
 }
@@ -447,7 +447,7 @@ DuplicateScannerUtils.CacheNewScanResults = function(pNewRecordId, pDuplicateIds
     else
         idRayToInsert.push(pNewRecordId);
     
-    //logging.log("idRayToInsert -> " + idRayToInsert);
+    ////logging.log("idRayToInsert -> " + idRayToInsert);
     insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(idRayToInsert, pTargetEntity, clusterId)
 
     return db.inserts(insertQueriesRay);
@@ -477,7 +477,7 @@ DuplicateScannerUtils.GetClusterWithIdenticalDuplicates = function(pDuplicateIds
     clusterIdCondition = clusterIdConditionBuilder.buildSql("select distinct CLUSTERID from DUPLICATECLUSTERS", "1=2");
     
     let foundClusterId = db.cell(clusterIdCondition);
-    //logging.log("clusterid des clusters der die gleichen ids enthält-> " + foundClusterId);
+    ////logging.log("clusterid des clusters der die gleichen ids enthält-> " + foundClusterId);
     if(foundClusterId == null || foundClusterId == "")
         return RESULT_NO_CLUSTER_FOUND;
     
@@ -485,8 +485,8 @@ DuplicateScannerUtils.GetClusterWithIdenticalDuplicates = function(pDuplicateIds
     .andPrepare("DUPLICATECLUSTERS.CLUSTERID", foundClusterId)
     .buildSql("select DUPLICATEID from DUPLICATECLUSTERS");
     let duplicatesInCluster = db.array(db.COLUMN, duplicatesInClusterCondition);
-    //logging.log("länge gefundener cluster -> " + duplicatesInCluster.length);
-    //logging.log("länge der angefragten ids -> " + pDuplicateIds.length);
+    ////logging.log("länge gefundener cluster -> " + duplicatesInCluster.length);
+    ////logging.log("länge der angefragten ids -> " + pDuplicateIds.length);
     /* 
      * A cluster has been searched which contains all duplicate ids as specified via parameter.
      * There's the possibility that this cluster contains even more duplicates than specified via the parameter.
@@ -593,7 +593,7 @@ DuplicateScannerUtils.LoadIndexFieldsConfiguration = function(pFilterName, pTarg
  *         {
  *             let entityFieldName = resultFields[b];
  *             let indexFieldName = indexResultFields[entityFieldName];
- *             //logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
+ *             ////logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
  *             //format values
  *         }
  *     }
@@ -622,7 +622,7 @@ DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, p
     let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(entityFields, pValuesToCheck);
    
    //The first field in this Array must always be the configured id field.
-    logging.log("ray " + entityFieldValuesRay.toSource())
+    //logging.log("ray " + entityFieldValuesRay.toSource())
     let idField = entityFieldValuesRay[0][0];
     let idValue = entityFieldValuesRay[0][1];
 
@@ -643,7 +643,7 @@ DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, p
  *  let entityResultFields = ["LASTNAME"];
  *  let entityIndexFields = DuplicateScannerUtils.TranslateEntityToIndexFields("Person_entity", entityResultFields);
  *
- *  logging.log(entityIndexFields["LASTNAME"]);//=> "LASTNAME_value"
+ *  //logging.log(entityIndexFields["LASTNAME"]);//=> "LASTNAME_value"
  */
 DuplicateScannerUtils.TranslateEntityToIndexFields = function(pEntityName, pEntityFields)
 {
@@ -697,15 +697,15 @@ DuplicateScannerUtils.MergePerson = function(pSourceContactId, pTargetContactId)
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeletePersonAndContactQuery(sourcePersonId, pSourceContactId));
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeleteCachedUnrelatedDuplicateQuery(pSourceContactId));
 
-    //logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
-    //logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
+    ////logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
+    ////logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
 
     let affectedRowsCurrentAlias = db.updates(updateStatementsCurrentAlias);
     let affectedRowsSystemAlias = db.updates(updateStatementsSystemAlias, SqlUtils.getSystemAlias());
     let deletedRows = db.deletes(deleteStatements)
 
-    //logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
-    //logging.log("deletedRows -> " + deletedRows);
+    ////logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
+    ////logging.log("deletedRows -> " + deletedRows);
 
     DuplicateScannerUtils.DeleteCachedDuplicate(pSourceContactId);
 
@@ -750,15 +750,15 @@ DuplicateScannerUtils.MergeOrganisation = function(pSourceContactId, pTargetCont
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeleteOrganisationAndContactQuery(sourceOrganisationId, pSourceContactId));
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeleteCachedUnrelatedDuplicateQuery(pSourceContactId));
 
-    //logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
-    //logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
+    ////logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
+    ////logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
 
     let affectedRowsCurrentAlias = db.updates(updateStatementsCurrentAlias);
     let affectedRowsSystemAlias = db.updates(updateStatementsSystemAlias, SqlUtils.getSystemAlias());
     let deletedRows = db.deletes(deleteStatements)
 
-    //logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
-    //logging.log("deletedRows -> " + deletedRows);
+    ////logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
+    ////logging.log("deletedRows -> " + deletedRows);
 
     DuplicateScannerUtils.DeleteCachedDuplicate(pSourceContactId);
 
@@ -792,8 +792,8 @@ DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsC
     */
     for (a = 0; a < pDuplicateFieldsConfig.length; a++)
     {
-        //        logging.log("pDuplicateFieldsConfig[a][1] -> " + pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD]);
-        //        logging.log(" pTargetRecordData[a] -> " + pTargetRecordData[a]);
+        //        //logging.log("pDuplicateFieldsConfig[a][1] -> " + pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD]);
+        //        //logging.log(" pTargetRecordData[a] -> " + pTargetRecordData[a]);
         let entityField = pDuplicateFieldsConfig[a];
         let entityFieldValue = pTargetRecordData[entityField];
         
@@ -840,8 +840,11 @@ _DuplicateScannerUtils._loadIndexPattern = function(pScannerName, pTargetEntity)
     .and("DUPLICATESCANNER.ENTITY_TO_SCAN_NAME = '" + pTargetEntity + "'")
     .and("DUPLICATESCANNER.FILTER_NAME = '" + pScannerName + "'")
     .buildSql("select SCAN_PATTERN from DUPLICATESCANNER");
-
-    return db.cell(scanPatternQuery);
+    
+    let scanPattern = db.cell(scanPatternQuery);
+    scanPattern = scanPattern.trim();
+    
+    return scanPattern;
 }
 
 _DuplicateScannerUtils._loadEntityFieldsFromPattern = function(indexPattern)
@@ -849,7 +852,7 @@ _DuplicateScannerUtils._loadEntityFieldsFromPattern = function(indexPattern)
     return indexPattern.match(/[^{}]+(?=\})/g);
 }
 
-_DuplicateScannerUtils._replacePlaceholderForValuesInPattern = function(pIndexPattern, pEntityFieldValueRays)
+_DuplicateScannerUtils._replacePlaceholderForValuesInPattern = function(pIndexPattern, pEntityFieldValueRays, pDefaultValue)
 {
     let INDEX_ENTITY_FIELD_NAME = 0;
     let INDEX_ENTITY_FIELD_VALUE = 1;
@@ -861,8 +864,11 @@ _DuplicateScannerUtils._replacePlaceholderForValuesInPattern = function(pIndexPa
         placeholder = "{" + pEntityFieldValueRays[i][INDEX_ENTITY_FIELD_NAME] + "}";
         fieldValue = pEntityFieldValueRays[i][INDEX_ENTITY_FIELD_VALUE];
         
-        logging.log("placeholder -> " + placeholder);
-        logging.log("fieldValue -> " + fieldValue);
+        if(pDefaultValue != null && (fieldValue == null || fieldValue == ""))
+            fieldValue = pDefaultValue;
+        
+        //logging.log("placeholder -> " + placeholder);
+        //logging.log("fieldValue -> " + fieldValue);
         pIndexPattern = pIndexPattern.replace(placeholder, fieldValue);
     }
     return pIndexPattern;
@@ -890,14 +896,14 @@ pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesCons
     
     let possibleDuplicates = [];
     let ignoreSourceRecordPattern = _DuplicateScannerUtils._getIgnoreSourceRecordPattern(pRecordIdFieldToIgnore, pRecordIdValueToIgnore);
-   logging.log("ignoreSourceRecordPattern -> " + ignoreSourceRecordPattern);
-    let indexPatternWithValues = _DuplicateScannerUtils._replacePlaceholderForValuesInPattern(pIndexPattern, pEntityFieldNameValueRays);
-    logging.log("indexPatternWithValues -> " + indexPatternWithValues);
+   //logging.log("ignoreSourceRecordPattern -> " + ignoreSourceRecordPattern);
+    let indexPatternWithValues = _DuplicateScannerUtils._replacePlaceholderForValuesInPattern(pIndexPattern, pEntityFieldNameValueRays, "*");
+    //logging.log("indexPatternWithValues -> " + indexPatternWithValues);
     indexPatternWithValues = ignoreSourceRecordPattern + indexPatternWithValues + ")";
-    logging.log("indexPatternWithValues -> " + indexPatternWithValues);
+    //logging.log("indexPatternWithValues -> " + indexPatternWithValues);
     possibleDuplicates = _DuplicateScannerUtils._callIndexSearch(pTargetEntity, indexPatternWithValues, pResultFields, 100);
     
-    logging.log("possibleDuplicates -> " + JSON.stringify(possibleDuplicates));
+    //logging.log("possibleDuplicates -> " + JSON.stringify(possibleDuplicates));
     
     if(possibleDuplicates == null)
         return null;
@@ -907,11 +913,11 @@ pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesCons
     if(pUseExternalWebservice && pFormatValuesConsumeWebserviceCallback != null)
         possibleDuplicates = pFormatValuesConsumeWebserviceCallback.apply(this, [possibleDuplicates]);
 
-//    //logging.log("pTargetEntity -> " + pTargetEntity);
-//    //logging.log("preFilter -> " + preFilter);
-//    //logging.log("pFilterFieldValueRays -> " + pFilterFieldValueRays);
-//    //logging.log("pRecordIdFieldToIgnore -> " + pRecordIdFieldToIgnore);
-//    //logging.log("possibleDuplicates -> " + possibleDuplicates);
+//    ////logging.log("pTargetEntity -> " + pTargetEntity);
+//    ////logging.log("preFilter -> " + preFilter);
+//    ////logging.log("pFilterFieldValueRays -> " + pFilterFieldValueRays);
+//    ////logging.log("pRecordIdFieldToIgnore -> " + pRecordIdFieldToIgnore);
+//    ////logging.log("possibleDuplicates -> " + possibleDuplicates);
 
     return possibleDuplicates;
 }
@@ -947,6 +953,7 @@ _DuplicateScannerUtils._isUseExternalWebservice = function(pFilterName, pTargetE
  */
 _DuplicateScannerUtils._callIndexSearch = function(pTargetEntity, pIndexPatternWithValues, pResultFields, pResultSetRows)
 {
+    logging.log("pIndexPatternWithValues -> " + pIndexPatternWithValues);
     //The indexPattern can't be null because it is required to run the search.
     if(pIndexPatternWithValues == null || pIndexPatternWithValues == "")
         return null;
@@ -1000,7 +1007,7 @@ _DuplicateScannerUtils._setResultFields = function(pIndexQuery, pResultFields)
     }
     //    }
 
-    //logging.log("resultFields -> " + resultFields);
+    ////logging.log("resultFields -> " + resultFields);
     return pIndexQuery;
 }
 
@@ -1040,7 +1047,7 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay
 
             if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 1)
             {
-                logging.log("Nutze fuzzy search -> ");
+                //logging.log("Nutze fuzzy search -> ");
                 filterTerm = filterTerm.setFuzzySearchFactor(2);
             }
 
@@ -1303,7 +1310,7 @@ _DuplicateScannerUtils._insertValuesInFilterTemplate = function(pJsonRootNode, p
 
             if(fieldValue == null)
             {
-                //logging.log("Duplicate Scan: Requested value for field " + fieldName + " not present in the provided valueslist");
+                ////logging.log("Duplicate Scan: Requested value for field " + fieldName + " not present in the provided valueslist");
                 continue;
             }
 
-- 
GitLab