From 2a8771bb9521c8adcdddd4d5af109c9ebd20eebc Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Thu, 19 Sep 2019 11:14:28 +0200
Subject: [PATCH] Remove the old duplicate cache rebuild functions; the
 variants carrying the "Index" postfix (the postfix itself was dropped as
 well) replace them. Add a new function that loads the DB_FIELDS from the
 IndexFieldConfig and concatenates them into a string to be used as the
 fields-to-select of a db select query

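A sketch of the resulting flow, as exercised by the adjusted test action
below (all names are taken from this patch):

    var filterName = "PersonDuplicates";
    var targetEntity = "Person_entity";

    //Rows in the format [DB_FIELD, ENTITY_FIELD, IS_ID, USE_FOR_SEARCH]
    let duplicateFieldsConfig = DuplicateScannerUtils.LoadDuplicateIndexFieldsConfiguration(filterName, targetEntity);

    //Concatenated DB field names, e.g. "CONTACTID, FIRSTNAME"
    let querySelectFields = DuplicateScannerUtils.BuildSelectFieldsFromFieldConfig(duplicateFieldsConfig);

    let queryPersonFieldData = "select " + querySelectFields + " from CONTACT"
                                + " join PERSON on PERSONID = PERSON_ID";

    DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
    DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity,
        queryPersonFieldData, duplicateFieldsConfig);
    DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
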
---
 .../testduplicatescanner/onActionProcess.js   |  17 +-
 process/DuplicateScanner_lib/process.js       | 218 +++---------------
 2 files changed, 37 insertions(+), 198 deletions(-)

diff --git a/entity/DuplicateScanner_entity/entityfields/testactiongroup/children/testduplicatescanner/onActionProcess.js b/entity/DuplicateScanner_entity/entityfields/testactiongroup/children/testduplicatescanner/onActionProcess.js
index 362bf7ba4c7..c3def7988e4 100644
--- a/entity/DuplicateScanner_entity/entityfields/testactiongroup/children/testduplicatescanner/onActionProcess.js
+++ b/entity/DuplicateScanner_entity/entityfields/testactiongroup/children/testduplicatescanner/onActionProcess.js
@@ -185,24 +185,13 @@ import("JditoFilter_lib");
 
 var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var resultFieldsIdFieldName = "CONTACTID";
 
-//Array
-//[DB_FELD, ENTITY_FELD, IS_ID, USE_FOR_SEARCH]
-//["CONTACTID", "CONTACTID", true, false]
-//["FIRSTNAME", "FIRSTNAME", false, true]
+
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadDuplicateIndexFieldsConfiguration(filterName, targetEntity);
 
 logging.log("duplicateFieldsConfig -> " + duplicateFieldsConfig);
 
-let querySelectFields = "";
-for (let i = 0; i < duplicateFieldsConfig.length; i++) 
-{
-    querySelectFields += duplicateFieldsConfig[i][0];
-    
-    if(i < duplicateFieldsConfig.length - 1)
-        querySelectFields += ", ";
-}
+let querySelectFields = DuplicateScannerUtils.BuildSelectFieldsFromFieldConfig(duplicateFieldsConfig);
 
 let queryPersonFieldData = "select " + querySelectFields + " from CONTACT"
                             + " join PERSON on PERSONID = PERSON_ID";
@@ -211,7 +200,7 @@ logging.log("Löschen von PERSON Dubletten -> ");
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
 logging.log("Neu berechnen von PERSON Dubletten -> ");
-DuplicateScannerUtils.RebuildDuplicatesCacheIndex(filterName, targetEntity, queryPersonFieldData,
+DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, queryPersonFieldData,
 duplicateFieldsConfig);
 
 DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
\ No newline at end of file
diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index 2d0d2e49288..4c78a524df9 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -26,27 +26,6 @@ DuplicateScannerUtils.loadFilters = function(pFilterName, pTargetEntity)
     return db.table(query);
 }
 
-DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, pFilterValues,
-pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
-{
-    let ignoredRecordFilter = _DuplicateScannerUtils._getIgnoreRecordFilter(pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pTargetEntity);
-    let configuredFilters = _DuplicateScannerUtils._loadFilters(pFilterName, pTargetEntity);
-
-    configuredFilters = [ignoredRecordFilter].concat(configuredFilters);
-
-//Wenn externer Service genutzt wird 
-//  applyPreFilterOnly
-//  Umwandeln in WS Format
-//  WS die Daten schicken
-//Wenn interne Dublettenermittlung genutzt werden soll
-//  
-    let possibleDuplicates = _DuplicateScannerUtils._applyPreFilter(pTargetEntity, configuredFilters, pTargetEntityResultFields, pFilterValues);
-    
-
-    
-    return possibleDuplicates;
-}
-
 /*
  * Deletes the cached duplicate for the given id. 
  * If there would only remain one item in the cluster after deletion, the whole cluster including the duplicate gets deleted.
@@ -229,15 +208,14 @@ DuplicateScannerUtils.GetCachedDuplicatesForId = function(pClusterRecordId)
  * @param {String} pTargetEntity The target entity which has been assigned to the filters configuration
  * @param {String} pQueryTargetRecords Query which holds the values that are being used as configured in the filter.
  * Important: The first element has to be the id field!
- * @param {String[]} pFilterFields Array of fields used in the filter. The first element has to be the id field. Afterswards the same field in the identical order as defined in the parameter "pQueryTargetRecords"
- * @param {String} pRecordIdFieldToIgnore Name of the id field. Has to be equal to the first field in the parameters "pFilterFields" and "pQueryTargetRecords"
- * 
+ * @param {String[][]} pDuplicateFieldsConfig The configuration of the fields and their usage. @see DuplicateScannerUtils.LoadDuplicateIndexFieldsConfiguration
+ *
  * @return {Int} Count of duplicate clusters created
  * 
  * @example
- * filterName = "OrganisationDuplicates";<br />
- * targetEntity = "Organisation_entity";<br />
- * resultFieldsIdFieldName = "CONTACTID";<br />
+ * var filterName = "PersonDuplicates";<br />
+ * var targetEntity = "Person_entity";<br />
+ * var duplicateFieldsConfig = DuplicateScannerUtils.LoadDuplicateIndexFieldsConfiguration(filterName, targetEntity);<br />
  * queryPersonContactIds = "select CONTACTID, ORGANISATION.\"NAME\" from ORGANISATION"<br />
  *                             + " join CONTACT on CONTACT.CONTACTID = ORGANISATION.ORGANISATIONID"<br />
  *                             + " where CONTACTID != '0'";<br />
@@ -250,85 +228,7 @@ DuplicateScannerUtils.GetCachedDuplicatesForId = function(pClusterRecordId)
  * <br />
  * DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);<br />
  */
-DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEntity, 
-pQueryTargetRecords, pFilterFields, pRecordIdFieldToIgnore)
-{   
-    logging.log("in RebuildDuplicatesCache -> ");
-    let alreadyIdentifiedIds = [];
-    let contactIdsToScan = db.table(pQueryTargetRecords);
-    logging.log("contactIdsToScan -> " + JSON.stringify(contactIdsToScan));
-    
-    //If the contact id loading query has no results, stop. 
-    //No ids should be deleted if an error has been made in this query.
-    if(contactIdsToScan.length <= 0)
-        return;
-
-    /* 
-     * First it gets checked if the current id has already been identified. If that's the case it'll continue with the next.
-     * Otherwise an object gets build in the form of ["FilterFieldName" = "FilterFieldValueFromQuery"] with which a scan for possible duplicates get's started
-     */
-    var duplicatesToInsertQueries = [];
-    for (b = 0; b < contactIdsToScan.length; b++) 
-    {
-        logging.log("b -> " + b);
-        logging.log("indexOf(contactIdsToScan[b] -> " + alreadyIdentifiedIds.indexOf(contactIdsToScan[b]));
-        
-        //If the current Id has already been identified, continue
-        if(alreadyIdentifiedIds.indexOf(contactIdsToScan[b][0]) > -1)
-            continue;
-        logging.log("contactid noch nicht bearbeitet -> " + contactIdsToScan[b][0]);
-        
-        let filterValuesObject = {};
-        
-        /* 
-        * Based on the parameterized filter field names and the values loaded via the query, an object in the style of ["FilterFieldName" = "FilterFieldValueFromQuery"]
-        * gets created. This mandatory to run the scan for this record.
-        */
-        for (a = 0; a < pFilterFields.length; a++) 
-        {
-            logging.log("pFilterValues[a] -> " + pFilterFields[a]);
-            logging.log("contactIdsToScan[i][a] -> " + contactIdsToScan[b][a]);
-            
-            filterValuesObject[pFilterFields[a]] = contactIdsToScan[b][a];
-            
-            logging.log("filterValuesObject[pFilterValues[a]] -> " + filterValuesObject[pFilterFields[a]]);
-        }
-        
-        logging.log("filterValuesObject -> " + JSON.stringify(filterValuesObject));
-        
-        let foundDuplicates = DuplicateScannerUtils.ScanForDuplicates(pFilterName, pTargetEntity,
-                                filterValuesObject, [pRecordIdFieldToIgnore], pRecordIdFieldToIgnore, contactIdsToScan[b][0])
-        
-        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
-        if(foundDuplicates.length == 0)
-            continue;
-        
- 
-        //Insert all found duplicate ids into an cache array because those ids don't have to be checked again lateron.
-        let foundDuplicateIds = [];
-        for (let i = 0; i < foundDuplicates.length; i++) 
-        {
-            logging.log("i -> " + i);
-            logging.log("foundDuplicates[pRecordIdFieldToIgnore] -> " + foundDuplicates[i][pRecordIdFieldToIgnore]);
-            foundDuplicateIds.push(foundDuplicates[i][pRecordIdFieldToIgnore]);
-        }
-        
-        alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
-        
-        //The duplicates list contains only the found duplicates to the original id, therefore it get's added manually
-        foundDuplicateIds.push(contactIdsToScan[b][0]);
-        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
-        logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
-        
-        let insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(foundDuplicateIds, pTargetEntity)
-        duplicatesToInsertQueries = duplicatesToInsertQueries.concat(insertQueriesRay);
-    }
-    logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
-    return db.inserts(duplicatesToInsertQueries);
-}
-
-//pQueryTargetRecords, pFilterFields, pRecordIdFieldToIgnore)
-DuplicateScannerUtils.RebuildDuplicatesCacheIndex = function(pFilterName, pTargetEntity,
+DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEntity,
 pQueryTargetRecords, pDuplicateFieldsConfig)
 {   
     
@@ -370,7 +270,7 @@ pQueryTargetRecords, pDuplicateFieldsConfig)
         logging.log("idField -> " + idField);
         logging.log("idValue -> " + idValue);
         
-        let foundDuplicates = DuplicateScannerUtils.ScanForDuplicatesIndex(pFilterName, pTargetEntity,
+        let foundDuplicates = DuplicateScannerUtils.ScanForDuplicates(pFilterName, pTargetEntity,
                                 entityFieldValuesRay, resultFields, idField, idValue)
         
         //        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
@@ -441,6 +341,10 @@ DuplicateScannerUtils._buildEntityFieldNameValueRays = function(pDuplicateFields
     return entityFieldValuesRay;
 }
 
+//Returns an array of rows in the format
+//[DB_FIELD, ENTITY_FIELD, IS_ID, USE_FOR_SEARCH]
+//e.g. ["CONTACTID", "CONTACTID", true, false]
+//     ["FIRSTNAME", "FIRSTNAME", false, true]
 DuplicateScannerUtils.LoadDuplicateIndexFieldsConfiguration = function(pFilterName, pTargetEntity)
 {
 //    select dsic.DB_FIELD_NAME, dsic.ENTITY_FIELD_NAME, dsic.IS_ID_FIELD, dsic.USE_FOR_INDEX_DUPLICATE_SEARCH from DUPLICATESCANNERINDEXCONFIG dsic
@@ -478,7 +382,7 @@ DuplicateScannerUtils._loadResultFields = function(pFilterName, pTargetEntity)
 }
 
 
-DuplicateScannerUtils.ScanForDuplicatesIndex = function(pFilterName, pTargetEntity, pFilterFieldValueRays,
+DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, pFilterFieldValueRays,
 pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
 {
     let possibleDuplicates = [];
@@ -489,7 +393,7 @@ pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
     
     logging.log("Found filters -> " + configuredFilters);
     
-    let preFilter = DuplicateScannerUtils._applyPreFilterIndex(pTargetEntity, configuredFilters, pFilterFieldValueRays);
+    let preFilter = DuplicateScannerUtils._applyPreFilter(pTargetEntity, configuredFilters, pFilterFieldValueRays);
     
     logging.log("preFilter welcher Elemente im erlaubten bereich ausgibt -> " + preFilter);
     
@@ -519,8 +423,12 @@ pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
 }
 
 //todo doc
+/*
+ * The pre filter is used to narrow the records to be searched by the duplicate scan service.
+ * It loads the target entity and uses filters to achieve this.
+ */
 //only returns null if the number of results exceeds the allowed count
-DuplicateScannerUtils._applyPreFilterIndex = function(pTargetEntity, pFilterCountCharactersToUseRay, pFilterFieldValueRays)
+DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCharactersToUseRay, pFilterFieldValueRays)
 {
     var combinedFilter = {};
 
@@ -721,7 +629,23 @@ DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRays
     return filterPatternConfig;
 }
 
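+/**
+ * Concatenates the DB field names (first element of each configuration row)
+ * into a comma separated string which can be used as the fields-to-select
+ * part of a db select query, e.g. "CONTACTID, FIRSTNAME".
+ */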
+DuplicateScannerUtils.BuildSelectFieldsFromFieldConfig = function(pIndexFieldsConfig)
+{
+    let querySelectFields = "";
+    for (let i = 0; i < pIndexFieldsConfig.length; i++) 
+    {
+        querySelectFields += pIndexFieldsConfig[i][0];
 
+        if(i < pIndexFieldsConfig.length - 1)
+            querySelectFields += ", ";
+    }
+    return querySelectFields;
+}
 
 
 
@@ -1001,80 +920,6 @@ _DuplicateScannerUtils._getMergeUpdateTableInfosSystemAlias = function(pSourceCo
     return tableInfos;
 }
 
-/*
- * The pre filter is used to narrow the records to be searched by the duplicate scan service
- * It loads the target entity and uses filters achieve this.
- */
-_DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCharactersToUseRay, pTargetEntityResultFields, pFilterValues)
-{
-    var combinedFilter = {};
-
-    for (i = 0; i < pFilterCountCharactersToUseRay.length; i++) 
-    {
-        let filter = pFilterCountCharactersToUseRay[i][INDEX_FILTER_CONDITION];
-        let countCharsOfValueToUse = pFilterCountCharactersToUseRay[i][INDEX_COUNT_CHARS_TO_USE];
-        let maxResultsThreshold = pFilterCountCharactersToUseRay[i][INDEX_MAX_RESULTS_THRESHOLD];
-        
-        if(filter == null || filter == "")
-            continue;
-        logging.log("complete filter -> " + filter);
-        filter = JSON.parse(filter).filter;
-        
-        logging.log("countCharsOfValueToUse -> " + countCharsOfValueToUse);
-        logging.log("maxResultsThreshold -> " + maxResultsThreshold);
-        /*
-         * Insert the values into the current filter. Has to be here so that only the new filter
-         * and therefore the combinedFilter incrementally gets filled and not always everything multiple times.
-         */
-        filter.childs = _DuplicateScannerUtils._insertValuesInFilterTemplate(filter.childs, pFilterValues, countCharsOfValueToUse);
-        
-        logging.log("gefüllter filter in prefilter-> " + JSON.stringify(filter.childs));
-        if(i == 0)
-            combinedFilter = filter;
-        else
-        {
-            logging.log("aktueller combinedFilter -> " + JSON.stringify(combinedFilter));
-            
-            //Extend the current combined filter with the next filter condition to further refine the results
-            //It seems to always contain one child element at the root
-            combinedFilter.childs.push(filter.childs[0]);
-        }
-        
-        logging.log("combinedFilter + gefüllter filter -> " + JSON.stringify(combinedFilter));
-        
-        let loadRowsConfig = entities.createConfigForLoadingRows()
-                             .entity(pTargetEntity)
-                             .filter(JSON.stringify(combinedFilter));
-                             
-        let rowCount = entities.getRowCount(loadRowsConfig);                 
-        
-        logging.log("rowCount -> " + rowCount);
-        
-        if(rowCount > maxResultsThreshold)
-        {
-            logging.log("zu viele rows gefundenn nächsten Filter anwenden -> rowCount:" + rowCount + " maxResultsThreshold:" + maxResultsThreshold);
-            //Found more rows than allowed by the threshold, run next filter to narrow the results
-            continue;
-        }
-        else if(rowCount <= 0)
-        {
-            return [];
-        }
-        else
-        {
-            //we're in the treshold, return the data
-            
-            loadRowsConfig = loadRowsConfig.fields(pTargetEntityResultFields);
-            
-            let resultRows = entities.getRows(loadRowsConfig);
-            
-            return resultRows;
-        }
-    }
-    logging.log("zu viele rows und keine filter mehr -> ");
-    return [];
-}
-
 _DuplicateScannerUtils._isExternalScanServiceAvailable = function()
 {
     //todo check if a webservice has been configured
-- 
GitLab