From cb5fa0d354763733ba877726abe362d8a070d0f1 Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Thu, 8 Aug 2019 16:32:26 +0200
Subject: [PATCH] First functional version of the decoupled duplicate detection
 with cache functionality. The cache has to be rebuilt separately for each
 filter. Example Person: all ContactIds of the persons are iterated and checked
 for duplicates one by one. Records that have already been matched are not
 processed again. Duplicates that belong together are then stored in the
 database under a common ClusterId.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../testduplicatescan/onActionProcess.js      |  20 ++-
 process/DuplicateScanner_lib/process.js       | 151 +++++++++++++++++-
 2 files changed, 164 insertions(+), 7 deletions(-)

diff --git a/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js b/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js
index a8dd5a5a39..3b5bdf750a 100644
--- a/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js
+++ b/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js
@@ -1,9 +1,21 @@
 import("system.logging");
 import("DuplicateScanner_lib");
 
-var filterName = "PersonenDubletten";
+//var filterName = "PersonenDubletten";
+//var targetEntity = "Person_entity";
+//var values = {FIRSTNAME: "Anja", LASTNAME: "Lindner", GENDER: "f", CONTACTID: ""};
+//var resultFields = ["PERSONID", "LASTNAME", "FIRSTNAME"];
+//var duplicates = DuplicateScannerUtils.ScanForDuplicates(filterName, targetEntity, values, resultFields);
+
+var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var values = {FIRSTNAME: "Anja", LASTNAME: "Lindner", GENDER: "f"};
-var resultFields = ["PERSONID", "LASTNAME", "FIRSTNAME"];
+var resultFieldContactId = ["CONTACTID"];
+var resultFieldsIdFieldName = "CONTACTID";
+var queryPersonContactIds = "select CONTACTID, FIRSTNAME, LASTNAME, GENDER from CONTACT"
+                            + " join PERSON on PERSONID = PERSON_ID";
+var tmpFieldsInFilterRay = ["CONTACTID", "FIRSTNAME", "LASTNAME", "GENDER"];
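+//Note: the order of these fields has to match the column order of the select
+//above, because RebuildDuplicatesCache maps query columns to filter fields by index.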
+
+logging.log("in der action -> ");
 
-var duplicates = DuplicateScannerUtils.ScanForDuplicates(filterName, targetEntity, values, resultFields);
\ No newline at end of file
+DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, queryPersonContactIds,
+    tmpFieldsInFilterRay, resultFieldContactId, resultFieldsIdFieldName);
\ No newline at end of file
diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index b0f7f6736c..a8068b310c 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -1,3 +1,4 @@
+import("system.util");
 import("system.vars");
 import("system.net");
 import("system.logging");
@@ -12,7 +13,7 @@ import("system.entities");
  */
 function DuplicateScannerUtils() {}
 
-DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity,  pFilterValues, pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
+DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, pFilterValues, pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
 {
     let ignoredRecordFilter = _DuplicateScannerUtils._getIgnoreRecordFilter(pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pTargetEntity);
     let configuredFilters = _DuplicateScannerUtils._loadFilters(pFilterName, pTargetEntity);
@@ -34,6 +35,107 @@ DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity,
     return possibleDuplicates;
 }
 
+//DuplicateScannerUtils.ScanAndUpdateResultCache = function()
+//{
+//    let duplicatesRay = DuplicateScannerUtils.ScanForDuplicates();
+//    
+//    if(duplicatesRay.length > 0)
+//    {
+//        /*
+//         * Try loading the clusterId instead of using a boolean exists check to determine if there's already an identical cluster.
+//         * If such a cluster already exists, its id is directly available for use.
+//         */
+//        let clusterId = _DuplicateScannerUtils._getIdOfIdenticalCachedDuplicatesCluster(pRecordIdValueToIgnore, duplicatesRay)
+//        if(clusterId == "")
+//        {
+//            // No cluster with identical records exists, therefore a new one has to be created containing the new value as well as the found duplicates
+//            _DuplicateScannerUtils._createNewCachedDuplicatesCluster(pRecordIdValueToIgnore, duplicatesRay);
+//        }
+//        else
+//        {
+//            //Add the current record to the existing cluster
+//            _DuplicateScannerUtils._AddRecordsToCachedDuplicatesCluster(pRecordIdValueToIgnore, clusterId);
+//        }
+//    }
+//}
+
+DuplicateScannerUtils.RemoveFromDuplicatesCache = function(pContactId)
+{
+    
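+    //Not implemented yet. A minimal sketch, assuming DUPLICATEID holds the
+    //contact id and the second argument of db.deleteData is a SQL condition:
+    //db.deleteData("DUPLICATECLUSTERS", "DUPLICATEID = '" + pContactId + "'");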
+}
+
+//Später mal eigentsändiger Serverprozess ohne externe Konfiguration
+DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEntity,
+    pQueryTargetRecords, pFilterValues, pTargetEntityResultFields, pRecordIdFieldToIgnore)
+{
+    logging.log("in RebuildDuplicatesCache -> ");
+    let alreadyIdentifiedIds = [];
+    let contactIdsToScan = db.table(pQueryTargetRecords);
+    logging.log("contactIdsToScan -> " + JSON.stringify(contactIdsToScan));
+    
+    //If the contact id loader query returns no ids, stop here.
+    //The existing cache must not be wiped if this query is faulty.
+    if(contactIdsToScan.length <= 0)
+        return;
+    
+    //The fields for the duplicate check are assembled from the filter's fields
+    //and the corresponding values of the record loaded from the database.
+    
+    _DuplicateScannerUtils._deleteDuplicateClusters();
+    var duplicatesToInsertQueries = [];
+    for (let b = 0; b < contactIdsToScan.length; b++) 
+    {
+        logging.log("b -> " + b);
+        logging.log("indexOf(contactIdsToScan[b] -> " + alreadyIdentifiedIds.indexOf(contactIdsToScan[b]));
+        
+        //If the current Id has already been identified, continue
+        if(alreadyIdentifiedIds.indexOf(contactIdsToScan[b][0]) > -1)
+            continue;
+        logging.log("contactid noch nicht bearbeitet -> " + contactIdsToScan[b][0]);
+        
+        let filterValuesObject = {};
+        
+        for (let a = 0; a < pFilterValues.length; a++) 
+        {
+            logging.log("pFilterValues[a] -> " + pFilterValues[a]);
+            logging.log("contactIdsToScan[b][a] -> " + contactIdsToScan[b][a]);
+            
+            filterValuesObject[pFilterValues[a]] = contactIdsToScan[b][a];
+            
+            logging.log("filterValuesObject[pFilterValues[a]] -> " + filterValuesObject[pFilterValues[a]]);
+        }
+        
+        logging.log("filterValuesObject -> " + JSON.stringify(filterValuesObject));
+        
+        let foundDuplicates = DuplicateScannerUtils.ScanForDuplicates(pFilterName, pTargetEntity,
+                                filterValuesObject, pTargetEntityResultFields, pRecordIdFieldToIgnore, contactIdsToScan[b][0]);
+        
+        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
+        if(foundDuplicates.length == 0)
+            continue;
+        
+        let foundDuplicateIds = [];
+        for (let i = 0; i < foundDuplicates.length; i++) 
+        {
+            logging.log("i -> " + i);
+            logging.log("foundDuplicates[pRecordIdFieldToIgnore] -> " + foundDuplicates[i][pRecordIdFieldToIgnore]);
+            foundDuplicateIds.push(foundDuplicates[i][pRecordIdFieldToIgnore]);
+        }
+        
+        alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
+        
+        //The duplicates list doesn't contain the id which was tested for, therefore it gets added manually
+        foundDuplicateIds.push(contactIdsToScan[b][0]);
+        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
+        logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
+        
+        let insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(foundDuplicateIds);
+        duplicatesToInsertQueries = duplicatesToInsertQueries.concat(insertQueriesRay);
+    }
+    logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
+    db.inserts(duplicatesToInsertQueries);
+}
+
 DuplicateScannerUtils.MergePerson = function(pSourceContactId, pTargetContactId)
 {
     let updateStatements = [];
@@ -56,8 +158,20 @@ DuplicateScannerUtils.MergePerson = function(pSourceContactId, pTargetContactId)
     return true;//(affectedRows > 0 && deletedRows >= 2);
 }
 
-
-
+//DuplicateScannerUtils._getScanConfigForEntity = function(pTargetEntity)
+//{
+//    switch (pTargetEntity) 
+//    {
+//        case "Person_entity":
+//            {
+//                TargetEntityResultFields:
+//            }
+//            break;
+//        default:
+//            break;
+//    }
+//
+//}
 
 function _DuplicateScannerUtils() {}
 
@@ -69,6 +183,37 @@ var INDEX_TABLE_NAME = 0;
 var INDEX_COLUMN_NAME = 1;
 var INDEX_CONDITION = 2;
 
+_DuplicateScannerUtils._getIdOfIdenticalCachedDuplicatesCluster = function (pRecordIdValueToIgnore, duplicatesRay)
+{
+    
+}
+
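+/*
+ * Builds one insert statement per duplicate id, all sharing a freshly
+ * generated cluster id, in the array format expected by db.inserts.
+ */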
+_DuplicateScannerUtils._createInsertDuplicatesClusterQuery = function (pDuplicatesRay)
+{
+    let duplicatesToInsertQueries = [];
+    let cols = ["ID", "CLUSTERID", "DUPLICATEID"];
+    let newClusterUid = util.getNewUUID();
+    
+    for (let i = 0; i < pDuplicatesRay.length; i++) 
+    {
+        let newId = util.getNewUUID();
+        let valuesToInsert = [newId, newClusterUid, pDuplicatesRay[i]];
+
+        duplicatesToInsertQueries.push(["DUPLICATECLUSTERS", cols, null, valuesToInsert]);
+    }
+    return duplicatesToInsertQueries;
+}
+
+_DuplicateScannerUtils._AddRecordsToCachedDuplicatesCluster = function (pRecordIdToAdd, pClusterId)
+{
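+    //Not implemented yet. A minimal sketch, mirroring the insert format used
+    //in _createInsertDuplicatesClusterQuery (assumption: one new row links
+    //the record to the existing cluster):
+    //db.inserts([["DUPLICATECLUSTERS", ["ID", "CLUSTERID", "DUPLICATEID"], null,
+    //    [util.getNewUUID(), pClusterId, pRecordIdToAdd]]]);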
+    
+}
+
+_DuplicateScannerUtils._deleteDuplicateClusters = function ()
+{
+    //Clear the whole cache table; the clusters get fully rebuilt afterwards
+    db.deleteData("DUPLICATECLUSTERS", "");
+}
 
 /*
  * All records with contactId = sourceContactId get updated, which are not assigned to the same "group" as the targetContactId.
-- 
GitLab