From cb5fa0d354763733ba877726abe362d8a070d0f1 Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Thu, 8 Aug 2019 16:32:26 +0200
Subject: [PATCH] First functional version of the decoupled duplicate
 detection with cache functionality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cache has to be updated separately for each filter. Example for
Person: all contact ids of the persons are iterated and checked for
duplicates one by one. Ids that have already been identified as
duplicates are not processed again. The duplicates that belong together
are then stored in the database, united under a common ClusterId.
---
 .../testduplicatescan/onActionProcess.js      |  20 ++-
 process/DuplicateScanner_lib/process.js       | 151 +++++++++++++++++-
 2 files changed, 164 insertions(+), 7 deletions(-)

diff --git a/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js b/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js
index a8dd5a5a39..3b5bdf750a 100644
--- a/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js
+++ b/entity/DuplicateScan_entity/entityfields/testactiongroup/children/testduplicatescan/onActionProcess.js
@@ -1,9 +1,21 @@
 import("system.logging");
 import("DuplicateScanner_lib");
 
-var filterName = "PersonenDubletten";
+//var filterName = "PersonenDubletten";
+//var targetEntity = "Person_entity";
+//var values = {FIRSTNAME: "Anja", LASTNAME: "Lindner", GENDER: "f", CONTACTID: ""};
+//var resultFields = ["PERSONID", "LASTNAME", "FIRSTNAME"];
+//var duplicates = DuplicateScannerUtils.ScanForDuplicates(filterName, targetEntity, values, resultFields);
+
+var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var values = {FIRSTNAME: "Anja", LASTNAME: "Lindner", GENDER: "f"};
-var resultFields = ["PERSONID", "LASTNAME", "FIRSTNAME"];
+var resultFieldContactId = ["CONTACTID"];
+var resultFieldsIdFieldName = "CONTACTID";
+var queryPersonContactIds = "select CONTACTID, FIRSTNAME, LASTNAME, GENDER from CONTACT" +
+    " join PERSON on PERSONID = PERSON_ID";
+var tmpFieldsInFilterRay = ["CONTACTID", "FIRSTNAME", "LASTNAME", "GENDER"];
+
+logging.log("in the action -> ");
 
-var duplicates = DuplicateScannerUtils.ScanForDuplicates(filterName, targetEntity, values, resultFields);
\ No newline at end of file
+DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, queryPersonContactIds,
+    tmpFieldsInFilterRay, resultFieldContactId, resultFieldsIdFieldName);
\ No newline at end of file
diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index b0f7f6736c..a8068b310c 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -1,3 +1,4 @@
+import("system.util");
 import("system.vars");
 import("system.net");
 import("system.logging");
@@ -12,7 +13,7 @@ import("system.entities");
 */
 function DuplicateScannerUtils() {}
 
-DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, pFilterValues, pTargetEntityResultFields)
+DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, pFilterValues, pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore)
 {
     let ignoredRecordFilter = _DuplicateScannerUtils._getIgnoreRecordFilter(pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pTargetEntity);
     let configuredFilters = _DuplicateScannerUtils._loadFilters(pFilterName, pTargetEntity);
@@ -34,6 +35,107 @@ DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity,
     return possibleDuplicates;
 }
 
+//DuplicateScannerUtils.ScanAndUpdateResultCache = function()
+//{
+//    let duplicatesRay = DuplicateScannerUtils.ScanForDuplicates();
+//
+//    if(duplicatesRay.length > 0)
+//    {
+//        /*
+//         * Try loading the clusterId instead of using a boolean "exists" check to determine whether there's already an identical cluster.
+//         * If an identical cluster already exists, its id is directly available for use.
+//         */
+//        let clusterId = _DuplicateScannerUtils._getIdOfIdenticalCachedDuplicatesCluster(pRecordIdValueToIgnore, duplicatesRay);
+//        if(clusterId == "")
+//        {
+//            //No cluster with identical records exists, therefore a new one has to be created, containing the new value as well as the found duplicates
+//            _DuplicateScannerUtils._createNewCachedDuplicatesCluster(pRecordIdValueToIgnore, duplicatesRay);
+//        }
+//        else
+//        {
+//            //Add the current record to the existing cluster
+//            _DuplicateScannerUtils._AddRecordsToCachedDuplicatesCluster(pRecordIdValueToIgnore, clusterId);
+//        }
+//    }
+//}
+
+DuplicateScannerUtils.RemoveFromDuplicatesCache = function(pContactId)
+{
+
+}
+
+//Will later become a standalone server process without external configuration
+DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEntity,
+    pQueryTargetRecords, pFilterValues, pTargetEntityResultFields, pRecordIdFieldToIgnore)
+{
+    logging.log("in RebuildDuplicatesCache -> ");
+    let alreadyIdentifiedIds = [];
+    let contactIdsToScan = db.table(pQueryTargetRecords);
+    logging.log("contactIdsToScan -> " + JSON.stringify(contactIdsToScan));
+
+    //If the contact id loader query returns no ids, stop.
+    //No ids should be deleted if an error has been made in this query.
+    if(contactIdsToScan.length <= 0)
+        return;
+
+    //The fields for the duplicate check have to be assembled from the fields of the filter
+    //and the corresponding values of the record loaded from the db
+
+    _DuplicateScannerUtils._deleteDuplicateClusters();
+    var duplicatesToInsertQueries = [];
+    for (let b = 0; b < contactIdsToScan.length; b++)
+    {
+        logging.log("b -> " + b);
+        logging.log("indexOf(contactIdsToScan[b][0]) -> " + alreadyIdentifiedIds.indexOf(contactIdsToScan[b][0]));
+
+        //If the current id has already been identified, continue with the next one
+        if(alreadyIdentifiedIds.indexOf(contactIdsToScan[b][0]) > -1)
+            continue;
+        logging.log("contact id not yet processed -> " + contactIdsToScan[b][0]);
+
+        let filterValuesObject = {};
+
+        for (let a = 0; a < pFilterValues.length; a++)
+        {
+            logging.log("pFilterValues[a] -> " + pFilterValues[a]);
+            logging.log("contactIdsToScan[b][a] -> " + contactIdsToScan[b][a]);
+
+            filterValuesObject[pFilterValues[a]] = contactIdsToScan[b][a];
+
+            logging.log("filterValuesObject[pFilterValues[a]] -> " + filterValuesObject[pFilterValues[a]]);
+        }
+
+        logging.log("filterValuesObject -> " + JSON.stringify(filterValuesObject));
+
+        let foundDuplicates = DuplicateScannerUtils.ScanForDuplicates(pFilterName, pTargetEntity,
+            filterValuesObject, pTargetEntityResultFields, pRecordIdFieldToIgnore, contactIdsToScan[b][0]);
+
+        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
+        if(foundDuplicates.length == 0)
+            continue;
+
+        let foundDuplicateIds = [];
+        for (let i = 0; i < foundDuplicates.length; i++)
+        {
+            logging.log("i -> " + i);
+            logging.log("foundDuplicates[i][pRecordIdFieldToIgnore] -> " + foundDuplicates[i][pRecordIdFieldToIgnore]);
+            foundDuplicateIds.push(foundDuplicates[i][pRecordIdFieldToIgnore]);
+        }
+
+        alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
+
+        //The duplicates list doesn't contain the id that was scanned for, therefore it gets added manually
+        foundDuplicateIds.push(contactIdsToScan[b][0]);
+        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
+        logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
+
+        let insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(foundDuplicateIds);
+        duplicatesToInsertQueries = duplicatesToInsertQueries.concat(insertQueriesRay);
+    }
+    logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
+    db.inserts(duplicatesToInsertQueries);
+}
 
 DuplicateScannerUtils.MergePerson = function(pSourceContactId, pTargetContactId)
 {
     let updateStatements = [];
@@ -56,8 +158,20 @@ DuplicateScannerUtils.MergePerson = function(pSourceContactId, pTargetContactId)
 
     return true;//(affectedRows > 0 && deletedRows >= 2);
 }
-
-
+//DuplicateScannerUtils._getScanConfigForEntity = function(pTargetEntity)
+//{
+//    switch (pTargetEntity)
+//    {
+//        case "Person_entity":
+//        {
+//            TargetEntityResultFields:
+//        }
+//        break;
+//        default:
+//            break;
+//    }
+//
+//}
 
 function _DuplicateScannerUtils() {}
 
@@ -69,6 +183,37 @@
 var INDEX_TABLE_NAME = 0;
 var INDEX_COLUMN_NAME = 1;
 var INDEX_CONDITION = 2;
 
+_DuplicateScannerUtils._getIdOfIdenticalCachedDuplicatesCluster = function (pRecordIdValueToIgnore, pDuplicatesRay)
+{
+
+}
+
+_DuplicateScannerUtils._createInsertDuplicatesClusterQuery = function (pDuplicatesRay)
+{
+    let duplicatesToInsertQueries = [];
+    let cols = ["ID", "CLUSTERID", "DUPLICATEID"];
+    let newClusterUid = util.getNewUUID();
+
+    for (let i = 0; i < pDuplicatesRay.length; i++)
+    {
+        let newId = util.getNewUUID();
+        let valuesToInsert = [newId, newClusterUid, pDuplicatesRay[i]];
+
+        duplicatesToInsertQueries.push(["DUPLICATECLUSTERS", cols, null, valuesToInsert]);
+    }
+    return duplicatesToInsertQueries;
+}
+
+_DuplicateScannerUtils._AddRecordsToCachedDuplicatesCluster = function (pRecordIdToAdd, pClusterId)
+{
+
+}
+
+_DuplicateScannerUtils._deleteDuplicateClusters = function ()
+{
+    //Clear the whole cache; it is completely rebuilt afterwards
+    db.deleteData("DUPLICATECLUSTERS", "");
+}
 
 /*
 * All records with contactId = sourceContactId get updated, which are not assigned to the same "group" as the targetContactId.
-- 
GitLab
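
Note (not part of the patch): a minimal sketch of how the clusters cached by
RebuildDuplicatesCache could be read back for further processing, e.g. to list
each cluster with its member contact ids. It assumes only the DUPLICATECLUSTERS
table written above and the db/logging modules already used in this patch;
loadDuplicatesClusters is a hypothetical helper name, not an existing API.

import("system.db");
import("system.logging");

//Loads the cached duplicates grouped by cluster.
//Returns an object of the shape {<clusterId>: [<duplicateId>, ...], ...}
function loadDuplicatesClusters()
{
    let rows = db.table("select CLUSTERID, DUPLICATEID from DUPLICATECLUSTERS");
    let clusters = {};
    for (let i = 0; i < rows.length; i++)
    {
        let clusterId = rows[i][0];
        let duplicateId = rows[i][1];
        if(clusters[clusterId] == undefined)
            clusters[clusterId] = [];
        clusters[clusterId].push(duplicateId);
    }
    return clusters;
}

//Example usage: log every cluster with its member ids
let clusters = loadDuplicatesClusters();
for (let clusterId in clusters)
    logging.log(clusterId + " -> " + JSON.stringify(clusters[clusterId]));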