From 8e8701825ac4f74544593bc920aef515bfdfed2e Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Mon, 23 Sep 2019 13:31:24 +0200
Subject: [PATCH] Documentation Refactorings

---
 process/DuplicateScanner_lib/process.js | 501 ++++++++++++++----------
 1 file changed, 291 insertions(+), 210 deletions(-)

diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index 689d29526a6..773330a743c 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -16,7 +16,14 @@ import("system.indexsearch");
 */
 function DuplicateScannerUtils() {}
 
-//todo remove
+/*
+ * Loads all prefilters for a scanner as an array of arrays.
+ * Single filter record: [CONDITION, COUNT_CHARACTERS_TO_USE, MAX_RESULTS_THRESHOLD]
+ *
+ * @param {String} pFilterName Name of the filter
+ * @param {String} pTargetEntity Entity which has been configured
+ * @returns {String[[]]} Array of arrays containing the configured values
+ */
 DuplicateScannerUtils.loadFilters = function(pFilterName, pTargetEntity)
 {
     let query = "select \"CONDITION\", COUNT_CHARACTERS_TO_USE, MAX_RESULTS_THRESHOLD from DUPLICATESCANNERPREFILTERCONFIG"
@@ -46,7 +53,8 @@ DuplicateScannerUtils.DeleteCachedDuplicate = function(pDuplicateId)
 
     //If only one duplicate would be remaining,
     //the whole cluster has to be deleted because there are no more duplicates.
-    //Otherwise delete just the single duplicate.
+    //Otherwise delete just the single duplicate. The parameterized duplicate has already been excluded via sql,
+    //therefore check for less than or equal to 1.
     if(countDuplicatesInClusterWithoutParameterId <= 1)
     {
         let deleteStatements = [];
@@ -67,6 +75,7 @@ DuplicateScannerUtils.DeleteCachedDuplicate = function(pDuplicateId)
 * Deletes all Clusters for the given target Entity.
 * No records markes as unrelated duplicate are being deleted.
 *
+ * @param {String} pTargetEntity Entity which has been configured
 * @return Count of deleted rows
 */
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity = function(pTargetEntity)
@@ -121,44 +130,79 @@ DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations = function(pTargetEntit
     db.deleteData("UNRELATEDDUPLICATES", deleteCondition);
 }
 
+/*
+ * Creates a relation between two duplicates marking them as unrelated.
+ * They will not appear in each other's duplicate tab any more.
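+ *
+ * e.g. a minimal usage sketch (the contact ids are placeholders; GetClusterId is part of this lib):
+ * let clusterId = DuplicateScannerUtils.GetClusterId("SOURCE_CONTACT_ID");
+ * DuplicateScannerUtils.CreateUnrelatedDuplicateRelation("SOURCE_CONTACT_ID", "UNRELATED_CONTACT_ID", clusterId);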
+ * To remove this relation use DuplicateScannerUtils.DeleteUnrelatedDuplicateRelation
+ *
+ * @param {String} pSourceContactId Id of the first duplicate
+ * @param {String} pUnrelatedContactId Id of the second duplicate
+ * @param {String} pClusterId Id of the cluster in which the duplicates are aggregated
+ * @returns {String} Number of records inserted
+ */
 DuplicateScannerUtils.CreateUnrelatedDuplicateRelation = function(pSourceContactId, pUnrelatedContactId, pClusterId)
 {
     let newUid = util.getNewUUID();
     let columns = ["ID", "SOURCEDUPLICATEID", "UNRELATEDDUPLICATEID", "CLUSTERID"];
     let values = [newUid, pSourceContactId, pUnrelatedContactId, pClusterId];
 
-    db.insertData("UNRELATEDDUPLICATES", columns, null, values);
+    return db.insertData("UNRELATEDDUPLICATES", columns, null, values);
 }
 
-DuplicateScannerUtils.GetClusterId = function(pDuplicateContactId)
+
+/*
+ * Gets the cluster id in which the given duplicate id exists
+ *
+ * @param {String} pDuplicateId Duplicate id whose cluster id should be searched
+ * @returns {String} Cluster id
+ */
+DuplicateScannerUtils.GetClusterId = function(pDuplicateId)
 {
     let duplicateIdsOfClusterWithoutUnrelated = SqlCondition.begin()
-    .and("DUPLICATECLUSTERS.DUPLICATEID = '" + pDuplicateContactId + "'")
+    .and("DUPLICATECLUSTERS.DUPLICATEID = '" + pDuplicateId + "'")
     .buildSql("select CLUSTERID from DUPLICATECLUSTERS");
 
     return db.cell(duplicateIdsOfClusterWithoutUnrelated);
 }
 
-DuplicateScannerUtils.DeleteUnrelatedDuplicateRelation = function(pSourceContactId, pUnrelatedContactId)
+/*
+ * Deletes the "unrelated" relation between two duplicates
+ *
+ * @param {String} pSourceDuplicateId Id of the source duplicate
+ * @param {String} pUnrelatedDuplicateId Id of the unrelated duplicate
+ * @returns {String} Number of records deleted
+ */
+DuplicateScannerUtils.DeleteUnrelatedDuplicateRelation = function(pSourceDuplicateId, pUnrelatedDuplicateId)
 {
     var cond = new SqlCondition();
-    cond.andPrepare("UNRELATEDDUPLICATES.SOURCEDUPLICATEID", pSourceContactId)
-    cond.andPrepare("UNRELATEDDUPLICATES.UNRELATEDDUPLICATEID", pUnrelatedContactId)
+    cond.andPrepare("UNRELATEDDUPLICATES.SOURCEDUPLICATEID", pSourceDuplicateId)
+    cond.andPrepare("UNRELATEDDUPLICATES.UNRELATEDDUPLICATEID", pUnrelatedDuplicateId)
 
     let condition = db.translateCondition(cond.build());
 
-    db.deleteData("UNRELATEDDUPLICATES", condition);
+    return db.deleteData("UNRELATEDDUPLICATES", condition);
 }
 
-DuplicateScannerUtils.DeleteAllUnrelatedDuplicateRelations = function(pContactId)
+/*
+ * Deletes all relations to a duplicate id, whether the id is the source or the unrelated duplicate in the relation
+ *
+ * @param {String} pDuplicateId Duplicate id whose "unrelated" relations are to be deleted
+ * @returns {String} Number of records deleted
+ */
+DuplicateScannerUtils.DeleteAllUnrelatedDuplicateRelations = function(pDuplicateId)
 {
     var cond = new SqlCondition();
-    cond.orPrepare("UNRELATEDDUPLICATES.SOURCEDUPLICATEID", pContactId)
-    cond.orPrepare("UNRELATEDDUPLICATES.UNRELATEDDUPLICATEID", pContactId)
+    cond.orPrepare("UNRELATEDDUPLICATES.SOURCEDUPLICATEID", pDuplicateId)
+    cond.orPrepare("UNRELATEDDUPLICATES.UNRELATEDDUPLICATEID", pDuplicateId)
 
     let condition = db.translateCondition(cond.build());
 
-    db.deleteData("UNRELATEDDUPLICATES", condition);
+    return db.deleteData("UNRELATEDDUPLICATES", condition);
 }
 
-
+/*
+ * Loads all other duplicates from the cluster in which the parameterized duplicate is located
+ *
+ * @param {String} pDuplicateId Duplicate id whose cluster should be loaded
+ * @returns {String[]} Array of duplicate ids
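+ *
+ * e.g. (sketch): let otherDuplicateIds = DuplicateScannerUtils.GetCachedDuplicatesForContactId(contactId);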
+ */
 DuplicateScannerUtils.GetCachedDuplicatesForContactId = function(pDuplicateId)
 {
     let querySelectIgnoredDuplicates = '';
@@ -175,8 +219,8 @@ DuplicateScannerUtils.GetCachedDuplicatesForContactId = function(pDuplicateId)
 /*
 * Returns all duplicate ids which haven't been marked as unrelated for the given cluster id.
 *
- * @param {String} pClusterId
- * @return {String[]} Array of ContactIds of duplicates excluding those marked as unrelated
+ * @param {String} pClusterId The cluster's id
+ * @return {String[]} Array of duplicate ids excluding those marked as unrelated
 */
 DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
 {
@@ -191,31 +235,28 @@ DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
     return db.array(db.COLUMN, duplicateIdsOfClusterWithoutUnrelated);
 }
 
-DuplicateScannerUtils.GetCachedDuplicatesForId = function(pClusterRecordId)
-{
-    let query = "select DUPLICATEID from DUPLICATECLUSTERS"
-    + " where CLUSTERID = (select CLUSTERID from DUPLICATECLUSTERS"
-    + " where ID = '"+ pClusterRecordId +"')"
-
-    return db.array(db.COLUMN, query);
-}
-
-/*
+/*
 * Recreates the cached duplicate clusters based on the configured filters.
 * The old clusters have to be deleted manually beforehand using "DeleteDuplicateClustersByTargetEntity".
 * If there have already been ignored relations between duplicate records, it's advised to call "RefreshUnrelatedDuplicateRelations" after the recreation of the duplicates cache.
- *
+ * Please check the documentation of the params on how to get the required information.
+ *
+ * If the usage of an external webservice has been activated, the results will be narrowed down by the prefilter and
+ * will then be given to the pFormatValuesConsumeWebserviceCallback via parameter.
+ * To access the values it is advised to run through the parameter like an array and access each value by key, which is the index field name. The entity
+ * field names can be converted using DuplicateScannerUtils.TranslateEntityToIndexFields. For further information, see the example section below.
+ *
 * Attention!
 * If it is configured to use the external webservice callback the values have to be in the same format as they are in the parameter of the callback.
 *
 * @param {String} pFilterName Name of the filter to use
 * @param {String} pTargetEntity The target entity which has been assigned to the filters configuration
 * @param {String} pQueryTargetRecords Query which holds the values that are being used as configured in the filter.
- * @param {String} pDuplicateFieldsConfig
- * @param {String} pResultFields todo
- * @param {String} pFormatValuesConsumeWebserviceCallback
- * Important: The first element has to be the id field!
+ * @param {String} pDuplicateFieldsConfig The index field config. Use "DuplicateScannerUtils.LoadIndexFieldsConfiguration"
+ * @param {String} pResultFields The result field config. Use "DuplicateScannerUtils.LoadResultFields"
+ * @param {String} pFormatValuesConsumeWebserviceCallback Null if no external service is used, otherwise a function with one parameter.
- * @param {String[]} pDuplicateFieldsConfig The configuration of the fields and their usage. @see DuplicateScannerUtils.LoadDuplicateIndexFieldsConfiguration
-
+ * Important: The first element of pDuplicateFieldsConfig has to be the id field!
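+ *
+ * A webservice callback could be shaped like this (sketch; the concrete value format depends on the configured service):
+ * function formatValuesConsumeWebservice(pPossibleDuplicates)
+ * {
+ *     //hand the possible duplicates over to the external service here and
+ *     //return its results in the same structure as the received parameter
+ *     return pPossibleDuplicates;
+ * }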
 * @return {Int} Count of duplicate clusters created
 *
 * @example
@@ -237,7 +278,6 @@
 DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEntity,
 pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsumeWebserviceCallback)
 {
-    logging.log("in RebuildDuplicatesCache -> ");
     let alreadyIdentifiedIds = [];
 
     let targetRecordsData = db.table(pQueryTargetRecords);
@@ -250,25 +290,20 @@ pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsume
     if(targetRecordsData.length <= 0)
         return;
 
-    /*
-     * First it gets checked if the current id has already been identified. If that's the case it'll continue with the next.
-     * Otherwise an object gets build in the form of ["FilterFieldName" = "FilterFieldValueFromQuery"] with which a scan for possible duplicates get's started
-     */
+    //First it gets checked if the current id has already been identified. If that's the case it'll continue with the next one.
+    //Otherwise an object gets built in the form of ["FilterFieldName" = "FilterFieldValueFromQuery"] with which a scan for possible duplicates gets started
     logging.log("configured pResultFields -> " + pResultFields);
     var duplicatesToInsertQueries = [];
     for (b = 0; b < targetRecordsData.length; b++)
     {
         logging.log("b -> " + b);
         logging.log("Neuer Record -> " + targetRecordsData[b]);
-        //logging.log("indexOf(targetRecordsData[b] -> " + alreadyIdentifiedIds.indexOf(targetRecordsData[b]));
-
+
+        //If the current Id has already been identified, continue
         if(alreadyIdentifiedIds.indexOf(targetRecordsData[b][0]) > -1)
             continue;
 
-        //logging.log("contactid noch nicht bearbeitet -> " + targetRecordsData[b][0]);
-
-
-        let entityFieldValuesRay = _DuplicateScannerUtils._buildEntityFieldNameValueRays(pDuplicateFieldsConfig, targetRecordsData[b]);
+
+        let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(pDuplicateFieldsConfig, targetRecordsData[b]);
         logging.log("entityFieldValuesRay -> " + JSON.stringify(entityFieldValuesRay));
 
        //The first field in this Array must always be the configured id field. This is ensured using onValidation-logic
@@ -277,12 +312,11 @@ pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsume
         logging.log("idField -> " + idField);
         logging.log("idValue -> " + idValue);
 
-        let foundDuplicates = DuplicateScannerUtils.ScanForDuplicates(pFilterName, pTargetEntity,
+        let foundDuplicates = _DuplicateScannerUtils._scanForDuplicates(pFilterName, pTargetEntity,
         entityFieldValuesRay, pResultFields, idField, idValue,
         pFormatValuesConsumeWebserviceCallback, useExternalWebservice)
 
-        // logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
         logging.log("foundDuplicates -> " + foundDuplicates);
-
+
         if(foundDuplicates == null || foundDuplicates.length == 0)
             continue;
 
         logging.log("foundDuplicates.length -> " + foundDuplicates.length);
@@ -319,19 +353,12 @@
 */
 DuplicateScannerUtils.LoadResultFields = function(pFilterName, pTargetEntity)
 {
-//    select dsic.DB_FIELD_NAME, dsic.ENTITY_FIELD_NAME, dsic.IS_ID_FIELD, dsic.USE_FOR_INDEX_DUPLICATE_SEARCH from DUPLICATESCANNERINDEXCONFIG dsic
-//    join DUPLICATESCANNER ds on ds.ID = dsic.DUPLICATESCANNER_ID
-//    where ds.FILTER_NAME = 'PersonDuplicates'
-//    and ds.ENTITY_TO_SCAN_NAME = 'Person_entity'
-//    order by dsic.IS_ID_FIELD desc
-
     let duplicateResultFields = SqlCondition.begin()
     .andPrepare("DUPLICATESCANNER.FILTER_NAME", pFilterName)
     .andPrepare("DUPLICATESCANNER.ENTITY_TO_SCAN_NAME", pTargetEntity)
     .buildSql("select dsrfc.ENTITY_FIELD_NAME from DUPLICATESCANNERRESULTFIELDCONFIG dsrfc join DUPLICATESCANNER on DUPLICATESCANNER.ID = dsrfc.DUPLICATESCANNER_ID"
     , "1=2");
 
-    logging.log("duplicateResultFields condition-> " + duplicateResultFields);
     return db.array(db.COLUMN, duplicateResultFields);
 }
 
@@ -348,19 +375,12 @@ DuplicateScannerUtils.LoadResultFields = function(pFilterName, pTargetEntity)
 */
 DuplicateScannerUtils.LoadIndexFieldsConfiguration = function(pFilterName, pTargetEntity)
 {
-//    select dsic.DB_FIELD_NAME, dsic.ENTITY_FIELD_NAME, dsic.IS_ID_FIELD, dsic.USE_FOR_INDEX_DUPLICATE_SEARCH from DUPLICATESCANNERINDEXCONFIG dsic
-//    join DUPLICATESCANNER ds on ds.ID = dsic.DUPLICATESCANNER_ID
-//    where ds.FILTER_NAME = 'PersonDuplicates'
-//    and ds.ENTITY_TO_SCAN_NAME = 'Person_entity'
-//    order by dsic.IS_ID_FIELD desc
-
     let duplicateIndexFieldConfigurations = SqlCondition.begin()
     .andPrepare("DUPLICATESCANNER.FILTER_NAME", pFilterName)
     .andPrepare("DUPLICATESCANNER.ENTITY_TO_SCAN_NAME", pTargetEntity)
    .buildSql("select dsic.DB_FIELD_NAME, dsic.ENTITY_FIELD_NAME, dsic.IS_ID_FIELD, dsic.USE_FOR_INDEX_DUPLICATE_SEARCH from DUPLICATESCANNERINDEXCONFIG dsic join DUPLICATESCANNER on DUPLICATESCANNER.ID = dsic.DUPLICATESCANNER_ID"
     , "1=2", "order by dsic.IS_ID_FIELD desc");
 
-    logging.log("duplicateIndexFieldConfigurations -> " + duplicateIndexFieldConfigurations);
     return db.table(duplicateIndexFieldConfigurations);
 }
 
@@ -368,56 +388,42 @@ DuplicateScannerUtils.LoadIndexFieldsConfiguration = function(pFilterName, pTarg
 * Scans for duplicates based on the configured prefilters and the pFilterFieldValueRays.
 * First the prefilters get applied one after another until the count of the returned data is in the allowed threshold.
 * Then, the duplicate search using the index starts. All fields which have been configured will be used here.
+ *
 * If the usage of an external webservice has been activated, the result will then be given to the pFormatValuesConsumeWebserviceCallback via parameter.
 * To access the values it is advised to run thru the parameter like an array and access its value by key which is the index field name. The entity
 * field names can be converted using DuplicateScannerUtils.TranslateEntityToIndexFields
- *
+ *
 * Attention!
 * If it's a single ScanForDuplicates call it doesn't matter what the callback returns because after the callback, no more modifications follow before
 * returning the data.
 * If it's inside the RebuildCache the values have to be in the same format as the parameter
- *
- * @param {String}
- * @param {String[]}
- * @returns {String}
+ *
+ * @param {String} pFilterName Name of the filter
+ * @param {String} pTargetEntity Respective target entity
+ * @param {String[[]]} pFilterFieldValueRays Array of arrays containing the name of a used field and its value.
+ * Use "DuplicateScannerUtils.BuildEntityFieldNameValueRays". The field names and values have to be in the same order
+ * @param {String[]} pResultFields The result field config. Use "DuplicateScannerUtils.LoadResultFields"
+ * @param {String} pRecordIdFieldToIgnore Name of the id field, e.g. the contact id in case of a Person duplicate
+ * @param {String} pRecordIdValueToIgnore Value of the id field
+ * @param {function} pFormatValuesConsumeWebserviceCallback Null if no external service is used, otherwise a function with one parameter.
+ * Whether the function is called is based on the configuration of the current scanner
+ * @returns {[["key", "value"]]} Array of key-value pairs based on the configured resultfields; if an external webservice was used,
+ * the structure is defined by the parameterized function "pFormatValuesConsumeWebserviceCallback"
 * @example
- *
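+ * A call could look like this (sketch; scanner and entity names follow the Person example used elsewhere in this file):
+ * let resultFields = DuplicateScannerUtils.LoadResultFields("PersonDuplicates", "Person_entity");
+ * let indexFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration("PersonDuplicates", "Person_entity");
+ * let valueRays = DuplicateScannerUtils.BuildEntityFieldNameValueRays(indexFieldsConfig, targetRecordValues);
+ * let duplicates = DuplicateScannerUtils.ScanForDuplicates("PersonDuplicates", "Person_entity", valueRays,
+ * resultFields, "CONTACTID", contactIdToIgnore, null);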
 */
 DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, pFilterFieldValueRays,
-pTargetEntityResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesConsumeWebserviceCallback, pUseExternalWebservice)
+pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesConsumeWebserviceCallback)
 {
-    let possibleDuplicates = [];
-    let ignoredRecordFilter = _DuplicateScannerUtils._getIgnoreRecordFilter(pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pTargetEntity);
-    let configuredFilters = _DuplicateScannerUtils._loadFilters(pFilterName, pTargetEntity);
-
-    configuredFilters = [ignoredRecordFilter].concat(configuredFilters);
-
-    logging.log("Found filters -> " + configuredFilters);
-
-    let preFilter = _DuplicateScannerUtils._applyPreFilter(pTargetEntity, configuredFilters, pFilterFieldValueRays);
-
-    logging.log("preFilter welcher Elemente im erlaubten bereich ausgibt -> " + preFilter);
-
-    if(preFilter == null)
-        return null;
-
-    possibleDuplicates = _DuplicateScannerUtils._callIndexSearch(pTargetEntity, preFilter, pFilterFieldValueRays, pTargetEntityResultFields, 100);
-    possibleDuplicates = possibleDuplicates[indexsearch.HITS];
-
-    if(pUseExternalWebservice && possibleDuplicates.length > 0 && pFormatValuesConsumeWebserviceCallback != null)
-        possibleDuplicates = pFormatValuesConsumeWebserviceCallback.apply(this, [possibleDuplicates]);
-
-    logging.log("pTargetEntity -> " + pTargetEntity);
-    logging.log("preFilter -> " + preFilter);
-    logging.log("pFilterFieldValueRays -> " + pFilterFieldValueRays);
-    logging.log("pRecordIdFieldToIgnore -> " + pRecordIdFieldToIgnore);
-    logging.log("possibleDuplicates -> " + possibleDuplicates);
+    let useExternalWebservice = _DuplicateScannerUtils._isUseExternalWebservice(pFilterName, pTargetEntity);
 
-    return possibleDuplicates;
+    return _DuplicateScannerUtils._scanForDuplicates(pFilterName, pTargetEntity,
+    pFilterFieldValueRays, pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore,
+    pFormatValuesConsumeWebserviceCallback, useExternalWebservice)
 }
 
 /*
-* Concatenates the fields with a semicolon as separated. This can be used in a sql select.
+* Concatenates the fields with a semicolon as separator. This can be used in a sql select.
 *
 * @param {String[]} pIndexFieldsConfig Array of Names
 * @returns {String} String in the style of "Value1, Value2, Value3"
@@ -439,9 +445,11 @@ DuplicateScannerUtils.BuildSqlSelectFieldsFromFieldConfig = function(pIndexField
 /*
 * Executes a indexsearch.lookupIndexField for eacht entity field in the parameterized array
 * and returns it as Map.
+ *
 * @param {String} pEntityName ...Name of the entity
 * @param {String[]} pEntityFields Array of the entities Fields to translate to index fields
 * @returns Map-like object where (key = entity field) and (value = index field)
+ *
 * @example
 * let entityResultFields = ["LASTNAME"];
 * let entityIndexFields = DuplicateScannerUtils.TranslateEntityToIndexFields("Person_entity", entityResultFields);
@@ -465,24 +473,13 @@ DuplicateScannerUtils.TranslateEntityToIndexFields = function(pEntityName, pEnti
     return entityIndexFields;
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+/*
+ * Merges the source person into the target person and deletes the source person's records afterwards.
+ *
+ * @param {String} pSourceContactId Contact id of the person which gets merged into the target and deleted afterwards
+ * @param {String} pTargetContactId Contact id of the person which remains after the merge
+ * @returns {Boolean} True if the merge was successful, otherwise false
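+ *
+ * e.g. (sketch; the ids are placeholders):
+ * let success = DuplicateScannerUtils.MergePerson("SOURCE_CONTACT_ID", "TARGET_CONTACT_ID");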
[["CONTACTID", "d786045c-8b21-4f22-b6d9-72be9f61c04d"]] * @example * pDuplicateFieldsConfig * ["CONTACTID", "CONTACTID", true, false] * ["FIRSTNAME", "FIRSTNAME", false, true] - * + * * pTargetRecordData * ["d786045c-8b21-4f22-b6d9-72be9f61c04d", "PETER"] - * + * * => [["CONTACTID", "d786045c-8b21-4f22-b6d9-72be9f61c04d"], ["FIRSTNAME", "PETER"]] */ -_DuplicateScannerUtils._buildEntityFieldNameValueRays = function(pDuplicateFieldsConfig, pTargetRecordData) +DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsConfig, pTargetRecordData) { let INDEX_CONFIG_ENTITY_FIELD = 1; let INDEX_CONFIG_USE_FOR_SEARCH = 3; let entityFieldValuesRay = []; - /* - * Based on the parameterized filter field names and the values loaded via the query, - * an array which contains records in the style of ["FilterFieldName", "FilterFieldValueFromQuery"] gets created. + /* + * Based on the parameterized filter field names and the values loaded via the query, + * an array which contains records in the style of ["FilterFieldName", "FilterFieldValueFromQuery"] gets created. * This is mandatory to run the scan for this record. */ - for (a = 0; a < pDuplicateFieldsConfig.length; a++) + for (a = 0; a < pDuplicateFieldsConfig.length; a++) { logging.log("pDuplicateFieldsConfig[a][1] -> " + pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD]); logging.log(" pTargetRecordData[a] -> " + pTargetRecordData[a]); @@ -613,13 +587,90 @@ _DuplicateScannerUtils._buildEntityFieldNameValueRays = function(pDuplicateField return entityFieldValuesRay; } +function _DuplicateScannerUtils() {} + +var INDEX_FILTER_CONDITION = 0; +var INDEX_COUNT_CHARS_TO_USE = 1; +var INDEX_MAX_RESULTS_THRESHOLD = 2; + +var INDEX_TABLE_NAME = 0; +var INDEX_COLUMN_NAME = 1; +var INDEX_CONDITION = 2; -//todo doc /* - * The pre filter is used to narrow the records to be searched by the duplicate scan service - * It loads the target entity and uses filters achieve this. 
-//todo doc
 /*
- * The pre filter is used to narrow the records to be searched by the duplicate scan service
- * It loads the target entity and uses filters achieve this.
+ * @see DuplicateScannerUtils.ScanForDuplicates for the documentation
+ */
+_DuplicateScannerUtils._scanForDuplicates = function(pFilterName, pTargetEntity, pFilterFieldValueRays,
+pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesConsumeWebserviceCallback, pUseExternalWebservice)
+{
+    let possibleDuplicates = [];
+    let ignoredRecordFilter = _DuplicateScannerUtils._getIgnoreRecordFilter(pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pTargetEntity);
+    let configuredFilters = _DuplicateScannerUtils._loadFilters(pFilterName, pTargetEntity);
+
+    //To ensure that the record the current search is based on isn't returned as a result,
+    //the filter which ignores said record gets prepended to the configured filters
+    configuredFilters = [ignoredRecordFilter].concat(configuredFilters);
+
+    logging.log("Found filters -> " + configuredFilters);
+
+    let preFilter = _DuplicateScannerUtils._applyPreFilter(pTargetEntity, configuredFilters, pFilterFieldValueRays);
+
+    logging.log("preFilter which returns elements in the allowed range -> " + preFilter);
+
+    if(preFilter == null)
+        return null;
+
+    possibleDuplicates = _DuplicateScannerUtils._callIndexSearch(pTargetEntity, preFilter, pFilterFieldValueRays, pResultFields, 100);
+    possibleDuplicates = possibleDuplicates[indexsearch.HITS];
+
+    if(pUseExternalWebservice && possibleDuplicates.length > 0 && pFormatValuesConsumeWebserviceCallback != null)
+        possibleDuplicates = pFormatValuesConsumeWebserviceCallback.apply(this, [possibleDuplicates]);
+
+    logging.log("pTargetEntity -> " + pTargetEntity);
+    logging.log("preFilter -> " + preFilter);
+    logging.log("pFilterFieldValueRays -> " + pFilterFieldValueRays);
+    logging.log("pRecordIdFieldToIgnore -> " + pRecordIdFieldToIgnore);
+    logging.log("possibleDuplicates -> " + possibleDuplicates);
+
+    return possibleDuplicates;
+}
+
+/*
+ * Returns a bool which says whether or not an external service should be used
+ *
+ * @param {String} pFilterName Name of the filter
+ * @param {String} pTargetEntity Entity which has been configured
+ * @returns {Bool} True = use, False = no use
+ */
+_DuplicateScannerUtils._isUseExternalWebservice = function(pFilterName, pTargetEntity)
+{
+    let scannerUseExternalWebserviceQuery = SqlCondition.begin()
+    .andPrepare("DUPLICATESCANNER.FILTER_NAME", pFilterName)
+    .andPrepare("DUPLICATESCANNER.ENTITY_TO_SCAN_NAME", pTargetEntity)
+    .buildSql("select EXTERNAL_SERVICE_USAGE_ALLOWED from DUPLICATESCANNER"
+    , "1=2");
+
+    logging.log("scannerUseExternalWebserviceQuery -> " + scannerUseExternalWebserviceQuery);
+    let isUseWebservice = db.cell(scannerUseExternalWebserviceQuery);
+    return (isUseWebservice == 0) ? false : true;
+}
+
+/*
+ * Applies the configured prefilters. The prefilter is used to narrow the records to be searched by the duplicate scan service.
+ * The filters get applied sequentially one after another.
+ * If the number of results is equal to or lower than the configured threshold and greater than zero, the current filter combination gets returned.
+ * If no more filters are available and the number of results still exceeds the threshold, null gets returned.
+ * If the number of results reaches zero while applying filters, null gets returned.
+ * The reason is that if a huge number of records were used, the search for duplicates would take substantially longer.
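+ * e.g. (sketch): with a threshold of 200, a filter combination narrowing the results down to 180 records
+ * gets returned and those 180 records go into the index duplicate search; 0 results, or still more than
+ * 200 results after the last filter, leads to null.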
+ * If the prefilters do not have the desired effect it is advised to modify the configured filter conditions
+ *
+ * @param {String} pTargetEntity Entity which has been configured
+ * @param {String[[]]} pFilterCountCharactersToUseRay Array of arrays containing the configuration of the filters.
+ * The structure is as follows: [INDEX_FILTER_CONDITION, INDEX_COUNT_CHARS_TO_USE, INDEX_MAX_RESULTS_THRESHOLD]
+ * @param {String[[]]} pFilterFieldValueRays Array of arrays containing the name of a used field and its value.
+ * Use "DuplicateScannerUtils.BuildEntityFieldNameValueRays". The field names and values have to be in the same order
+ * @returns {String} Null if the record count wasn't inside the threshold, otherwise the combined filter used to achieve a successful prefiltering
+ */
 _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCharactersToUseRay, pFilterFieldValueRays)
 {
     var combinedFilter = {};
@@ -635,9 +686,6 @@ _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCha
 
         filter = JSON.parse(filter);
 
-//        logging.log("countCharsOfValueToUse -> " + countCharsOfValueToUse);
-//        logging.log("maxResultsThreshold -> " + maxResultsThreshold);
-
         let filterValuesObject = {};
         for (a = 0; a < pFilterFieldValueRays.length; a++)
         {
@@ -653,8 +701,6 @@ _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCha
         filter.filter.childs = _DuplicateScannerUtils._insertValuesInFilterTemplate(filter.filter.childs, filterValuesObject, countCharsOfValueToUse);
 
         logging.log("2 ###### gefüllter filter prefilter index-> " + JSON.stringify(filter));
-
-
         if(i == 0)
             combinedFilter = filter.filter;
         else
@@ -670,9 +716,6 @@ _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCha
 
         logging.log("5 ###### combinedFilter + gefüllter filter vor indexsuche-> " + JSON.stringify(combinedFilter));
 
-//        let completeFilter = JSON.parse(filter);
-//        completeFilter.filter = combinedFilter;
-
         filter.filter = combinedFilter;
 
         logging.log("6 ###### completeFilter -> " + JSON.stringify(filter));
@@ -699,8 +742,7 @@ _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCha
         }
 
         let totalHits = searchResult[indexsearch.TOTALHITS]
-
-
+
         logging.log("totalHits -> " + totalHits);
 
         if(totalHits > maxResultsThreshold)
@@ -724,6 +766,19 @@ _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCha
     return null;
 }
 
+/*
+ * Runs the indexsearch based on the given parameters.
+ * If "pEntityFieldValueRays" is empty, only the prefilters get applied as pattern.
+ * If not, the prefilters will be applied as pattern and the contents of "pEntityFieldValueRays" get applied as filter.
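+ *
+ * e.g. (sketch of the internal usage, as in _scanForDuplicates):
+ * let searchResult = _DuplicateScannerUtils._callIndexSearch(pTargetEntity, preFilter, entityFieldValuesRay, resultFields, 100);
+ * let hits = searchResult[indexsearch.HITS];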
Use "DuplicateScannerUtils.LoadResultFields" + * @param {String} pResultSetRows todo + * @returns {[["key", "value"]]} Array of Key-Value-Pairs based on the configured pResultFields + */ _DuplicateScannerUtils._callIndexSearch = function(pTargetEntity, pPreFilterJson, pEntityFieldValueRays, pResultFields, pResultSetRows) { let parsedFilterAsPatternTerm = indexsearch.buildQueryFromSearchCondition(pPreFilterJson); @@ -753,6 +808,15 @@ _DuplicateScannerUtils._callIndexSearch = function(pTargetEntity, pPreFilterJson return indexsearch.searchIndex(indexQuery); } +/* + * Sets each field of the given array as resultfield on the given query. + * Supports indexsearch internal fields aswell + * (indexsearch.FIELD_ID, indexsearch.FIELD_TITLE, indexsearch.FIELD_TYPE, indexsearch.FIELD_DESCRIPTION) + * + * @param {IndexQuery} pIndexQuery An indexquery created with "indexsearch.createIndexQuery()" + * @param {String[]} pResultFields The result field config. Use "DuplicateScannerUtils.LoadResultFields" + * @returns {IndexQuery} IndexQuery with the resultfields set + */ _DuplicateScannerUtils._setResultFields = function(pIndexQuery, pResultFields) { logging.log("pResultFields.length -> " + pResultFields.length); @@ -788,6 +852,14 @@ _DuplicateScannerUtils._setResultFields = function(pIndexQuery, pResultFields) return pIndexQuery; } +/* + * Translates the fields and the respective value which are configured to use for duplicates to a pattern config to use with + * an index search query. + * + * @param {String[[]]} pEntityFieldValueRays Array of Arrays containing the name of a used field and its value. + * @param {String} pTargetEntity Entity which has been configured + * @returns {PatternConfig} PatternConfig created with "indexsearch.createPatternConfig()" + */ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRays, pTargetEntity) { //The index to get the fields value for USE_FOR_SEARCH @@ -823,6 +895,13 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay return filterPatternConfig; } +/* + * Creates the queries to insert new duplicates into a new cluster based on the pDuplicatesRay + * + * @param {String[]} pDuplicatesRay Array of duplicate ids + * @param {String} pTargetEntity Entity which has been configured + * @returns {String[]} Array of insert queries + */ _DuplicateScannerUtils._createInsertDuplicatesClusterQuery = function (pDuplicatesRay, pTargetEntity) { let duplicatesToInsertQueries = []; @@ -839,9 +918,14 @@ _DuplicateScannerUtils._createInsertDuplicatesClusterQuery = function (pDuplicat return duplicatesToInsertQueries; } +/* + * Deletes all clusters + * + * @returns {String} Count of records deleted + */ _DuplicateScannerUtils._deleteDuplicateClusters = function () { - db.deleteData("DUPLICATECLUSTERS"); + return db.deleteData("DUPLICATECLUSTERS"); } /* @@ -850,18 +934,13 @@ _DuplicateScannerUtils._deleteDuplicateClusters = function () * Also if they already are in the same "group" those records shouldn't be updated because it would lead to the same outcome. * * Mandatory: All records ignored for the time being have to be deleted aswell! 
 
 /*
@@ -850,18 +934,13 @@
 * Also if they already are in the same "group" those records shouldn't be updated because it would lead to the same outcome.
 *
 * Mandatory: All records ignored for the time being have to be deleted aswell! See #_DuplicateScannerUtils._buildRemoveObsoleteParticipantsRecordsDeleteQuery
- *
+ * @returns {String[]} Queries to update the records
 */
 _DuplicateScannerUtils._buildUpdateAttachParticipantsToNewContactQuery = function (pTableName, pContactIdColumn, pAssignableIdColumn, pSourceContactId, pTargetContactId, updateStatements)
 {
     var selectAssignableIdsOfTargetContactQuery = "select " + pAssignableIdColumn
     + " from " + pTableName
     + " where " + pContactIdColumn + " = '" + pTargetContactId + "'";
-
-//+ " ( select tab." + pAssignableIdColumn + " from (" + selectAssignableIdsOfTargetContactQuery + ")tab ) "
-//+ " (select tab." + pAssignableIdColumn + " from (" + selectAssignableIdsOfTargetContactQuery + ")tab ) "
-
-
     let updateCondition = pAssignableIdColumn + " not in"
     + " (" + selectAssignableIdsOfTargetContactQuery + ")"
@@ -870,6 +949,7 @@
     return [[pTableName, [pContactIdColumn], null, [pTargetContactId], updateCondition]];
 }
 
+
 _DuplicateScannerUtils._buildDeleteRemoveObsoleteParticipantsRecordsQuery = function (pTableName, pContactIdColumn, pAssignableIdColumn, pSourceContactId, pTargetContactId, updateStatements)
 {
     var selectAssignableIdsOfTargetContactQuery = "select " + pAssignableIdColumn
@@ -884,6 +964,12 @@
     return recordsToDelete;
 }
 
+
+/*
+ * Creates queries to delete from the PERSON and CONTACT table.
+ *
+ * @returns {String[]} Queries to delete
+ */
 _DuplicateScannerUtils._buildDeletePersonAndContactQuery = function(pSourcePersonId, pSourceContactId)
 {
     let recordsToDelete = []
@@ -908,6 +994,14 @@ _DuplicateScannerUtils._buildDeleteCachedUnrelatedDuplicateQuery = function(pSou
     return recordsToDelete;
 }
 
+/*
+ * Creates a filter JSON which excludes the field and its value using the operator "NOT_EQUAL"
+ *
+ * @param {String} pRecordIdFieldToIgnore Field to be ignored
+ * @param {String} pRecordIdValueToIgnore The field's value
+ * @param {String} pTargetEntity Entity which has been configured
+ * @returns {String[]} Array where 0 = filter, 1 = null(INDEX_COUNT_CHARS_TO_USE), 2 = null(INDEX_MAX_RESULTS_THRESHOLD)
+ */
 _DuplicateScannerUtils._getIgnoreRecordFilter = function(pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pTargetEntity)
 {
     let ignoreFilterJson = JSON.stringify({"entity":pTargetEntity,"filter":{"type":"group","operator":"AND","childs":[{"type":"row","name":pRecordIdFieldToIgnore,"operator":"NOT_EQUAL","value":pRecordIdValueToIgnore,"key":"","contenttype":"TEXT"}]}});
@@ -942,6 +1036,13 @@ _DuplicateScannerUtils._buildStatement = function(pTableinfos, pSourceContactId,
     return [tableName, [columnName], null, [pTargetContactId], condition];
 }
 
+/*
+ * Contains all tables and their fields which may contain the contact id to be replaced
+ *
+ * @param {String} pSourceContactId Contact id which gets replaced
+ * @param {String} pTargetContactId Contact id which replaces the source contact id
+ * @returns {String[[]]} Array of table infos, each in the format [TABLE_NAME, COLUMN_NAME, CONDITION]
+ */
 _DuplicateScannerUtils._getMergeUpdateTableInfosCurrentAlias = function(pSourceContactId, pTargetContactId)
 {
     var tableInfos = new Array();
@@ -989,36 +1090,14 @@ _DuplicateScannerUtils._getMergeUpdateTableInfosSystemAlias = function(pSourceCo
     return tableInfos;
 }
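+
+//e.g. (sketch) one entry of the returned tableInfos, as consumed by _DuplicateScannerUtils._buildStatement:
+//["TABLENAME", "CONTACT_ID_COLUMN", "optional condition"]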
-_DuplicateScannerUtils._isExternalScanServiceAvailable = function()
-{
-    //todo check if a webservice has been configured
-    //todo can there be a different webservice for each type?
-}
-
-_DuplicateScannerUtils._callExternalScanService = function(pFormattedJsonValues)
-{
-    let serviceConfig = _DuplicateScannerUtils._getExternalServiceConfiguration();
-    let serviceUrl = "";
-    let actionType = "";
-    let queryParameter = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//    let serviceUrl = "";
-//
-//    net.callRestWebserviceBasicAuth(serviceUrl, actionType, pQueryParams, pRequestEntity, pHeaders, pDatatypeAccept, pDataTypeSend, pDatatypeJDitoAccept, pDatatypeJDitoSend, pUser, pPassword, pReturnExtendedResponseInfos);
-}
-
-_DuplicateScannerUtils._getExternalServiceConfiguration = function(pFormattedJsonValues)
-{
-    //todo load config and call ws
-}
-
+/*
+ * Loads all filters for the requested scanner in the format of:
+ * [CONDITION, COUNT_CHARACTERS_TO_USE, MAX_RESULTS_THRESHOLD]
+ *
+ * @param {String} pFilterName Name of the filter
+ * @param {String} pTargetEntity Entity which has been configured
+ * @returns {String[[]]} Array of arrays in the format [CONDITION, COUNT_CHARACTERS_TO_USE, MAX_RESULTS_THRESHOLD]
+ */
 _DuplicateScannerUtils._loadFilters = function(pFilterName, pTargetEntity)
 {
     let query = "select \"CONDITION\", COUNT_CHARACTERS_TO_USE, MAX_RESULTS_THRESHOLD from DUPLICATESCANNERPREFILTERCONFIG"
@@ -1028,47 +1107,43 @@ _DuplicateScannerUtils._loadFilters = function(pFilterName, pTargetEntity)
     return db.table(query);
 }
 
+/*
+ * Starts at the pJsonRootNode and recursively traverses through the json's "row" and "group" nodes to fill the fields' values based on
+ * the values in pEntitiyFieldAndValueMap.
+ *
+ * @param {JSON} pJsonRootNode A filter Json
+ * @param {Map} pEntitiyFieldAndValueMap Map-like object in the format {"key": "value"}
+ * @param {String} pCountCharsOfValueToUse Number of characters of each value to use
+ * @returns {JSON} The JSON with filled values based on pEntitiyFieldAndValueMap and pCountCharsOfValueToUse
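+ *
+ * e.g. (sketch): with pEntitiyFieldAndValueMap = {"FIRSTNAME": "PETER"} and pCountCharsOfValueToUse = 2,
+ * a "row" node named FIRSTNAME gets the value "PE" (see _isValueLongerThanCharsToUse below)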
+ */
 _DuplicateScannerUtils._insertValuesInFilterTemplate = function(pJsonRootNode, pEntitiyFieldAndValueMap, pCountCharsOfValueToUse)
 {
     for(var filterChildNode in pJsonRootNode)
     {
         var currentNode = pJsonRootNode[filterChildNode];
-//        logging.log("currentNode -> " + JSON.stringify(currentNode));
         if(currentNode.type == "row")
         {
-
             let fieldName = currentNode.name;
             let fieldValue = pEntitiyFieldAndValueMap[fieldName];
             pCountCharsOfValueToUse = parseInt(pCountCharsOfValueToUse, 10);
-//            logging.log("fieldValue -> " + JSON.stringify(pEntitiyFieldAndValueMap));
 
             if(fieldValue == null)
             {
                 logging.show("Duplicate Scan: Requested value for field " + fieldName + " not present in the provided valueslist");
                 continue;
             }
-
-//            logging.log("pEntitiyFieldAndValueMap -> " + JSON.stringify(pEntitiyFieldAndValueMap));
-//            logging.log("fieldName -> " + fieldName);
-//            logging.log("fieldValue -> " + fieldValue);
-//            logging.log("fieldValue.length -> " + fieldValue.length);
-//            logging.log("pCountCharsOfValueToUse -> " + pCountCharsOfValueToUse);
-            if(_DuplicateScannerUtils._isNotNullAndANumber(pCountCharsOfValueToUse)
+            if(_DuplicateScannerUtils._isNotNullAndANumber(pCountCharsOfValueToUse)
             && _DuplicateScannerUtils._isValueLongerThanCharsToUse(fieldValue.length, pCountCharsOfValueToUse))
             {
                 fieldValue = fieldValue.substring(0, pCountCharsOfValueToUse);
-//                logging.log("fieldValue geschnitten -> " + fieldValue);
             }
 
             pJsonRootNode[filterChildNode].value = fieldValue;
         }
         else
         {
-            //currentNode.type == "group"
-//            logging.log("type == group -> ");
+            //Type of current node is "group", now run through its child nodes
             let populatedChildNodes = _DuplicateScannerUtils._insertValuesInFilterTemplate(currentNode.childs, pEntitiyFieldAndValueMap, pCountCharsOfValueToUse);
-//            logging.log("populatedChildNodes -> " + JSON.stringify(populatedChildNodes));
             pJsonRootNode[filterChildNode].childs = populatedChildNodes;
         }
     }
@@ -1087,6 +1162,12 @@ _DuplicateScannerUtils._isValueLongerThanCharsToUse = function(pValueLength, pCo
     && pValueLength > pCountCharsOfValueToUse;
 }
 
+/*
+ * Returns whether or not the parameter is a number and not null
+ *
+ * @param {String} pCountCharsOfValueToUse Hopefully a number
+ * @returns {Boolean} True if the parameter isn't null and is a number, false if it's null or not a number
+ */
 _DuplicateScannerUtils._isNotNullAndANumber = function(pCountCharsOfValueToUse)
 {
     return pCountCharsOfValueToUse != null && !isNaN(pCountCharsOfValueToUse);
-- 
GitLab