diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index 3759c2a2ad3e33f07ef855100735975ee133ac03..4bd19f78e4033b5135b0d849c4cfa7c1a684b89b 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -278,7 +278,7 @@ DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
 * {
 *    let entityFieldName = resultFields[b];
 *    let indexFieldName = indexResultFields[entityFieldName];
- *    //logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
+ *    ////logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
 *    //format values
 * }
 * }
@@ -303,19 +303,19 @@ DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEnti
     let entityFields = _DuplicateScannerUtils._loadEntityFieldsFromPattern(indexPattern);
     let entityIdField = _DuplicateScannerUtils._loadEntityIdField(pFilterName, pTargetEntity);
 
-    logging.log("indexPattern -> " +indexPattern );
+    //logging.log("indexPattern -> " +indexPattern );
 
-    logging.log("entityIdField -> " + entityIdField);
+    //logging.log("entityIdField -> " + entityIdField);
 
     //Add the id field to ensure that it's on the first position
     entityFields = [entityIdField].concat(entityFields);
 
-    logging.log("entityFields -> " + entityFields);
+    //logging.log("entityFields -> " + entityFields);
 
     let alreadyIdentifiedIds = [];
 
     let targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFields, 0, pRecordsBlockSize);
-    //logging.log("Initialer Block geladen targetRecords-> " + JSON.stringify(targetRecords));
+    ////logging.log("Initialer Block geladen targetRecords-> " + JSON.stringify(targetRecords));
 
     let currentRecordIndex = pRecordsBlockSize;
     while(targetRecords.length > 0)
@@ -323,29 +323,29 @@ DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEnti
         foundDuplicateIds = DuplicateScannerUtils.ScanRecords(pTargetEntity, targetRecords,
         entityFields, resultFields, useExternalWebservice, pFormatValuesConsumeWebserviceCallback, alreadyIdentifiedIds, indexPattern);
-        //logging.log("gefundene ids zum ignorieren foundDuplicateIds -> " + foundDuplicateIds);
+        ////logging.log("gefundene ids zum ignorieren foundDuplicateIds -> " + foundDuplicateIds);
 
         alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
-        //logging.log("Gesamte ignorierListe -> " + alreadyIdentifiedIds);
+        ////logging.log("Gesamte ignorierListe -> " + alreadyIdentifiedIds);
 
         if(targetRecords.length < pRecordsBlockSize)
         {
-            logging.log("weniger records geladen als möglich => Ende der verfügbaren Records -> Abbrechen");
+            //logging.log("weniger records geladen als möglich => Ende der verfügbaren Records -> Abbrechen");
             break;
         }
 
         //logging.log("Nächster Block wird geladen mit startRow -> " + currentRecordIndex);
-        targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFieldsToLoad,
+        targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFields,
         currentRecordIndex, pRecordsBlockSize);
 
         currentRecordIndex += pRecordsBlockSize;
-        //logging.log("Nächster Block geladen mit targetRecords -> " + JSON.stringify(targetRecords));
+        ////logging.log("Nächster Block geladen mit targetRecords -> " + JSON.stringify(targetRecords));
     }
 }
 
 DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData,
 pEntityFields, pResultFields, pUseExternalWebservice, pFormatValuesConsumeWebserviceCallback, pAlreadyIdentifiedIds, pIndexPattern)
 {
-    //logging.log("in ScanRecords -> ");
+    ////logging.log("in ScanRecords -> ");
////logging.log("in ScanRecords -> "); let foundDuplicateIds = []; //If the contact id loading query has no results, stop. @@ -355,27 +355,27 @@ DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData, //First it gets checked if the current id has already been identified. If that's the case it'll continue with the next. //Otherwise an object gets build in the form of ["FilterFieldName" = "FilterFieldValueFromQuery"] with which a scan for possible duplicates get's started - //logging.log("configured pResultFields -> " + pResultFields); + ////logging.log("configured pResultFields -> " + pResultFields); var duplicatesToInsertQueries = []; for (b = 0; b < pTargetRecordsData.length; b++) { - //logging.log("Nächster Datensatz in RebuildDuplicatesCache -> " + b); - // logging.log("Neuer Record -> " + pTargetRecordsData[b]); + ////logging.log("Nächster Datensatz in RebuildDuplicatesCache -> " + b); + // //logging.log("Neuer Record -> " + pTargetRecordsData[b]); - //logging.log("pTargetRecordsData[b] -> " + JSON.stringify(pTargetRecordsData[b])); + ////logging.log("pTargetRecordsData[b] -> " + JSON.stringify(pTargetRecordsData[b])); let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(pEntityFields, pTargetRecordsData[b]); - logging.log("Werte des Datensatzes entityFieldValuesRay -> " + JSON.stringify(entityFieldValuesRay)); + //logging.log("Werte des Datensatzes entityFieldValuesRay -> " + JSON.stringify(entityFieldValuesRay)); //The first field in this Array must always be the configured id field. This is ensured using onValidation-logic let idField = entityFieldValuesRay[0][0]; let idValue = entityFieldValuesRay[0][1]; - logging.log("idField -> " + idField); - logging.log("idValue -> " + idValue); - //logging.log("pTargetRecordsData[b][idField] -> " + pTargetRecordsData[b][idField]); + //logging.log("idField -> " + idField); + //logging.log("idValue -> " + idValue); + ////logging.log("pTargetRecordsData[b][idField] -> " + pTargetRecordsData[b][idField]); //If the current Id has already been identified, continue if(pAlreadyIdentifiedIds.indexOf(pTargetRecordsData[b][idField]) > -1) { - //logging.log("Id schon behandelt, continue; -> "); + ////logging.log("Id schon behandelt, continue; -> "); continue; } @@ -384,17 +384,17 @@ DuplicateScannerUtils.ScanRecords = function(pTargetEntity, pTargetRecordsData, if(foundDuplicates == null || foundDuplicates.length == 0) { - logging.log("Keine Datensätze gefunden continue;-> "); + //logging.log("Keine Datensätze gefunden continue;-> "); continue; } - logging.log("foundDuplicates.length nach _scanForDuplicates -> " + foundDuplicates.length); + //logging.log("foundDuplicates.length nach _scanForDuplicates -> " + foundDuplicates.length); //Insert all found duplicate ids into an cache array because those ids don't have to be checked again lateron. 
         for (let i = 0; i < foundDuplicates.length; i++)
         {
             let localId = foundDuplicates[i][indexsearch.FIELD_ID];
-            logging.log("foundDuplicates[i] -> " + foundDuplicates[i]);
+            //logging.log("foundDuplicates[i] -> " + foundDuplicates[i]);
             foundDuplicateIds.push(localId);
         }
 
@@ -402,14 +402,14 @@
         //The duplicates list contains only the found duplicates to the original id, therefore it get's added manually
         foundDuplicateIds.push(pTargetRecordsData[b][idField]);
-        logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
-        logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
+        //logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
+        //logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
 
         let insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(foundDuplicateIds, pTargetEntity)
         duplicatesToInsertQueries = duplicatesToInsertQueries.concat(insertQueriesRay);
         foundDuplicateIds = [];
     }
-    logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
+    //logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
     db.inserts(duplicatesToInsertQueries, db.getCurrentAlias(), 10 * datetime.ONE_MINUTE);
     return foundDuplicateIds;
 }
@@ -447,7 +447,7 @@ DuplicateScannerUtils.CacheNewScanResults = function(pNewRecordId, pDuplicateIds
     else
         idRayToInsert.push(pNewRecordId);
 
-    //logging.log("idRayToInsert -> " + idRayToInsert);
+    ////logging.log("idRayToInsert -> " + idRayToInsert);
     insertQueriesRay = _DuplicateScannerUtils._createInsertDuplicatesClusterQuery(idRayToInsert, pTargetEntity, clusterId)
     return db.inserts(insertQueriesRay);
 
@@ -477,7 +477,7 @@ DuplicateScannerUtils.GetClusterWithIdenticalDuplicates = function(pDuplicateIds
     clusterIdCondition = clusterIdConditionBuilder.buildSql("select distinct CLUSTERID from DUPLICATECLUSTERS", "1=2");
 
     let foundClusterId = db.cell(clusterIdCondition);
-    //logging.log("clusterid des clusters der die gleichen ids enthält-> " + foundClusterId);
+    ////logging.log("clusterid des clusters der die gleichen ids enthält-> " + foundClusterId);
 
     if(foundClusterId == null || foundClusterId == "")
         return RESULT_NO_CLUSTER_FOUND;
 
@@ -485,8 +485,8 @@
     .andPrepare("DUPLICATECLUSTERS.CLUSTERID", foundClusterId)
     .buildSql("select DUPLICATEID from DUPLICATECLUSTERS");
 
     let duplicatesInCluster = db.array(db.COLUMN, duplicatesInClusterCondition);
-    //logging.log("länge gefundener cluster -> " + duplicatesInCluster.length);
-    //logging.log("länge der angefragten ids -> " + pDuplicateIds.length);
+    ////logging.log("länge gefundener cluster -> " + duplicatesInCluster.length);
+    ////logging.log("länge der angefragten ids -> " + pDuplicateIds.length);
 
     /*
     * A cluster has been searched which contains all duplicate ids as specified via parameter.
     * There's the possibility that this cluster contains even more duplicates than specified via the parameter.
@@ -593,7 +593,7 @@ DuplicateScannerUtils.LoadIndexFieldsConfiguration = function(pFilterName, pTarg
 * {
 *    let entityFieldName = resultFields[b];
 *    let indexFieldName = indexResultFields[entityFieldName];
- *    //logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
+ *    ////logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
 *    //format values
 * }
 * }
@@ -622,7 +622,7 @@ DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, p
     let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(entityFields, pValuesToCheck);
 
     //The first field in this Array must always be the configured id field.
-    logging.log("ray " + entityFieldValuesRay.toSource())
+    //logging.log("ray " + entityFieldValuesRay.toSource())
     let idField = entityFieldValuesRay[0][0];
     let idValue = entityFieldValuesRay[0][1];
 
@@ -643,7 +643,7 @@ DuplicateScannerUtils.ScanForDuplicates = function(pFilterName, pTargetEntity, p
 * let entityResultFields = ["LASTNAME"];
 * let entityIndexFields = DuplicateScannerUtils.TranslateEntityToIndexFields("Person_entity", entityResultFields);
 *
- * logging.log(entityIndexFields["LASTNAME"]);//=> "LASTNAME_value"
+ * //logging.log(entityIndexFields["LASTNAME"]);//=> "LASTNAME_value"
 */
 DuplicateScannerUtils.TranslateEntityToIndexFields = function(pEntityName, pEntityFields)
 {
@@ -697,15 +697,15 @@ DuplicateScannerUtils.MergePerson = function(pSourceContactId, pTargetContactId)
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeletePersonAndContactQuery(sourcePersonId, pSourceContactId));
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeleteCachedUnrelatedDuplicateQuery(pSourceContactId));
 
-    //logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
-    //logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
+    ////logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
+    ////logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
 
     let affectedRowsCurrentAlias = db.updates(updateStatementsCurrentAlias);
     let affectedRowsSystemAlias = db.updates(updateStatementsSystemAlias, SqlUtils.getSystemAlias());
     let deletedRows = db.deletes(deleteStatements)
 
-    //logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
-    //logging.log("deletedRows -> " + deletedRows);
+    ////logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
+    ////logging.log("deletedRows -> " + deletedRows);
 
     DuplicateScannerUtils.DeleteCachedDuplicate(pSourceContactId);
 
@@ -750,15 +750,15 @@ DuplicateScannerUtils.MergeOrganisation = function(pSourceContactId, pTargetCont
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeleteOrganisationAndContactQuery(sourceOrganisationId, pSourceContactId));
     deleteStatements = deleteStatements.concat(_DuplicateScannerUtils._buildDeleteCachedUnrelatedDuplicateQuery(pSourceContactId));
 
-    //logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
-    //logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
+    ////logging.log("updateStatementsCurrentAlias -> " + JSON.stringify(updateStatementsCurrentAlias));
+    ////logging.log("deleteStatements -> " + JSON.stringify(deleteStatements));
 
     let affectedRowsCurrentAlias = db.updates(updateStatementsCurrentAlias);
     let affectedRowsSystemAlias = db.updates(updateStatementsSystemAlias, SqlUtils.getSystemAlias());
     let deletedRows = db.deletes(deleteStatements)
 
-    //logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
-    //logging.log("deletedRows -> " + deletedRows);
+    ////logging.log("affectedRowsCurrentAlias -> " + affectedRowsCurrentAlias);
+    ////logging.log("deletedRows -> " + deletedRows);
 
     DuplicateScannerUtils.DeleteCachedDuplicate(pSourceContactId);
 
@@ -792,8 +792,8 @@ DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsC
     */
     for (a = 0; a < pDuplicateFieldsConfig.length; a++)
     {
-        // logging.log("pDuplicateFieldsConfig[a][1] -> " + pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD]);
-        // logging.log(" pTargetRecordData[a] -> " + pTargetRecordData[a]);
+        // //logging.log("pDuplicateFieldsConfig[a][1] -> " + pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD]);
+        // //logging.log(" pTargetRecordData[a] -> " + pTargetRecordData[a]);
         let entityField = pDuplicateFieldsConfig[a];
         let entityFieldValue = pTargetRecordData[entityField];
 
@@ -840,8 +840,11 @@ _DuplicateScannerUtils._loadIndexPattern = function(pScannerName, pTargetEntity)
     .and("DUPLICATESCANNER.ENTITY_TO_SCAN_NAME = '" + pTargetEntity + "'")
     .and("DUPLICATESCANNER.FILTER_NAME = '" + pScannerName + "'")
     .buildSql("select SCAN_PATTERN from DUPLICATESCANNER");
-
-    return db.cell(scanPatternQuery);
+
+    let scanPattern = db.cell(scanPatternQuery);
+    scanPattern = scanPattern.trim();
+
+    return scanPattern;
 }
 
 _DuplicateScannerUtils._loadEntityFieldsFromPattern = function(indexPattern)
@@ -849,7 +852,7 @@ _DuplicateScannerUtils._loadEntityFieldsFromPattern = function(indexPattern)
 {
     return indexPattern.match(/[^{}]+(?=\})/g);
 }
 
-_DuplicateScannerUtils._replacePlaceholderForValuesInPattern = function(pIndexPattern, pEntityFieldValueRays)
+_DuplicateScannerUtils._replacePlaceholderForValuesInPattern = function(pIndexPattern, pEntityFieldValueRays, pDefaultValue)
 {
     let INDEX_ENTITY_FIELD_NAME = 0;
     let INDEX_ENTITY_FIELD_VALUE = 1;
@@ -861,8 +864,11 @@ _DuplicateScannerUtils._replacePlaceholderForValuesInPattern = function(pIndexPa
         placeholder = "{" + pEntityFieldValueRays[i][INDEX_ENTITY_FIELD_NAME] + "}";
         fieldValue = pEntityFieldValueRays[i][INDEX_ENTITY_FIELD_VALUE];
 
-        logging.log("placeholder -> " + placeholder);
-        logging.log("fieldValue -> " + fieldValue);
+        if(pDefaultValue != null && (fieldValue == null || fieldValue == ""))
+            fieldValue = pDefaultValue;
+
+        //logging.log("placeholder -> " + placeholder);
+        //logging.log("fieldValue -> " + fieldValue);
 
         pIndexPattern = pIndexPattern.replace(placeholder, fieldValue);
     }
     return pIndexPattern;
 }
 
@@ -890,14 +896,14 @@ pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesCons
     let possibleDuplicates = [];
     let ignoreSourceRecordPattern = _DuplicateScannerUtils._getIgnoreSourceRecordPattern(pRecordIdFieldToIgnore, pRecordIdValueToIgnore);
-    logging.log("ignoreSourceRecordPattern -> " + ignoreSourceRecordPattern);
-    let indexPatternWithValues = _DuplicateScannerUtils._replacePlaceholderForValuesInPattern(pIndexPattern, pEntityFieldNameValueRays);
-    logging.log("indexPatternWithValues -> " + indexPatternWithValues);
+    //logging.log("ignoreSourceRecordPattern -> " + ignoreSourceRecordPattern);
+    let indexPatternWithValues = _DuplicateScannerUtils._replacePlaceholderForValuesInPattern(pIndexPattern, pEntityFieldNameValueRays, "*");
+    //logging.log("indexPatternWithValues -> " + indexPatternWithValues);
 
     indexPatternWithValues = ignoreSourceRecordPattern + indexPatternWithValues + ")";
-    logging.log("indexPatternWithValues -> " + indexPatternWithValues);
+ //logging.log("indexPatternWithValues -> " + indexPatternWithValues); possibleDuplicates = _DuplicateScannerUtils._callIndexSearch(pTargetEntity, indexPatternWithValues, pResultFields, 100); - logging.log("possibleDuplicates -> " + JSON.stringify(possibleDuplicates)); + //logging.log("possibleDuplicates -> " + JSON.stringify(possibleDuplicates)); if(possibleDuplicates == null) return null; @@ -907,11 +913,11 @@ pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesCons if(pUseExternalWebservice && pFormatValuesConsumeWebserviceCallback != null) possibleDuplicates = pFormatValuesConsumeWebserviceCallback.apply(this, [possibleDuplicates]); -// //logging.log("pTargetEntity -> " + pTargetEntity); -// //logging.log("preFilter -> " + preFilter); -// //logging.log("pFilterFieldValueRays -> " + pFilterFieldValueRays); -// //logging.log("pRecordIdFieldToIgnore -> " + pRecordIdFieldToIgnore); -// //logging.log("possibleDuplicates -> " + possibleDuplicates); +// ////logging.log("pTargetEntity -> " + pTargetEntity); +// ////logging.log("preFilter -> " + preFilter); +// ////logging.log("pFilterFieldValueRays -> " + pFilterFieldValueRays); +// ////logging.log("pRecordIdFieldToIgnore -> " + pRecordIdFieldToIgnore); +// ////logging.log("possibleDuplicates -> " + possibleDuplicates); return possibleDuplicates; } @@ -947,6 +953,7 @@ _DuplicateScannerUtils._isUseExternalWebservice = function(pFilterName, pTargetE */ _DuplicateScannerUtils._callIndexSearch = function(pTargetEntity, pIndexPatternWithValues, pResultFields, pResultSetRows) { + logging.log("pIndexPatternWithValues -> " + pIndexPatternWithValues); //The indexPattern can't be null because it is required to run the search. if(pIndexPatternWithValues == null || pIndexPatternWithValues == "") return null; @@ -1000,7 +1007,7 @@ _DuplicateScannerUtils._setResultFields = function(pIndexQuery, pResultFields) } // } - //logging.log("resultFields -> " + resultFields); + ////logging.log("resultFields -> " + resultFields); return pIndexQuery; } @@ -1040,7 +1047,7 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 1) { - logging.log("Nutze fuzzy search -> "); + //logging.log("Nutze fuzzy search -> "); filterTerm = filterTerm.setFuzzySearchFactor(2); } @@ -1303,7 +1310,7 @@ _DuplicateScannerUtils._insertValuesInFilterTemplate = function(pJsonRootNode, p if(fieldValue == null) { - //logging.log("Duplicate Scan: Requested value for field " + fieldName + " not present in the provided valueslist"); + ////logging.log("Duplicate Scan: Requested value for field " + fieldName + " not present in the provided valueslist"); continue; }