Commit ca57d1a0 authored by David Büchler

The cache rebuilding has been changed. The values to check for duplicates are now loaded using the entities API instead of plain SQL. If the cache for all duplicates of a scanner gets rebuilt, this is now done in blocks, which keeps resource consumption in check even when working on huge record sets. Checking huge record sets may still take some time.
The flag "Use external Webservice" has been removed from the view because no native API to a duplicate scanning service exists, so the flag is unnecessary right now. The configuration of the result fields has been removed as well: if no external service is used, it is enough to work with the id, which is always part of the index's response.
The configuration of the index fields has been changed. No manual mapping between entity field and database field is necessary any more because SQL is no longer used; instead, the fields can be selected via a dropdown offering the entity's fields.
Some refactoring.
Changes on the docs to reflect the changed data structure of some arrays.
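To illustrate the changed contract, a before/after sketch of the rebuild call (both versions appear in the Person duplicates rebuild process further down in this diff; the third parameter is now the records block size instead of an SQL statement, and the webservice callback can simply be null):

DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, queryPersonFieldData,
duplicateFieldsConfig, resultFields, formatToJsonAndCallWsCallback);

becomes

DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, 5,
duplicateFieldsConfig, resultFields, null);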
parent 85414b4d
Showing 211 additions and 81 deletions
@@ -14,6 +14,7 @@
<entityField>
<name>ENTITY_FIELD_NAME</name>
<title>Entity field name</title>
<dropDownProcess>%aditoprj%/entity/DuplicateScannerIndexConfig_entity/entityfields/entity_field_name/dropDownProcess.js</dropDownProcess>
</entityField>
<entityProvider>
<name>ScannerIndexConfigProvider</name>
@@ -66,6 +67,11 @@
<contentType>BOOLEAN</contentType>
<valueProcess>%aditoprj%/entity/DuplicateScannerIndexConfig_entity/entityfields/is_id_field/valueProcess.js</valueProcess>
</entityField>
<entityParameter>
<name>ScannerEntity_param</name>
<expose v="true" />
<mandatory v="true" />
</entityParameter>
</entityFields>
<recordContainers>
<dbRecordContainer>
......
import("system.result");
import("system.vars");
import("system.project");
let currentEntity = vars.get("$param.ScannerEntity_param");
var model = project.getEntityStructure(currentEntity);
let entityFields = [];
for (let fieldname in model.fields)
{
let field = model.fields[fieldname];
if(field.fieldType == project.ENTITYFIELDTYPE_FIELD)
{
entityFields.push([field.name, field.name]);
}
// logging.log(" Title: " + field.title);
}
result.object(entityFields);
\ No newline at end of file
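For illustration, with a hypothetical Person_entity whose structure contains the plain fields FIRSTNAME and LASTNAME, the dropdown process above would resolve to:

result.object([["FIRSTNAME", "FIRSTNAME"], ["LASTNAME", "LASTNAME"]]);

i.e. every plain entity field becomes an [id, title] dropdown entry that uses the field name for both.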
@@ -85,6 +85,10 @@
<name>DuplicateScannerId_param</name>
<valueProcess>%aditoprj%/entity/DuplicateScanner_entity/entityfields/scannerindexconfigs_consumer/children/duplicatescannerid_param/valueProcess.js</valueProcess>
</entityParameter>
<entityParameter>
<name>ScannerEntity_param</name>
<valueProcess>%aditoprj%/entity/DuplicateScanner_entity/entityfields/scannerindexconfigs_consumer/children/scannerentity_param/valueProcess.js</valueProcess>
</entityParameter>
</children>
</entityConsumer>
<entityConsumer>
......
@@ -10,40 +10,11 @@ let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity
logging.log("duplicateFieldsConfig -> " + duplicateFieldsConfig);
logging.log("resultFields -> " + resultFields);
let querySelectFields = DuplicateScannerUtils.BuildSqlSelectFieldsFromFieldConfig(duplicateFieldsConfig);
logging.log("querySelectFields -> " + querySelectFields);
let queryPersonFieldData = "select " + querySelectFields + " from CONTACT"
+ " join PERSON on PERSONID = PERSON_ID"
+ " left join ADDRESS on ADDRESS.CONTACT_ID = CONTACT.CONTACTID";
//+ " join COMMUNICATION on COMMUNICATION.CONTACT_ID = CONTACT.CONTACTID";
logging.log("Deleting PERSON duplicates -> ");
DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
let formatToJsonAndCallWsCallback = function(pPossibleDuplicatesRay)
{
logging.log("inside callback -> ");
let indexResultFields = DuplicateScannerUtils.TranslateEntityToIndexFields(targetEntity, resultFields)
//Run through every duplicate result and read out the result fields
for (let i = 0; i < pPossibleDuplicatesRay.length; i++)
{
for (let b = 0; b < resultFields.length; b++)
{
let entityFieldName = resultFields[b];
let indexFieldName = indexResultFields[entityFieldName];
//logging.log("Entity Field -> "+ pPossibleDuplicatesRay[i][indexFieldName]);
//format values
}
}
//call webservice
//reformat results to same structure as before
return pPossibleDuplicatesRay;
};
logging.log("Recalculating PERSON duplicates -> ");
DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, queryPersonFieldData,
duplicateFieldsConfig, resultFields, formatToJsonAndCallWsCallback);
DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, 5,
duplicateFieldsConfig, resultFields, null);
DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
\ No newline at end of file
import("system.vars");
import("system.result");
result.string(vars.get("$field.ENTITY_TO_SCAN_NAME"));
\ No newline at end of file
import("system.entities");
import("system.project");
import("system.indexsearch");
import("system.question");
import("system.logging");
@@ -274,4 +276,78 @@ import("JditoFilter_lib")
//
////The result values can be accessed as seen above in "formatToJsonAndCallWsCallback"
//DuplicateScannerUtils.ScanForDuplicates(filterName, targetEntity, entityFieldValuesRay, resultFields,
//idField, idValue, formatToJsonAndCallWsCallback);
\ No newline at end of file
//idField, idValue, formatToJsonAndCallWsCallback);
//let entityStructure = project.getEntityStructure("Person_entity");
//logging.log("entityStructure -> " + entityStructure);
//logging.log("entityStructure.fields -> " + entityStructure.fields);
//for (fieldname in model.fields)
//{
// field = model.fields[fieldname];
//}
//##############################################################################
//var model = project.getEntityStructure("Person_entity");
//logging.log("Name: " + model.name);
//logging.log("Title: " + model.title);
//logging.log("Description: " + model.description);
//logging.log("UsePermissions: " + model.usePermissions);
//for (fieldname in model.fields)
//{
// field = model.fields[fieldname];
// logging.log(" Name: " + field.name);
// logging.log(" Type: " + field.fieldType);
// logging.log(" Title: " + field.title);
// logging.log(" Description: " + field.description);
// logging.log(" UsePermissions: " + field.usePermissions);
//}
//##############################################################################
var targetEntity = "Person_entity";
filterName = "PersonDuplicates";
let pRecordsBlockSize = 5;
let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
let entityFieldsToLoad = [];
for (let field in duplicateFieldsConfig)
{
entityFieldsToLoad.push(duplicateFieldsConfig[field][0]);
}
logging.log("entityFieldsToLoad -> " + entityFieldsToLoad);
let loadInitialRecordsConfig = entities.createConfigForLoadingRows()
.entity(targetEntity)
.fields(entityFieldsToLoad)
.count(pRecordsBlockSize);
let targetRecords = entities.getRows(loadInitialRecordsConfig);
logging.log("Initial block loaded, targetRecords -> " + JSON.stringify(targetRecords));
let currentRecordIndex = pRecordsBlockSize;
while(targetRecords.length > 0)
{
//scan for duplicates
if(targetRecords.length < pRecordsBlockSize)
{
logging.log("fewer records loaded than the block size => end of available records reached -> stopping");
break;
}
let loadNextRows = entities.createConfigForLoadingRows()
.entity(targetEntity)
.fields(entityFieldsToLoad)
.count(pRecordsBlockSize)
.startrow(currentRecordIndex);
logging.log("Loading next block, startRow -> " + currentRecordIndex);
currentRecordIndex += pRecordsBlockSize;
targetRecords = entities.getRows(loadNextRows);
logging.log("Next block loaded, targetRecords -> " + JSON.stringify(targetRecords));
}
\ No newline at end of file
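To make the block handling above concrete: assuming 12 available records and pRecordsBlockSize = 5, the loop works through blocks of 5, 5 and finally 2 records; because that last block is smaller than the block size, the end of the available records has been reached and the loop breaks instead of issuing another query.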
@@ -57,7 +57,6 @@ else
+ " join CONTACT on CONTACT.CONTACTID = DUPLICATEID"
+ " join ORGANISATION on ORGANISATION.ORGANISATIONID = CONTACT.CONTACTID", "1=2", " ORDER BY CLUSTERID");
}
let duplicateInfos = db.table(duplicateInfosQuery);
let MAX_SHOW_CLUSTER_RECORDS = 4;
......
@@ -16,11 +16,7 @@
<devices />
<columns>
<neonTableColumn>
<name>bc39c449-d9d6-4049-902e-24740d7a951f</name>
<entityField>DB_FIELD_NAME</entityField>
</neonTableColumn>
<neonTableColumn>
<name>d6119b22-69b6-4b3c-8e83-99c000586a67</name>
<name>b32d6711-17bc-4783-841d-b4adc1d35432</name>
<entityField>ENTITY_FIELD_NAME</entityField>
</neonTableColumn>
<neonTableColumn>
......
@@ -297,34 +297,70 @@ DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
* DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
*/
DuplicateScannerUtils.RebuildDuplicatesCache = function(pFilterName, pTargetEntity,
pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsumeWebserviceCallback)
pRecordsBlockSize, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsumeWebserviceCallback)
{
logging.log("in RebuildDuplicatesCache -> ");
let alreadyIdentifiedIds = [];
let targetRecordsData = db.table(pQueryTargetRecords);
let useExternalWebservice = _DuplicateScannerUtils._isUseExternalWebservice(pFilterName, pTargetEntity);
logging.log("useExternalWebservice -> " + useExternalWebservice);
logging.log("targetRecordsData -> " + JSON.stringify(targetRecordsData));
let alreadyIdentifiedIds = [];
let entityFieldsToLoad = [];
for (let field in pDuplicateFieldsConfig)
{
entityFieldsToLoad.push(pDuplicateFieldsConfig[field][0]);
}
logging.log("entityFieldsToLoad -> " + entityFieldsToLoad);
let targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFieldsToLoad, 0, pRecordsBlockSize);
logging.log("Initial block loaded, targetRecords -> " + JSON.stringify(targetRecords));
let currentRecordIndex = pRecordsBlockSize;
while(targetRecords.length > 0)
{
let foundDuplicateIds = DuplicateScannerUtils.ScanRecords(pFilterName, pTargetEntity, targetRecords,
pDuplicateFieldsConfig, pResultFields, useExternalWebservice,
pFormatValuesConsumeWebserviceCallback, alreadyIdentifiedIds);
logging.log("found ids to ignore, foundDuplicateIds -> " + foundDuplicateIds);
alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
logging.log("complete ignore list -> " + alreadyIdentifiedIds);
if(targetRecords.length < pRecordsBlockSize)
{
logging.log("fewer records loaded than the block size => end of available records reached -> stopping");
break;
}
logging.log("Loading next block, startRow -> " + currentRecordIndex);
targetRecords = DuplicateScannerUtils.LoadEntityRecords(pTargetEntity, entityFieldsToLoad,
currentRecordIndex, pRecordsBlockSize);
currentRecordIndex += pRecordsBlockSize;
logging.log("Next block loaded, targetRecords -> " + JSON.stringify(targetRecords));
}
}
DuplicateScannerUtils.ScanRecords = function(pFilterName, pTargetEntity, pTargetRecordsData,
pDuplicateFieldsConfig, pResultFields, pUseExternalWebservice, pFormatValuesConsumeWebserviceCallback, pAlreadyIdentifiedIds)
{
logging.log("in ScanRecords -> ");
//function-level list of all duplicate ids found in this block; returned so the caller can skip them
let allFoundDuplicateIds = [];
//If no target records could be loaded, stop.
//No ids should be deleted if an error has been made while loading the records.
if(targetRecordsData.length <= 0)
if(pTargetRecordsData.length <= 0)
return [];
//First it gets checked whether the current id has already been identified. If that's the case, it continues with the next record.
//Otherwise an object gets built in the form of ["FilterFieldName" = "FilterFieldValueFromQuery"] with which a scan for possible duplicates gets started.
logging.log("configured pResultFields -> " + pResultFields);
var duplicatesToInsertQueries = [];
for (b = 0; b < targetRecordsData.length; b++)
for (let b = 0; b < pTargetRecordsData.length; b++)
{
logging.log("Next record in ScanRecords -> " + b);
// logging.log("New record -> " + targetRecordsData[b]);
// logging.log("New record -> " + pTargetRecordsData[b]);
//If the current Id has already been identified, continue
if(alreadyIdentifiedIds.indexOf(targetRecordsData[b][0]) > -1)
continue;
let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(pDuplicateFieldsConfig, targetRecordsData[b]);
logging.log("pTargetRecordsData[b] -> " + JSON.stringify(pTargetRecordsData[b]));
let entityFieldValuesRay = DuplicateScannerUtils.BuildEntityFieldNameValueRays(pDuplicateFieldsConfig, pTargetRecordsData[b]);
logging.log("values of the record, entityFieldValuesRay -> " + JSON.stringify(entityFieldValuesRay));
//The first field in this Array must always be the configured id field. This is ensured using onValidation-logic
@@ -332,9 +368,16 @@ pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsume
let idValue = entityFieldValuesRay[0][1];
// logging.log("idField -> " + idField);
// logging.log("idValue -> " + idValue);
logging.log("pTargetRecordsData[b][idField] -> " + pTargetRecordsData[b][idField]);
//If the current Id has already been identified, continue
if(pAlreadyIdentifiedIds.indexOf(pTargetRecordsData[b][idField]) > -1)
{
logging.log("Id already handled, continue -> ");
continue;
}
let foundDuplicates = _DuplicateScannerUtils._scanForDuplicates(pFilterName, pTargetEntity,
entityFieldValuesRay, pResultFields, idField, idValue, pFormatValuesConsumeWebserviceCallback, useExternalWebservice)
entityFieldValuesRay, pResultFields, idField, idValue, pFormatValuesConsumeWebserviceCallback, pUseExternalWebservice)
if(foundDuplicates == null || foundDuplicates.length == 0)
{
@@ -344,7 +387,7 @@ pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsume
logging.log("foundDuplicates.length nach _scanForDuplicates -> " + foundDuplicates.length);
//Insert all found duplicate ids into a cache array because those ids don't have to be checked again later on.
let foundDuplicateIds = [];
for (let i = 0; i < foundDuplicates.length; i++)
{
let localId = foundDuplicates[i][indexsearch.FIELD_ID];
@@ -352,10 +395,10 @@ pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsume
foundDuplicateIds.push(localId);
}
alreadyIdentifiedIds = alreadyIdentifiedIds.concat(foundDuplicateIds);
pAlreadyIdentifiedIds = pAlreadyIdentifiedIds.concat(foundDuplicateIds);
//The duplicates list contains only the duplicates found for the original id, therefore it gets added manually
foundDuplicateIds.push(targetRecordsData[b][0]);
foundDuplicateIds.push(pTargetRecordsData[b][idField]);
//collect this record's duplicate ids in the function-level list returned to the caller;
//the per-record "foundDuplicateIds" above shadows it, so without this the function would always return an empty array
allFoundDuplicateIds = allFoundDuplicateIds.concat(foundDuplicateIds);
logging.log("foundDuplicates -> " + JSON.stringify(foundDuplicates));
logging.log("foundDuplicateIds -> " + JSON.stringify(foundDuplicateIds));
@@ -363,9 +406,23 @@ pQueryTargetRecords, pDuplicateFieldsConfig, pResultFields, pFormatValuesConsume
duplicatesToInsertQueries = duplicatesToInsertQueries.concat(insertQueriesRay);
}
logging.log("duplicatesToInsertQueries -> " + JSON.stringify(duplicatesToInsertQueries));
return db.inserts(duplicatesToInsertQueries);
db.inserts(duplicatesToInsertQueries);
return allFoundDuplicateIds;
}
DuplicateScannerUtils.LoadEntityRecords = function(pTargetEntity, pEntityFields, pStartRow, pCountRecordsToLoad)
{
logging.log("pTargetEntity -> " + pTargetEntity);
logging.log("pEntityFields -> " + pEntityFields);
logging.log("pCountRecordsToLoad -> " + pCountRecordsToLoad);
logging.log("pStartRow -> " + pStartRow);
let getRowsConfig = entities.createConfigForLoadingRows()
.entity(pTargetEntity)
.fields(pEntityFields)
.count(pCountRecordsToLoad)
.startrow(pStartRow);
return entities.getRows(getRowsConfig);
}
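/*
 * Illustrative usage (a sketch, assuming a Person_entity with CONTACTID and FIRSTNAME fields):
 *
 * let rows = DuplicateScannerUtils.LoadEntityRecords("Person_entity", ["CONTACTID", "FIRSTNAME"], 0, 5);
 * //rows come back as objects keyed by entity field name, e.g.
 * //[{"CONTACTID": "d786045c-8b21-4f22-b6d9-72be9f61c04d", "FIRSTNAME": "Peter"}, ...]
 * //which is what BuildEntityFieldNameValueRays below relies on
 */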
/*
* Loads the configured result fields as an array
*
@@ -387,9 +444,9 @@ DuplicateScannerUtils.LoadResultFields = function(pFilterName, pTargetEntity)
/*
* Loads the fields and their configuration.
* One field record is in the following format:
* [DB_FIELD, ENTITY_FIELD, IS_ID, USE_FOR_SEARCH]
* [ENTITY_FIELD, IS_ID, USE_FOR_SEARCH]
* Example:
* ["FIRSTNAME", "FIRSTNAME", false, true]
* ["FIRSTNAME", false, true]
*
* @param {String} pFilterName Name of the filter
* @param {String} pTargetEntity Entity which has been configured
@@ -400,7 +457,7 @@ DuplicateScannerUtils.LoadIndexFieldsConfiguration = function(pFilterName, pTarg
let duplicateIndexFieldConfigurations = SqlCondition.begin()
.andPrepare("DUPLICATESCANNER.FILTER_NAME", pFilterName)
.andPrepare("DUPLICATESCANNER.ENTITY_TO_SCAN_NAME", pTargetEntity)
.buildSql("select dsic.DB_FIELD_NAME, dsic.ENTITY_FIELD_NAME, dsic.IS_ID_FIELD, dsic.USE_FOR_INDEX_DUPLICATE_SEARCH from DUPLICATESCANNERINDEXCONFIG dsic join DUPLICATESCANNER on DUPLICATESCANNER.ID = dsic.DUPLICATESCANNER_ID"
.buildSql("select dsic.ENTITY_FIELD_NAME, dsic.IS_ID_FIELD, dsic.USE_FOR_INDEX_DUPLICATE_SEARCH from DUPLICATESCANNERINDEXCONFIG dsic join DUPLICATESCANNER on DUPLICATESCANNER.ID = dsic.DUPLICATESCANNER_ID"
, "1=2", "order by dsic.IS_ID_FIELD desc");
return db.table(duplicateIndexFieldConfigurations);
@@ -616,13 +673,13 @@ DuplicateScannerUtils.MergeOrganisation = function(pSourceContactId, pTargetCont
/*
* Creates an array of arrays containing the entity field name paired with its value.
*
* @param {[]} pDuplicateFieldsConfig An Array with the configured fields in the form of [DB_FIELD, ENTITY_FIELD, IS_ID, USE_FOR_SEARCH]. @see LoadDuplicateIndexFieldsConfiguration()
* @param {[]} pDuplicateFieldsConfig An Array with the configured fields in the form of [ENTITY_FIELD, IS_ID, USE_FOR_SEARCH]. @see LoadDuplicateIndexFieldsConfiguration()
* @param {{}} pTargetRecordData One record as an object containing the values for the configured fields, keyed by entity field name
* @return {[[]]} An array of arrays containing the entity field name, its value and its use-for-search flag. [["CONTACTID", "d786045c-8b21-4f22-b6d9-72be9f61c04d", false]]
* @example
* pDuplicateFieldsConfig
* ["CONTACTID", "CONTACTID", true, false]
* ["FIRSTNAME", "FIRSTNAME", false, true]
* ["CONTACTID", true, false]
* ["FIRSTNAME", false, true]
*
* pTargetRecordData
* {"CONTACTID": "d786045c-8b21-4f22-b6d9-72be9f61c04d", "FIRSTNAME": "PETER"}
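*
* yields
* [["CONTACTID", "d786045c-8b21-4f22-b6d9-72be9f61c04d", false], ["FIRSTNAME", "PETER", true]]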
@@ -631,8 +688,8 @@ DuplicateScannerUtils.MergeOrganisation = function(pSourceContactId, pTargetCont
*/
DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsConfig, pTargetRecordData)
{
let INDEX_CONFIG_ENTITY_FIELD = 1;
let INDEX_CONFIG_USE_FOR_SEARCH = 3;
let INDEX_CONFIG_ENTITY_FIELD = 0;
let INDEX_CONFIG_USE_FOR_SEARCH = 2;
let entityFieldValuesRay = [];
/*
* Based on the parameterized filter field names and the values loaded via the query,
@@ -643,8 +700,8 @@ DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsC
{
// logging.log("pDuplicateFieldsConfig[a][1] -> " + pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD]);
// logging.log(" pTargetRecordData[a] -> " + pTargetRecordData[a]);
entityFieldValuesRay.push([pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD], pTargetRecordData[a], pDuplicateFieldsConfig[a][INDEX_CONFIG_USE_FOR_SEARCH]])
let entityField = pDuplicateFieldsConfig[a][INDEX_CONFIG_ENTITY_FIELD];
entityFieldValuesRay.push([entityField, pTargetRecordData[entityField], pDuplicateFieldsConfig[a][INDEX_CONFIG_USE_FOR_SEARCH]])
}
return entityFieldValuesRay;
}
@@ -790,20 +847,20 @@ _DuplicateScannerUtils._applyPreFilter = function(pTargetEntity, pFilterCountCha
[], 1);//todo use again after this has been fixed!! insert the local id after fix
logging.log("searchResults hits length -> " + searchResult[indexsearch.HITS].length);
if(searchResult[indexsearch.TOTALHITS] < 80)//todo entfernen?!
{
for (let i = 0; i < searchResult[indexsearch.HITS].length; i++)
{
logging.log("Hit no. -> " + i);
//searchResults hits 0 -> {#ADITO_SEARCH_ID=1868bd3a-05af-4b7f-a633-e3aec50ac45c, _index_group_=Person, #ADITO_SEARCH_TYPE=Person, firstname_value=Peter, _local_id_=1868bd3a-05af-4b7f-a633-e3aec50ac45c}
let localId = searchResult[indexsearch.HITS][i]["_local_id_"];
let firstname = searchResult[indexsearch.HITS][i]["firstname_value"];
let indexGroup = searchResult[indexsearch.HITS][i]["_index_group_"];
logging.log("localId -> " + localId);
logging.log("firstname -> " + firstname);
logging.log("indexGroup -> " + indexGroup);
}
}
// if(searchResult[indexsearch.TOTALHITS] < 80)//todo entfernen?!
// {
// for (let i = 0; i < searchResult[indexsearch.HITS].length; i++)
// {
// logging.log("Hit no. -> " + i);
// //searchResults hits 0 -> {#ADITO_SEARCH_ID=1868bd3a-05af-4b7f-a633-e3aec50ac45c, _index_group_=Person, #ADITO_SEARCH_TYPE=Person, firstname_value=Peter, _local_id_=1868bd3a-05af-4b7f-a633-e3aec50ac45c}
// let localId = searchResult[indexsearch.HITS][i]["_local_id_"];
// let firstname = searchResult[indexsearch.HITS][i]["firstname_value"];
// let indexGroup = searchResult[indexsearch.HITS][i]["_index_group_"];
// logging.log("localId -> " + localId);
// logging.log("firstname -> " + firstname);
// logging.log("indexGroup -> " + indexGroup);
// }
// }
let totalHits = searchResult[indexsearch.TOTALHITS]
......