From e677b8d4cf43484359868c448136f6685f89851d Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Wed, 30 Oct 2019 10:07:52 +0100
Subject: [PATCH] Removed the block size configuration from the client.

The default block size is 5000. All scanners now use the same value, which
can be changed via a designer custom setting in the preferences.
Two fields have been added to the person entity: the firstname and lastname
fields now exist both as phonetic and as text_no_stopwords index fields.
Refactorings.
---
 .../Duplicates/create_duplicateScanner.xml    |  3 ---
 .../DuplicateScanner_entity.aod               |  8 ------
 .../onActionProcess.js                        | 11 +++-----
 .../onActionProcess.js                        | 14 +++++-----
 entity/Person_entity/Person_entity.aod        | 26 ++++++++++++++++--
 .../indexphoneticfirstname/valueProcess.js    |  4 +++
 .../indexphoneticlastname/valueProcess.js     |  4 +++
 .../recordcontainers/index/query.js           |  4 ++-
 .../DuplicateScannerEdit_view.aod             |  4 ---
 .../_____PREFERENCES_PROJECT.aod              |  4 +++
 process/DuplicateScanner_lib/process.js       | 27 ++++++++++++++-----
 .../process.js                                |  2 +-
 .../process.js                                |  2 +-
 13 files changed, 72 insertions(+), 41 deletions(-)
 create mode 100644 entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js
 create mode 100644 entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js

diff --git a/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml b/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml
index 19d8c86619..fc7be7e82c 100644
--- a/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml
+++ b/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml
@@ -14,9 +14,6 @@
             <column name="EXTERNAL_SERVICE_USAGE_ALLOWED" type="INTEGER" >
                 <constraints nullable="false"/>
             </column>
-            <column name="BLOCK_SIZE" type="INTEGER">
-                <constraints nullable="false"/>
-            </column>
             <column name="USER_NEW" type="NVARCHAR(50)">
                 <constraints nullable="false"/>
             </column>
diff --git a/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod b/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod
index 6091a9846a..7d29b2f6db 100644
--- a/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod
+++ b/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod
@@ -120,10 +120,6 @@
         </entityActionField>
       </children>
     </entityActionGroup>
-    <entityField>
-      <name>BLOCK_SIZE</name>
-      <title>Data block size</title>
-    </entityField>
   </entityFields>
   <recordContainers>
     <dbRecordContainer>
@@ -172,10 +168,6 @@
         <name>DATE_EDIT.value</name>
         <recordfield>DUPLICATESCANNER.DATE_EDIT</recordfield>
       </dbRecordFieldMapping>
-      <dbRecordFieldMapping>
-        <name>BLOCK_SIZE.value</name>
-        <recordfield>DUPLICATESCANNER.BLOCK_SIZE</recordfield>
-      </dbRecordFieldMapping>
     </recordFieldMappings>
   </dbRecordContainer>
 </recordContainers>
diff --git a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js
index 0b3d4905f1..f6a1827f05 100644
--- a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js
+++ b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js
@@ -3,21 +3,18 @@ import("DuplicateScanner_lib");
 
 var filterName = "OrganisationDuplicates";
 var targetEntity = "Organisation_entity";
-var recordBlockSize = 500;
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
 
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
 let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
 
-logging.log(filterName + ": duplicateFieldsConfig -> " + duplicateFieldsConfig);
-logging.log(filterName + ": resultFields -> " + resultFields);
-
-logging.log(filterName + ": Löschen von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Delete duplicates -> ");
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
-logging.log(filterName + ": Neu berechnen von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Recalculate duplicates -> ");
 DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, recordBlockSize, duplicateFieldsConfig, resultFields, null);
 
-logging.log(filterName + ": Refresh Unrelated Duplicates von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Refresh unrelated duplicates -> ");
 DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
 
 logging.log(filterName + ": Done rebuilding ");
\ No newline at end of file
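
Both rebuild actions now take the block size from the shared helper instead of a hard-coded 500 (the person variant follows directly below). Condensed, the flow they share looks roughly like this; it is a sketch built only from library calls that appear in this patch, using the organisation scanner as the example:

    import("system.logging");
    import("DuplicateScanner_lib");

    var filterName = "OrganisationDuplicates";
    var targetEntity = "Organisation_entity";

    // the block size now comes from the shared designer preference (default 5000)
    var recordBlockSize = DuplicateScannerUtils.GetBlockSize();

    var duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
    var resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);

    // delete the old clusters, rebuild the cache block by block,
    // then refresh the relations that were marked as unrelated
    DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
    DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, recordBlockSize,
        duplicateFieldsConfig, resultFields, null);
    DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
    logging.log(filterName + ": Done rebuilding");
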
diff --git a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js
index 6189ff788b..29ce7c3af2 100644
--- a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js
+++ b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js
@@ -1,23 +1,23 @@
+import("system.project");
 import("system.logging");
 import("DuplicateScanner_lib");
 
 var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var recordBlockSize = 500;
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
+logging.log("recordBlockSize -> " + recordBlockSize);
+
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
 let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
 
-logging.log(filterName + ": duplicateFieldsConfig -> " + duplicateFieldsConfig);
-logging.log(filterName + ": resultFields -> " + resultFields);
-
-logging.log(filterName + ": Löschen von PERSON Dubletten -> ");
+logging.log(filterName + ": Delete duplicates -> ");
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
-logging.log("Neu berechnen von PERSON Dubletten -> ");
+logging.log(filterName + ": Recalculate duplicates -> ");
 DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, recordBlockSize, duplicateFieldsConfig, resultFields, null);
 
-logging.log(filterName + ": Refresh Unrelated Duplicates von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Refresh unrelated duplicates -> ");
 DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
 
 logging.log(filterName + ": Done rebuilding ");
\ No newline at end of file
diff --git a/entity/Person_entity/Person_entity.aod b/entity/Person_entity/Person_entity.aod
index 8ddf9367f4..c8c9b4469f 100644
--- a/entity/Person_entity/Person_entity.aod
+++ b/entity/Person_entity/Person_entity.aod
@@ -1115,6 +1115,14 @@ Usually this is used for filtering COMMUNICATION-entries by a specified contact
       <name>OnlyOwnSupervised_param</name>
       <expose v="true" />
     </entityParameter>
+    <entityField>
+      <name>IndexPhoneticLastname</name>
+      <valueProcess>%aditoprj%/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js</valueProcess>
+    </entityField>
+    <entityField>
+      <name>IndexPhoneticFirstname</name>
+      <valueProcess>%aditoprj%/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js</valueProcess>
+    </entityField>
   </entityFields>
   <recordContainers>
     <dbRecordContainer>
@@ -1388,12 +1396,12 @@ Usually this is used for filtering COMMUNICATION-entries by a specified contact
     <indexFieldMappings>
       <indexRecordFieldMapping>
         <name>FIRSTNAME.value</name>
-        <indexFieldType>PHONETIC_NAME</indexFieldType>
+        <indexFieldType>TEXT_NO_STOPWORDS</indexFieldType>
         <isBoosted v="true" />
       </indexRecordFieldMapping>
       <indexRecordFieldMapping>
         <name>LASTNAME.value</name>
-        <indexFieldType>PHONETIC_NAME</indexFieldType>
+        <indexFieldType>TEXT_NO_STOPWORDS</indexFieldType>
         <isBoosted v="true" />
       </indexRecordFieldMapping>
       <indexRecordFieldMapping>
@@ -1464,6 +1472,20 @@ Usually this is used for filtering COMMUNICATION-entries by a specified contact
         </additionalFieldNameAliases>
         <isMultiValued v="true" />
       </indexRecordFieldMapping>
+      <indexRecordFieldMapping>
+        <name>IndexPhoneticFirstname.value</name>
+        <indexFieldType>PHONETIC_NAME</indexFieldType>
+        <additionalFieldNameAliases>
+          <element>firstname</element>
+        </additionalFieldNameAliases>
+      </indexRecordFieldMapping>
+      <indexRecordFieldMapping>
+        <name>IndexPhoneticLastname.value</name>
+        <indexFieldType>PHONETIC_NAME</indexFieldType>
+        <additionalFieldNameAliases>
+          <element>lastname</element>
+        </additionalFieldNameAliases>
+      </indexRecordFieldMapping>
     </indexFieldMappings>
   </indexRecordContainer>
 </recordContainers>
diff --git a/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js b/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js
new file mode 100644
index 0000000000..f042d9baa7
--- /dev/null
+++ b/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js
@@ -0,0 +1,4 @@
+import("system.result");
+import("system.vars");
+
+result.string(vars.get("$field.FIRSTNAME"));
\ No newline at end of file
diff --git a/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js b/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js
new file mode 100644
index 0000000000..e921110334
--- /dev/null
+++ b/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js
@@ -0,0 +1,4 @@
+import("system.result");
+import("system.vars");
+
+result.string(vars.get("$field.LASTNAME"));
\ No newline at end of file
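
The pattern in the changes above: FIRSTNAME and LASTNAME are now indexed as TEXT_NO_STOPWORDS for regular search, while the two new entity fields mirror their values into separate PHONETIC_NAME index fields that keep the old aliases (firstname/lastname). A mirror field is simply a valueProcess that copies an existing field; the sketch below shows one for a hypothetical IndexPhoneticMiddlename field, which is not part of this patch and assumes a MIDDLENAME field exists on the entity:

    // Hypothetical mirror field: copy an existing entity field so the index record
    // container can map the same value to a second index field type (PHONETIC_NAME).
    import("system.result");
    import("system.vars");

    result.string(vars.get("$field.MIDDLENAME"));

Such a field would additionally need an indexRecordFieldMapping with indexFieldType PHONETIC_NAME and a matching aliased column in the index query, as done for IndexPhoneticFirstname/IndexPhoneticLastname in the query.js change that follows.
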
diff --git a/entity/Person_entity/recordcontainers/index/query.js b/entity/Person_entity/recordcontainers/index/query.js
index b4e527cc2d..6798290c2e 100644
--- a/entity/Person_entity/recordcontainers/index/query.js
+++ b/entity/Person_entity/recordcontainers/index/query.js
@@ -25,8 +25,10 @@ sqlQuery = 'select CONTACT.CONTACTID as "_uid_" '
     + ", PERSON.PERSONID "
     + ", PERSON.FIRSTNAME "
     + ", PERSON.LASTNAME "
+    + ", PERSON.FIRSTNAME as IndexPhoneticFirstname "
+    + ", PERSON.LASTNAME as IndexPhoneticLastname "
     + ", PERSON.SALUTATION "
-    + ", PERSON.GENDER "
+    + ", TRIM(PERSON.GENDER) as GENDER "
     + ', PERSON.TITLE as "TITLE.value"'
     + ", CONTACT.CONTACTID "
     + ", CONTACT.ORGANISATION_ID "
diff --git a/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod b/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod
index ce0410db5b..73c01d8d54 100644
--- a/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod
+++ b/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod
@@ -22,10 +22,6 @@
         <name>c021711c-9f3e-454d-964c-1339e3463329</name>
         <entityField>ENTITY_TO_SCAN_NAME</entityField>
       </entityFieldLink>
-      <entityFieldLink>
-        <name>bc526338-19bb-4587-8321-7bd53c267c6d</name>
-        <entityField>BLOCK_SIZE</entityField>
-      </entityFieldLink>
     </fields>
   </genericViewTemplate>
   <neonViewReference>
diff --git a/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod b/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod
index 97d496bccc..46a24a9548 100644
--- a/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod
+++ b/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod
@@ -106,5 +106,9 @@
     <name>nominatim.pw</name>
     <property>useradito</property>
   </customStringProperty>
+  <customIntegerProperty>
+    <name>duplicates.dataBlockSize</name>
+    <property v="5000" />
+  </customIntegerProperty>
   </customProperties>
 </preferences>
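
The new duplicates.dataBlockSize property is what DuplicateScannerUtils.GetBlockSize() (added to DuplicateScanner_lib below) reads. A minimal sketch of that lookup; the "custom." prefix and the string default "5000" mirror the implementation in this patch, so the scan keeps working even when the property has not been set in the designer:

    import("system.project");

    // designer custom properties are addressed with a "custom." prefix;
    // the second argument is the fallback used when the property is missing
    var blockSize = project.getPreferenceValue("custom.duplicates.dataBlockSize", "5000");
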
diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index 411ed083c5..971cd1f9a4 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -1,3 +1,4 @@
+import("system.project");
 import("ActivityTask_lib");
 import("KeywordRegistry_basic");
 import("system.translate");
@@ -262,7 +263,7 @@ DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
  * @example
  * var filterName = "PersonDuplicates";
  * var targetEntity = "Person_entity";
- * var recordBlockSize = 5;
+ * var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
  *
  * let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
  *
@@ -834,6 +835,12 @@ DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsC
     return entityFieldValuesRay.length > 0 ? entityFieldValuesRay : [["", ""]];
 }
 
+DuplicateScannerUtils.GetBlockSize = function()
+{
+    return project.getPreferenceValue("custom.duplicates.dataBlockSize", "5000");
+}
+
+
 function _DuplicateScannerUtils() {}
 
 var INDEX_FILTER_CONDITION = 0;
@@ -926,7 +933,7 @@ pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesCons
         return null;
 
     possibleDuplicates = possibleDuplicates[indexsearch.HITS];
-
+    logging.log("possibleDuplicates -> " + possibleDuplicates.length);
     if(pUseExternalWebservice && possibleDuplicates.length > 0 && pFormatValuesConsumeWebserviceCallback != null)
         possibleDuplicates = pFormatValuesConsumeWebserviceCallback.apply(this, [possibleDuplicates]);
 
@@ -1174,15 +1181,16 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay
     let filterPatternConfig = null;
     //logging.log("pEntityFieldValueRays.length -> " + pEntityFieldValueRays.length);
-    if(pEntityFieldValueRays.length > 0)
+    if(pEntityFieldValueRays.length > 1)
     {
         filterPatternConfig = indexsearch.createPatternConfig();
-        for (let i = 0; i < pEntityFieldValueRays.length; i++)
+        for (let i = 1; i < pEntityFieldValueRays.length; i++)
         {
             let entityFieldValue = pEntityFieldValueRays[i][1];
             let entityFieldName = pEntityFieldValueRays[i][0];
 
-            if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 0 || entityFieldValue == "")
+            //if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 0 || entityFieldValue == "")
+            if(entityFieldValue == "")
                 continue;
 
             //logging.log("entityFieldValue -> " + entityFieldValue);
@@ -1192,8 +1200,13 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay
             //logging.log("indexField -> " + indexField);
 
             var filterTerm = indexsearch.createTerm(entityFieldValue)
-                                        .setIndexField(indexField)
-                                        .setFuzzySearchFactor(0);
+                                        .setIndexField(indexField);
+
+            if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 1)
+            {
+                logging.log("Nutze fuzzy search -> ");
+                filterTerm = filterTerm.setFuzzySearchFactor(2);
+            }
 
             filterPatternConfig.and(filterTerm);
         }
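
Two behavioural points in the library change above: the filter pattern is now only built when there is more than one configured field (and the loop starts at index 1), and the per-field "use for search" flag no longer excludes a field from the pattern but switches fuzzy matching on for it. Below is a condensed sketch of the term construction; the variable values are illustrative stand-ins for what _buildFilterPatternConfig derives from the scanner configuration, not values taken from the patch:

    import("system.indexsearch");

    var entityFieldValue = "Smith";   // value of the field to match (illustrative)
    var indexField = "lastname";      // resolved index field / alias (illustrative)
    var useForSearch = 1;             // the INDEX_CONFIG_USE_FOR_SEARCH flag (illustrative)

    var filterPatternConfig = indexsearch.createPatternConfig();

    var filterTerm = indexsearch.createTerm(entityFieldValue)
                                .setIndexField(indexField);

    // after this patch the flag enables fuzzy matching instead of skipping the field
    if (useForSearch == 1)
        filterTerm = filterTerm.setFuzzySearchFactor(2);

    filterPatternConfig.and(filterTerm);
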
diff --git a/process/RebuildAllDuplicateCaches_serverProcess/process.js b/process/RebuildAllDuplicateCaches_serverProcess/process.js
index 5a6cd7aaac..c06fc90ff3 100644
--- a/process/RebuildAllDuplicateCaches_serverProcess/process.js
+++ b/process/RebuildAllDuplicateCaches_serverProcess/process.js
@@ -23,7 +23,7 @@ import("DuplicateScanner_lib");
 
 var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var recordBlockSize = 5;
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
 
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
diff --git a/process/RebuildDuplicatesCache_serverProcess/process.js b/process/RebuildDuplicatesCache_serverProcess/process.js
index 15b0c3c3ff..32f3f380bf 100644
--- a/process/RebuildDuplicatesCache_serverProcess/process.js
+++ b/process/RebuildDuplicatesCache_serverProcess/process.js
@@ -27,7 +27,7 @@
 var filterName = vars.get("$local.filterName");
 var targetEntity = vars.get("$local.targetEntity");
 logging.log("filterName -> " + filterName);
 logging.log("targetEntity -> " + targetEntity);
-var recordBlockSize = DuplicateScannerUtils.GetBlockSizeForScanner(filterName, targetEntity);
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
 let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
-- 
GitLab
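
One note on the call sites above: GetBlockSize() passes the preference value straight through, and its fallback is the string "5000". If RebuildDuplicatesCache requires a numeric block size, a defensive parse at the call sites could look roughly like this (a suggestion, not part of the patch):

    // parse the preference value; fall back to 5000 if it is missing or not numeric
    var recordBlockSize = parseInt(DuplicateScannerUtils.GetBlockSize(), 10) || 5000;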