From e677b8d4cf43484359868c448136f6685f89851d Mon Sep 17 00:00:00 2001
From: "d.buechler" <d.buechler@adito.de>
Date: Wed, 30 Oct 2019 10:07:52 +0100
Subject: [PATCH] Removed the block size configuration from the client. The
 default value is 5000. All scanners now use the same value; it can be changed
 via a designer custom setting in the preferences. Two fields have been added
 to the person entity, so the firstname and lastname fields now exist both as
 phonetic and as text_no_stopwords. Refactorings.

---
 .../Duplicates/create_duplicateScanner.xml    |  3 ---
 .../DuplicateScanner_entity.aod               |  8 ------
 .../onActionProcess.js                        | 11 +++-----
 .../onActionProcess.js                        | 14 +++++-----
 entity/Person_entity/Person_entity.aod        | 26 ++++++++++++++++--
 .../indexphoneticfirstname/valueProcess.js    |  4 +++
 .../indexphoneticlastname/valueProcess.js     |  4 +++
 .../recordcontainers/index/query.js           |  4 ++-
 .../DuplicateScannerEdit_view.aod             |  4 ---
 .../_____PREFERENCES_PROJECT.aod              |  4 +++
 process/DuplicateScanner_lib/process.js       | 27 ++++++++++++++-----
 .../process.js                                |  2 +-
 .../process.js                                |  2 +-
 13 files changed, 72 insertions(+), 41 deletions(-)
 create mode 100644 entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js
 create mode 100644 entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js

diff --git a/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml b/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml
index 19d8c86619..fc7be7e82c 100644
--- a/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml
+++ b/.liquibase/Data_alias/basic/2019.2.1/Duplicates/create_duplicateScanner.xml
@@ -14,9 +14,6 @@
             <column name="EXTERNAL_SERVICE_USAGE_ALLOWED" type="INTEGER" >
                 <constraints nullable="false"/>
             </column>
-            <column name="BLOCK_SIZE" type="INTEGER">
-                <constraints nullable="false"/>
-            </column>
             <column name="USER_NEW" type="NVARCHAR(50)">
                 <constraints nullable="false"/>
             </column>
diff --git a/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod b/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod
index 6091a9846a..7d29b2f6db 100644
--- a/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod
+++ b/entity/DuplicateScanner_entity/DuplicateScanner_entity.aod
@@ -120,10 +120,6 @@
         </entityActionField>
       </children>
     </entityActionGroup>
-    <entityField>
-      <name>BLOCK_SIZE</name>
-      <title>Data block size</title>
-    </entityField>
   </entityFields>
   <recordContainers>
     <dbRecordContainer>
@@ -172,10 +168,6 @@
           <name>DATE_EDIT.value</name>
           <recordfield>DUPLICATESCANNER.DATE_EDIT</recordfield>
         </dbRecordFieldMapping>
-        <dbRecordFieldMapping>
-          <name>BLOCK_SIZE.value</name>
-          <recordfield>DUPLICATESCANNER.BLOCK_SIZE</recordfield>
-        </dbRecordFieldMapping>
       </recordFieldMappings>
     </dbRecordContainer>
   </recordContainers>
diff --git a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js
index 0b3d4905f1..f6a1827f05 100644
--- a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js
+++ b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildorganisationduplicatescache/onActionProcess.js
@@ -3,21 +3,18 @@ import("DuplicateScanner_lib");
 
 var filterName = "OrganisationDuplicates";
 var targetEntity = "Organisation_entity";
-var recordBlockSize = 500;
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
 let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
 
-logging.log(filterName + ": duplicateFieldsConfig -> " + duplicateFieldsConfig);
-logging.log(filterName + ": resultFields -> " + resultFields);
-
-logging.log(filterName + ": Löschen von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Delete duplicates -> ");
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
-logging.log(filterName + ": Neu berechnen von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Recalculate duplicates -> ");
 DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, recordBlockSize,
 duplicateFieldsConfig, resultFields, null);
 
-logging.log(filterName + ": Refresh Unrelated Duplicates von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Refresh unrelated duplicates -> ");
 DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
 
 logging.log(filterName + ": Done rebuilding ");
\ No newline at end of file
diff --git a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js
index 6189ff788b..29ce7c3af2 100644
--- a/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js
+++ b/entity/DuplicateScanner_entity/entityfields/runactiongroup/children/rebuildpersonduplicatescache/onActionProcess.js
@@ -1,23 +1,23 @@
+import("system.project");
 import("system.logging");
 import("DuplicateScanner_lib");
 
 var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var recordBlockSize = 500;
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
+logging.log("recordBlockSize -> " + recordBlockSize);
+
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
 let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
 
-logging.log(filterName + ": duplicateFieldsConfig -> " + duplicateFieldsConfig);
-logging.log(filterName + ": resultFields -> " + resultFields);
-
-logging.log(filterName + ": Löschen von PERSON Dubletten -> ");
+logging.log(filterName + ": Delete duplicates -> ");
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
-logging.log("Neu berechnen von PERSON Dubletten -> ");
+logging.log(filterName + ": Recalculate duplicates -> ");
 DuplicateScannerUtils.RebuildDuplicatesCache(filterName, targetEntity, recordBlockSize,
 duplicateFieldsConfig, resultFields, null);
 
-logging.log(filterName + ": Refresh Unrelated Duplicates von ORGANISATION Dubletten -> ");
+logging.log(filterName + ": Refresh unrelated duplicates -> ");
 DuplicateScannerUtils.RefreshUnrelatedDuplicateRelations(targetEntity);
 
 logging.log(filterName + ": Done rebuilding ");
\ No newline at end of file
diff --git a/entity/Person_entity/Person_entity.aod b/entity/Person_entity/Person_entity.aod
index 8ddf9367f4..c8c9b4469f 100644
--- a/entity/Person_entity/Person_entity.aod
+++ b/entity/Person_entity/Person_entity.aod
@@ -1115,6 +1115,14 @@ Usually this is used for filtering COMMUNICATION-entries by a specified contact
       <name>OnlyOwnSupervised_param</name>
       <expose v="true" />
     </entityParameter>
+    <entityField>
+      <name>IndexPhoneticLastname</name>
+      <valueProcess>%aditoprj%/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js</valueProcess>
+    </entityField>
+    <entityField>
+      <name>IndexPhoneticFirstname</name>
+      <valueProcess>%aditoprj%/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js</valueProcess>
+    </entityField>
   </entityFields>
   <recordContainers>
     <dbRecordContainer>
@@ -1388,12 +1396,12 @@ Usually this is used for filtering COMMUNICATION-entries by a specified contact
       <indexFieldMappings>
         <indexRecordFieldMapping>
           <name>FIRSTNAME.value</name>
-          <indexFieldType>PHONETIC_NAME</indexFieldType>
+          <indexFieldType>TEXT_NO_STOPWORDS</indexFieldType>
           <isBoosted v="true" />
         </indexRecordFieldMapping>
         <indexRecordFieldMapping>
           <name>LASTNAME.value</name>
-          <indexFieldType>PHONETIC_NAME</indexFieldType>
+          <indexFieldType>TEXT_NO_STOPWORDS</indexFieldType>
           <isBoosted v="true" />
         </indexRecordFieldMapping>
         <indexRecordFieldMapping>
@@ -1464,6 +1472,20 @@ Usually this is used for filtering COMMUNICATION-entries by a specified contact
           </additionalFieldNameAliases>
           <isMultiValued v="true" />
         </indexRecordFieldMapping>
+        <indexRecordFieldMapping>
+          <name>IndexPhoneticFirstname.value</name>
+          <indexFieldType>PHONETIC_NAME</indexFieldType>
+          <additionalFieldNameAliases>
+            <element>firstname</element>
+          </additionalFieldNameAliases>
+        </indexRecordFieldMapping>
+        <indexRecordFieldMapping>
+          <name>IndexPhoneticLastname.value</name>
+          <indexFieldType>PHONETIC_NAME</indexFieldType>
+          <additionalFieldNameAliases>
+            <element>lastname</element>
+          </additionalFieldNameAliases>
+        </indexRecordFieldMapping>
       </indexFieldMappings>
     </indexRecordContainer>
   </recordContainers>
diff --git a/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js b/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js
new file mode 100644
index 0000000000..f042d9baa7
--- /dev/null
+++ b/entity/Person_entity/entityfields/indexphoneticfirstname/valueProcess.js
@@ -0,0 +1,4 @@
+import("system.result");
+import("system.vars");
+
+result.string(vars.get("$field.FIRSTNAME"));
\ No newline at end of file
diff --git a/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js b/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js
new file mode 100644
index 0000000000..e921110334
--- /dev/null
+++ b/entity/Person_entity/entityfields/indexphoneticlastname/valueProcess.js
@@ -0,0 +1,4 @@
+import("system.result");
+import("system.vars");
+
+result.string(vars.get("$field.LASTNAME"));
\ No newline at end of file
diff --git a/entity/Person_entity/recordcontainers/index/query.js b/entity/Person_entity/recordcontainers/index/query.js
index b4e527cc2d..6798290c2e 100644
--- a/entity/Person_entity/recordcontainers/index/query.js
+++ b/entity/Person_entity/recordcontainers/index/query.js
@@ -25,8 +25,10 @@ sqlQuery = 'select CONTACT.CONTACTID as "_uid_" '
     + ", PERSON.PERSONID "
     + ", PERSON.FIRSTNAME "
     + ", PERSON.LASTNAME "
+    + ", PERSON.FIRSTNAME as IndexPhoneticFirstname "
+    + ", PERSON.LASTNAME as IndexPhoneticLastname "
     + ", PERSON.SALUTATION "
-    + ", PERSON.GENDER "
+    + ", TRIM(PERSON.GENDER) as GENDER "
     + ', PERSON.TITLE as "TITLE.value"'
     + ", CONTACT.CONTACTID "
     + ", CONTACT.ORGANISATION_ID "
diff --git a/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod b/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod
index ce0410db5b..73c01d8d54 100644
--- a/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod
+++ b/neonView/DuplicateScannerEdit_view/DuplicateScannerEdit_view.aod
@@ -22,10 +22,6 @@
           <name>c021711c-9f3e-454d-964c-1339e3463329</name>
           <entityField>ENTITY_TO_SCAN_NAME</entityField>
         </entityFieldLink>
-        <entityFieldLink>
-          <name>bc526338-19bb-4587-8321-7bd53c267c6d</name>
-          <entityField>BLOCK_SIZE</entityField>
-        </entityFieldLink>
       </fields>
     </genericViewTemplate>
     <neonViewReference>
diff --git a/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod b/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod
index 97d496bccc..46a24a9548 100644
--- a/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod
+++ b/preferences/_____PREFERENCES_PROJECT/_____PREFERENCES_PROJECT.aod
@@ -106,5 +106,9 @@
       <name>nominatim.pw</name>
       <property>useradito</property>
     </customStringProperty>
+    <customIntegerProperty>
+      <name>duplicates.dataBlockSize</name>
+      <property v="5000" />
+    </customIntegerProperty>
   </customProperties>
 </preferences>
diff --git a/process/DuplicateScanner_lib/process.js b/process/DuplicateScanner_lib/process.js
index 411ed083c5..971cd1f9a4 100644
--- a/process/DuplicateScanner_lib/process.js
+++ b/process/DuplicateScanner_lib/process.js
@@ -1,3 +1,4 @@
+import("system.project");
 import("ActivityTask_lib");
 import("KeywordRegistry_basic");
 import("system.translate");
@@ -262,7 +263,7 @@ DuplicateScannerUtils.GetCachedDuplicatesForClusterId = function(pClusterId)
  * @example
  * var filterName = "PersonDuplicates";
  * var targetEntity = "Person_entity";
- * var recordBlockSize = 5;
+ * var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
  *
  * let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
  *
@@ -834,6 +835,12 @@ DuplicateScannerUtils.BuildEntityFieldNameValueRays = function(pDuplicateFieldsC
     return entityFieldValuesRay.length > 0 ? entityFieldValuesRay : [["", ""]];
 }
 
+DuplicateScannerUtils.GetBlockSize = function()
+{
+    return project.getPreferenceValue("custom.duplicates.dataBlockSize", "5000");
+}
+
+
 function _DuplicateScannerUtils() {}
 
 var INDEX_FILTER_CONDITION = 0;
@@ -926,7 +933,7 @@ pResultFields, pRecordIdFieldToIgnore, pRecordIdValueToIgnore, pFormatValuesCons
         return null;
 
     possibleDuplicates = possibleDuplicates[indexsearch.HITS];
-
+    logging.log("possibleDuplicates -> " + possibleDuplicates.length);
     if(pUseExternalWebservice && possibleDuplicates.length > 0 && pFormatValuesConsumeWebserviceCallback != null)
         possibleDuplicates = pFormatValuesConsumeWebserviceCallback.apply(this, [possibleDuplicates]);
 
@@ -1174,15 +1181,16 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay
     let filterPatternConfig = null;
 
     //logging.log("pEntityFieldValueRays.length -> " + pEntityFieldValueRays.length);
-    if(pEntityFieldValueRays.length > 0)
+    if(pEntityFieldValueRays.length > 1)
     {
         filterPatternConfig = indexsearch.createPatternConfig();
-        for (let i = 0; i < pEntityFieldValueRays.length; i++)
+        for (let i = 1; i < pEntityFieldValueRays.length; i++)
         {
             let entityFieldValue = pEntityFieldValueRays[i][1];
             let entityFieldName = pEntityFieldValueRays[i][0];
 
-            if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 0 || entityFieldValue == "")
+            //if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 0 || entityFieldValue == "")
+            if(entityFieldValue == "")
                 continue;
 
             //logging.log("entityFieldValue -> " + entityFieldValue);
@@ -1192,8 +1200,13 @@ _DuplicateScannerUtils._buildFilterPatternConfig = function(pEntityFieldValueRay
 
             //logging.log("indexField -> " + indexField);
             var filterTerm = indexsearch.createTerm(entityFieldValue)
-            .setIndexField(indexField)
-            .setFuzzySearchFactor(0);
+            .setIndexField(indexField);
+            
+            if(pEntityFieldValueRays[i][INDEX_CONFIG_USE_FOR_SEARCH] == 1)
+            {
+                logging.log("Nutze fuzzy search -> ");
+                filterTerm = filterTerm.setFuzzySearchFactor(2);
+            }
 
             filterPatternConfig.and(filterTerm);
         }
diff --git a/process/RebuildAllDuplicateCaches_serverProcess/process.js b/process/RebuildAllDuplicateCaches_serverProcess/process.js
index 5a6cd7aaac..c06fc90ff3 100644
--- a/process/RebuildAllDuplicateCaches_serverProcess/process.js
+++ b/process/RebuildAllDuplicateCaches_serverProcess/process.js
@@ -23,7 +23,7 @@ import("DuplicateScanner_lib");
 
 var filterName = "PersonDuplicates";
 var targetEntity = "Person_entity";
-var recordBlockSize = 5;
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
 
 DuplicateScannerUtils.DeleteDuplicateClustersByTargetEntity(targetEntity);
 
diff --git a/process/RebuildDuplicatesCache_serverProcess/process.js b/process/RebuildDuplicatesCache_serverProcess/process.js
index 15b0c3c3ff..32f3f380bf 100644
--- a/process/RebuildDuplicatesCache_serverProcess/process.js
+++ b/process/RebuildDuplicatesCache_serverProcess/process.js
@@ -27,7 +27,7 @@ var filterName = vars.get("$local.filterName");
 var targetEntity = vars.get("$local.targetEntity");
 logging.log("filterName -> " + filterName);
 logging.log("targetEntity -> " + targetEntity);
-var recordBlockSize = DuplicateScannerUtils.GetBlockSizeForScanner(filterName, targetEntity);
+var recordBlockSize = DuplicateScannerUtils.GetBlockSize();
 let duplicateFieldsConfig = DuplicateScannerUtils.LoadIndexFieldsConfiguration(filterName, targetEntity);
 let resultFields = DuplicateScannerUtils.LoadResultFields(filterName, targetEntity);
 
-- 
GitLab