diff --git a/mycore-mods/src/main/java/org/mycore/mods/enrichment/MCRIdentifier.java b/mycore-mods/src/main/java/org/mycore/mods/enrichment/MCRIdentifier.java
index 1354b6c50a..a9502d18a5 100644
--- a/mycore-mods/src/main/java/org/mycore/mods/enrichment/MCRIdentifier.java
+++ b/mycore-mods/src/main/java/org/mycore/mods/enrichment/MCRIdentifier.java
@@ -18,6 +18,10 @@
package org.mycore.mods.enrichment;
+import java.nio.charset.StandardCharsets;
+import java.net.URLDecoder;
+import java.util.Locale;
+
import org.jaxen.JaxenException;
import org.jdom2.Element;
import org.mycore.common.MCRException;
@@ -47,14 +51,22 @@ public String getValue() {
return value;
}
+    /**
+     * Reduces an identifier to a canonical form, so that differently written variants
+     * of the same identifier compare as equal.
+     * <p>
+     * Percent-encoded sequences are decoded as UTF-8, the result is converted to lower case
+     * and all hyphens are removed. If the identifier contains a malformed percent sequence
+     * (for example a literal <code>%</code>), it is not decoded, but still lower-cased
+     * and stripped of hyphens.
+     * <p>
+     * Note: decoding is done by {@link URLDecoder}, which also turns a literal <code>+</code>
+     * into a space character.
+     *
+     * @param id the identifier value, must not be null
+     * @return the simplified identifier
+     */
+    public static String simplifyID(String id) {
+        String decoded;
+        try {
+            decoded = URLDecoder.decode(id, StandardCharsets.UTF_8);
+        } catch (IllegalArgumentException malformedEscape) {
+            // URLDecoder rejects incomplete or non-hex escapes; keep the literal text
+            // instead of letting equals()/hashCode() fail for such identifiers
+            decoded = id;
+        }
+        // fold case after decoding, otherwise letters produced by an escape ("%41" = "A") stay upper case
+        return decoded.toLowerCase(Locale.ENGLISH).replace("-", "");
+    }
+
+    /**
+     * Returns the simplified form of this identifier's string representation.
+     *
+     * @return the result of {@link #simplifyID(String)} applied to {@link #toString()}
+     */
+    public String simplifiedID() {
+        return simplifyID(toString());
+    }
+
@Override
public boolean equals(Object other) {
- return (other instanceof MCRIdentifier && this.toString().equals(other.toString()));
+ return (other instanceof MCRIdentifier && this.simplifiedID().equals(((MCRIdentifier)other).simplifiedID()));
}
@Override
public int hashCode() {
- return toString().hashCode();
+ return simplifiedID().hashCode();
}
@Override
diff --git a/mycore-mods/src/main/java/org/mycore/mods/merger/MCRIdentifierMerger.java b/mycore-mods/src/main/java/org/mycore/mods/merger/MCRIdentifierMerger.java
index 31d72ac803..87e564a923 100644
--- a/mycore-mods/src/main/java/org/mycore/mods/merger/MCRIdentifierMerger.java
+++ b/mycore-mods/src/main/java/org/mycore/mods/merger/MCRIdentifierMerger.java
@@ -20,9 +20,9 @@
import java.nio.charset.StandardCharsets;
import java.net.URLDecoder;
-import java.util.Locale;
import org.jdom2.Element;
+import org.mycore.mods.enrichment.MCRIdentifier;
/**
* Compares and merges mods:identifier elements.
@@ -43,8 +43,7 @@ private String getType() {
}
private String getSimplifiedID() {
- return URLDecoder.decode(this.element.getTextNormalize().toLowerCase(Locale.ENGLISH),StandardCharsets.UTF_8)
- .replace("-","");
+ return MCRIdentifier.simplifyID(this.element.getTextNormalize());
}
@Override
@@ -60,7 +59,9 @@ public boolean isProbablySameAs(MCRMerger other) {
@Override
public void mergeFrom(MCRMerger other) {
- if (!this.element.getText().contains("-") && other.element.getText().contains("-")) {
+ if ((!this.element.getText().contains("-") && other.element.getText().contains("-")) ||
+ (!URLDecoder.decode(this.element.getText(),StandardCharsets.UTF_8).equals(this.element.getText())
+ && URLDecoder.decode(other.element.getText(),StandardCharsets.UTF_8).equals(other.element.getText()))) {
this.element.setText(other.element.getText());
}
}
diff --git a/mycore-mods/src/test/java/org/mycore/mods/merger/MCRIdentifierMergerTest.java b/mycore-mods/src/test/java/org/mycore/mods/merger/MCRIdentifierMergerTest.java
index 469d4d2233..9d220dc34b 100644
--- a/mycore-mods/src/test/java/org/mycore/mods/merger/MCRIdentifierMergerTest.java
+++ b/mycore-mods/src/test/java/org/mycore/mods/merger/MCRIdentifierMergerTest.java
@@ -36,6 +36,15 @@ public void testMergeSame() throws Exception {
String a = "[mods:identifier[@type='issn']='12345678']";
String b = "[mods:identifier[@type='issn']='1234-5678']";
MCRMergerTest.test(a, b, b);
+ MCRMergerTest.test(b, a, b);
+ }
+
+ @Test
+ public void testMergeURLEncoded() throws Exception {
+ String a = "[mods:identifier[@type='doi']='10.1002/%28issn%291521-3765']";
+ String b = "[mods:identifier[@type='doi']='10.1002/(issn)1521-3765']";
+ MCRMergerTest.test(a, b, b);
+ MCRMergerTest.test(b, a, b);
}
@Test
@@ -51,5 +60,6 @@ public void testCaseInsensitiveDOIs() throws Exception {
String a = "[mods:identifier[@type='doi']='10.1530/EJE-21-1086']";
String b = "[mods:identifier[@type='doi']='10.1530/eje-21-1086']";
MCRMergerTest.test(a, b, a);
+ MCRMergerTest.test(b, a, b);
}
}
diff --git a/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-debug.xml b/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-debug.xml
index 6f52245ebe..9c37c799e9 100644
--- a/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-debug.xml
+++ b/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-debug.xml
@@ -67,14 +67,14 @@
- 1234-5678
- 1234-6789
1000-9999
+ 1234-6789
+ 1234-5678
- from data source C for ISSN 1234-5678
- 1234-5678
+ from data source C for ISSN 1234-6789
+ 1234-6789
@@ -84,13 +84,13 @@
host from data source B
1234-5678
1234-6789
- from data source C for ISSN 1234-5678
+ from data source C for ISSN 1234-6789
- from data source C for ISSN 1234-6789
- 1234-6789
+ from data source C for ISSN 1234-5678
+ 1234-5678
@@ -100,9 +100,9 @@
host from data source B
1234-5678
1234-6789
- from data source C for ISSN 1234-5678
from data source C for ISSN 1234-6789
+ from data source C for ISSN 1234-5678
-
\ No newline at end of file
+
diff --git a/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-result.xml b/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-result.xml
index 1035df2563..399ced4d46 100644
--- a/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-result.xml
+++ b/mycore-mods/src/test/resources/MCREnrichmentTest/testBasicEnrichment-result.xml
@@ -4,8 +4,8 @@
host from data source A
host from data source B
- from data source C for ISSN 1234-5678
from data source C for ISSN 1234-6789
+ from data source C for ISSN 1234-5678
1000-9999
1234-5678
1234-6789