From b637695aa8dcf9f99d4e0da05e824e6674e30b84 Mon Sep 17 00:00:00 2001
From: Aliaksandr Dziarkach
<18146690+AliaksandrDziarkach@users.noreply.github.com>
Date: Tue, 5 Nov 2024 19:18:53 +0300
Subject: [PATCH] Backmerge: #2603 - Unknown 'a' CIP stereochemistry causes
an error in CDXML parser
backmerge to 1.26
---
.../integration/ref/formats/cdx_to_ket.py.out | 1073 ++++++
.../formats/molecules/cdx/issue_2603.cdxml | 3423 +++++++++++++++++
core/indigo-core/molecule/CDXCommons.h | 19 +-
.../molecule/src/molecule_cdxml_loader.cpp | 8 +-
4 files changed, 4521 insertions(+), 2 deletions(-)
create mode 100644 api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml
diff --git a/api/tests/integration/ref/formats/cdx_to_ket.py.out b/api/tests/integration/ref/formats/cdx_to_ket.py.out
index 7281e5a19c..e5f5437986 100644
--- a/api/tests/integration/ref/formats/cdx_to_ket.py.out
+++ b/api/tests/integration/ref/formats/cdx_to_ket.py.out
@@ -16645,6 +16645,1079 @@ invalid-ascii.cdx
"bonds": []
}
}
+issue_2603.cdxml
+{
+ "root": {
+ "nodes": [
+ {
+ "$ref": "mol0"
+ }
+ ]
+ },
+ "mol0": {
+ "type": "molecule",
+ "atoms": [
+ {
+ "label": "C",
+ "location": [
+ 0.1293332427740097,
+ -2.9196665287017824,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.1293332427740097,
+ -3.3996665477752687,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 0.5453333258628845,
+ -3.639666795730591,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.9609997868537903,
+ -3.3996665477752687,
+ 0.0
+ ],
+ "stereoLabel": "abs"
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.9609997868537903,
+ -2.9196665287017824,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.5453333258628845,
+ -2.679666757583618,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.3176666498184205,
+ -2.5986666679382326,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 1.122333288192749,
+ -2.1599998474121095,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 0.6449999213218689,
+ -2.2103333473205568,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.3623331785202027,
+ -1.7443333864212037,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.842333197593689,
+ -1.7443333864212037,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.0823330879211427,
+ -1.3286666870117188,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.842333197593689,
+ -0.9130000472068787,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.3623331785202027,
+ -0.9130000472068787,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.122333288192749,
+ -1.3286666870117188,
+ 0.0
+ ]
+ },
+ {
+ "label": "F",
+ "location": [
+ 2.0823330879211427,
+ -0.4973333477973938,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.122333288192749,
+ -0.4973333477973938,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.122333288192749,
+ -0.017333349213004113,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.7066665887832642,
+ -0.257333368062973,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 1.787000060081482,
+ -2.698333263397217,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.9823334217071534,
+ -3.13700008392334,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.4596667289733888,
+ -3.0866665840148927,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 2.559333086013794,
+ -2.617333173751831,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.1436667442321779,
+ -2.377333402633667,
+ 0.0
+ ]
+ },
+ {
+ "label": "O",
+ "location": [
+ 2.0936667919158937,
+ -1.899999976158142,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.997999906539917,
+ -2.421999931335449,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.386333465576172,
+ -2.7039997577667238,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.8246665000915529,
+ -2.508999824523926,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.874999761581421,
+ -2.0316665172576906,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.486666679382324,
+ -1.749333143234253,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.047999858856201,
+ -1.9446665048599244,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 4.263333320617676,
+ -2.7039997577667238,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 4.584666728973389,
+ -2.3473331928253176,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 4.344666481018066,
+ -1.931666612625122,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 4.539666652679443,
+ -1.4933334589004517,
+ 0.0
+ ]
+ },
+ {
+ "label": "F",
+ "location": [
+ 3.3359997272491457,
+ -3.181666612625122,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.5453333258628845,
+ -4.119666576385498,
+ 0.0
+ ]
+ },
+ {
+ "label": "O",
+ "location": [
+ 0.1293332427740097,
+ -4.359666347503662,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.9609997868537903,
+ -4.359666347503662,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.0109997987747193,
+ -4.8373332023620609,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 1.4806666374206544,
+ -4.936999797821045,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.720666527748108,
+ -4.52133321762085,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.3993332386016846,
+ -4.164666652679443,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.720666527748108,
+ -5.352666854858398,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.200666666030884,
+ -5.352666854858398,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.440666437149048,
+ -4.936999797821045,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.200666666030884,
+ -4.52133321762085,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.654333233833313,
+ -5.1583333015441898,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.7539998292922974,
+ -5.627999782562256,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 0.43299993872642519,
+ -5.984666347503662,
+ 0.0
+ ]
+ },
+ {
+ "label": "O",
+ "location": [
+ 0.6729997992515564,
+ -6.400332927703857,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.1423332691192628,
+ -6.300333023071289,
+ 0.0
+ ]
+ },
+ {
+ "label": "N",
+ "location": [
+ 1.1926666498184205,
+ -5.822999954223633,
+ 0.0
+ ]
+ },
+ {
+ "label": "O",
+ "location": [
+ 1.499000072479248,
+ -6.621666431427002,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.33299991488456728,
+ -4.801666736602783,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 0.1846666932106018,
+ -5.258000373840332,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 1.37666654586792,
+ -3.639666795730591,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 2.9206666946411135,
+ -4.936999797821045,
+ 0.0
+ ],
+ "stereoLabel": "abs"
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.1606667041778566,
+ -5.352666854858398,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.6406664848327638,
+ -5.352666854858398,
+ 0.0
+ ]
+ },
+ {
+ "label": "O",
+ "location": [
+ 3.8806662559509279,
+ -4.936999797821045,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.6406664848327638,
+ -4.52133321762085,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.1606667041778566,
+ -4.52133321762085,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 3.6406664848327638,
+ -4.0413336753845219,
+ 0.0
+ ]
+ },
+ {
+ "label": "C",
+ "location": [
+ 4.056333065032959,
+ -4.2813334465026859,
+ 0.0
+ ]
+ }
+ ],
+ "bonds": [
+ {
+ "type": 1,
+ "atoms": [
+ 0,
+ 1
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 1,
+ 2
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 2,
+ 3
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 3,
+ 4
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 4,
+ 5
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 5,
+ 0
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 4,
+ 6
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 6,
+ 7
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 7,
+ 8
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 8,
+ 5
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 7,
+ 9
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 9,
+ 10
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 10,
+ 11
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 11,
+ 12
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 12,
+ 13
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 13,
+ 14
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 14,
+ 9
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 12,
+ 15
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 13,
+ 16
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 16,
+ 17
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 17,
+ 18
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 18,
+ 16
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 6,
+ 19
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 19,
+ 20
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 20,
+ 21
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 21,
+ 22
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 22,
+ 23
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 23,
+ 19
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 23,
+ 24
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 22,
+ 25
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 25,
+ 26
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 26,
+ 27
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 27,
+ 28
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 28,
+ 29
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 29,
+ 30
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 30,
+ 25
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 27,
+ 31
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 31,
+ 32
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 32,
+ 33
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 33,
+ 28
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 33,
+ 34
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 26,
+ 35
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 2,
+ 36
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 36,
+ 37
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 36,
+ 38
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 38,
+ 39
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 39,
+ 40
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 40,
+ 41
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 41,
+ 42
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 42,
+ 38
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 40,
+ 43
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 43,
+ 44
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 44,
+ 45
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 45,
+ 46
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 46,
+ 41
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 39,
+ 47
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 47,
+ 48
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 48,
+ 49
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 49,
+ 50
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 50,
+ 51
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 51,
+ 52
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 52,
+ 48
+ ]
+ },
+ {
+ "type": 2,
+ "atoms": [
+ 51,
+ 53
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 47,
+ 54
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 54,
+ 55
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 55,
+ 47
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 3,
+ 56
+ ],
+ "stereo": 6
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 57,
+ 45
+ ],
+ "stereo": 6
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 57,
+ 58
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 58,
+ 59
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 59,
+ 60
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 60,
+ 61
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 61,
+ 62
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 62,
+ 57
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 61,
+ 63
+ ]
+ },
+ {
+ "type": 1,
+ "atoms": [
+ 61,
+ 64
+ ]
+ }
+ ]
+ }
+}
marvin.cdx
CDXML loader: Not a molecule. Found 1 arrows.
*** Try as Reaction ***
diff --git a/api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml b/api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml
new file mode 100644
index 0000000000..afe61dcc88
--- /dev/null
+++ b/api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml
@@ -0,0 +1,3423 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+NNNFNNONNFONNONHOO
\ No newline at end of file
diff --git a/core/indigo-core/molecule/CDXCommons.h b/core/indigo-core/molecule/CDXCommons.h
index 9f50c97b6c..2cf065425b 100644
--- a/core/indigo-core/molecule/CDXCommons.h
+++ b/core/indigo-core/molecule/CDXCommons.h
@@ -1266,7 +1266,24 @@ namespace indigo
{kCDXBracketUsage_Anypolymer, "Anypolymer"}};
const std::vector kCIPStereochemistryIndexToChar = {'U', 'N', 'R', 'S', 'r', 's', 'u'};
- const std::unordered_map kCIPStereochemistryCharToIndex = {{'U', 0}, {'N', 1}, {'R', 2}, {'S', 3}, {'r', 4}, {'s', 5}, {'u', 6}};
+ namespace CIPStereochemistry // Named indices for atom CIP stereochemistry codes; order matches kCIPStereochemistryIndexToChar.
+ {
+ enum
+ {
+ Undetermined = 0, // 'U'; also the fallback the CDXML loader assigns for unrecognized codes
+ Symmetric, // 'N'
+ Asymmetric_R, // 'R'
+ Asymmetric_S, // 'S'
+ Pseudoasymmetric_r, // 'r'
+ Pseudoasymmetric_s, // 's'
+ Unspecified // 'u'
+ };
+ }
+ const std::unordered_map kCIPStereochemistryCharToIndex = { // CIP code character -> CIPStereochemistry value (inverse of kCIPStereochemistryIndexToChar)
+ {'U', CIPStereochemistry::Undetermined}, {'N', CIPStereochemistry::Symmetric}, {'R', CIPStereochemistry::Asymmetric_R},
+ {'S', CIPStereochemistry::Asymmetric_S}, {'r', CIPStereochemistry::Pseudoasymmetric_r}, {'s', CIPStereochemistry::Pseudoasymmetric_s},
+ {'u', CIPStereochemistry::Unspecified},
+ };
const std::vector kCIPBondStereochemistryIndexToChar = {'U', 'N', 'E', 'Z'};
const std::unordered_map kCIPBondStereochemistryCharToIndex = {{'U', 0}, {'N', 1}, {'E', 2}, {'Z', 3}};
diff --git a/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp b/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp
index 4815b8ae95..de93903899 100644
--- a/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp
+++ b/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp
@@ -1184,7 +1184,13 @@ void MoleculeCdxmlLoader::_parseNode(CdxmlNode& node, BaseCDXElement& elem)
auto pos_lambda = [&node, this](const std::string& data) { this->parsePos(data, node.pos); };
- auto stereo_lambda = [&node](const std::string& data) { node.stereo = kCIPStereochemistryCharToIndex.at(data.front()); };
+ auto stereo_lambda = [&node](const std::string& data) { // Translates the first character of a CIP stereo code into node.stereo.
+ const auto it = data.empty() ? kCIPStereochemistryCharToIndex.end() : kCIPStereochemistryCharToIndex.find(data.front()); // front() on an empty string is undefined behaviour, so guard it
+ if (it != kCIPStereochemistryCharToIndex.end())
+ node.stereo = it->second;
+ else
+ node.stereo = CIPStereochemistry::Undetermined; // empty or unknown code (e.g. 'a'): fall back instead of throwing
+ };
auto node_type_lambda = [&node](const std::string& data) {
node.type = KNodeTypeNameToInt.at(data);