From b637695aa8dcf9f99d4e0da05e824e6674e30b84 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 5 Nov 2024 19:18:53 +0300 Subject: [PATCH] Backmerge: #2603 - Unknown 'a' CIP stereochemistry cause error in CDXML parser backmerge to 1.26 --- .../integration/ref/formats/cdx_to_ket.py.out | 1073 ++++++ .../formats/molecules/cdx/issue_2603.cdxml | 3423 +++++++++++++++++ core/indigo-core/molecule/CDXCommons.h | 19 +- .../molecule/src/molecule_cdxml_loader.cpp | 8 +- 4 files changed, 4521 insertions(+), 2 deletions(-) create mode 100644 api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml diff --git a/api/tests/integration/ref/formats/cdx_to_ket.py.out b/api/tests/integration/ref/formats/cdx_to_ket.py.out index 7281e5a19c..e5f5437986 100644 --- a/api/tests/integration/ref/formats/cdx_to_ket.py.out +++ b/api/tests/integration/ref/formats/cdx_to_ket.py.out @@ -16645,6 +16645,1079 @@ invalid-ascii.cdx "bonds": [] } } +issue_2603.cdxml +{ + "root": { + "nodes": [ + { + "$ref": "mol0" + } + ] + }, + "mol0": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 0.1293332427740097, + -2.9196665287017824, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.1293332427740097, + -3.3996665477752687, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.5453333258628845, + -3.639666795730591, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.9609997868537903, + -3.3996665477752687, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.9609997868537903, + -2.9196665287017824, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.5453333258628845, + -2.679666757583618, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.3176666498184205, + -2.5986666679382326, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.122333288192749, + -2.1599998474121095, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.6449999213218689, + -2.2103333473205568, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.3623331785202027, + -1.7443333864212037, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.842333197593689, + -1.7443333864212037, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.0823330879211427, + -1.3286666870117188, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.842333197593689, + -0.9130000472068787, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.3623331785202027, + -0.9130000472068787, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.122333288192749, + -1.3286666870117188, + 0.0 + ] + }, + { + "label": "F", + "location": [ + 2.0823330879211427, + -0.4973333477973938, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.122333288192749, + -0.4973333477973938, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.122333288192749, + -0.017333349213004113, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.7066665887832642, + -0.257333368062973, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.787000060081482, + -2.698333263397217, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.9823334217071534, + -3.13700008392334, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.4596667289733888, + -3.0866665840148927, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 2.559333086013794, + -2.617333173751831, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.1436667442321779, + -2.377333402633667, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 2.0936667919158937, + -1.899999976158142, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.997999906539917, + -2.421999931335449, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.386333465576172, + -2.7039997577667238, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.8246665000915529, + -2.508999824523926, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.874999761581421, + -2.0316665172576906, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.486666679382324, + -1.749333143234253, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.047999858856201, + -1.9446665048599244, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.263333320617676, + -2.7039997577667238, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 4.584666728973389, + -2.3473331928253176, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 4.344666481018066, + -1.931666612625122, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.539666652679443, + -1.4933334589004517, + 0.0 + ] + }, + { + "label": "F", + "location": [ + 3.3359997272491457, + -3.181666612625122, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.5453333258628845, + -4.119666576385498, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.1293332427740097, + -4.359666347503662, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.9609997868537903, + -4.359666347503662, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.0109997987747193, + -4.8373332023620609, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.4806666374206544, + -4.936999797821045, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.720666527748108, + -4.52133321762085, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.3993332386016846, + -4.164666652679443, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.720666527748108, + -5.352666854858398, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.200666666030884, + -5.352666854858398, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.440666437149048, + -4.936999797821045, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.200666666030884, + -4.52133321762085, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.654333233833313, + -5.1583333015441898, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.7539998292922974, + -5.627999782562256, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 0.43299993872642519, + -5.984666347503662, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 0.6729997992515564, + -6.400332927703857, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.1423332691192628, + -6.300333023071289, + 0.0 + ] + }, + { + "label": "N", + "location": [ + 1.1926666498184205, + -5.822999954223633, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 1.499000072479248, + -6.621666431427002, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.33299991488456728, + -4.801666736602783, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 0.1846666932106018, + -5.258000373840332, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 1.37666654586792, + -3.639666795730591, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 2.9206666946411135, + -4.936999797821045, + 0.0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 3.1606667041778566, + -5.352666854858398, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.6406664848327638, + -5.352666854858398, + 0.0 + ] + }, + { + "label": "O", + "location": [ + 3.8806662559509279, + -4.936999797821045, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.6406664848327638, + -4.52133321762085, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.1606667041778566, + -4.52133321762085, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 3.6406664848327638, + -4.0413336753845219, + 0.0 + ] + }, + { + "label": "C", + "location": [ + 4.056333065032959, + -4.2813334465026859, + 0.0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 5, + 0 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 8, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 2, + "atoms": [ + 9, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 10, + 11 + ] + }, + { + "type": 2, + "atoms": [ + 11, + 12 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 13 + ] + }, + { + "type": 2, + "atoms": [ + 13, + 14 + ] + }, + { + "type": 1, + "atoms": [ + 14, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 12, + 15 + ] + }, + { + "type": 1, + "atoms": [ + 13, + 16 + ] + }, + { + "type": 1, + "atoms": [ + 16, + 17 + ] + }, + { + "type": 1, + "atoms": [ + 17, + 18 + ] + }, + { + "type": 1, + "atoms": [ + 18, + 16 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 19 + ] + }, + { + "type": 1, + "atoms": [ + 19, + 20 + ] + }, + { + "type": 2, + "atoms": [ + 20, + 21 + ] + }, + { + "type": 1, + "atoms": [ + 21, + 22 + ] + }, + { + "type": 1, + "atoms": [ + 22, + 23 + ] + }, + { + "type": 1, + "atoms": [ + 23, + 19 + ] + }, + { + "type": 2, + "atoms": [ + 23, + 24 + ] + }, + { + "type": 1, + "atoms": [ + 22, + 25 + ] + }, + { + "type": 2, + "atoms": [ + 25, + 26 + ] + }, + { + "type": 1, + "atoms": [ + 26, + 27 + ] + }, + { + "type": 2, + "atoms": [ + 27, + 28 + ] + }, + { + "type": 1, + "atoms": [ + 28, + 29 + ] + }, + { + "type": 2, + "atoms": [ + 29, + 30 + ] + }, + { + "type": 1, + "atoms": [ + 30, + 25 + ] + }, + { + "type": 1, + "atoms": [ + 27, + 31 + ] + }, + { + "type": 2, + "atoms": [ + 31, + 32 + ] + }, + { + "type": 1, + "atoms": [ + 32, + 33 + ] + }, + { + "type": 1, + "atoms": [ + 33, + 28 + ] + }, + { + "type": 1, + "atoms": [ + 33, + 34 + ] + }, + { + "type": 1, + "atoms": [ + 26, + 35 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 36 + ] + }, + { + "type": 2, + "atoms": [ + 36, + 37 + ] + }, + { + "type": 1, + "atoms": [ + 36, + 38 + ] + }, + { + "type": 2, + "atoms": [ + 38, + 39 + ] + }, + { + "type": 1, + "atoms": [ + 39, + 40 + ] + }, + { + "type": 1, + "atoms": [ + 40, + 41 + ] + }, + { + "type": 2, + "atoms": [ + 41, + 42 + ] + }, + { + "type": 1, + "atoms": [ + 42, + 38 + ] + }, + { + "type": 1, + "atoms": [ + 40, + 43 + ] + }, + { + "type": 2, + "atoms": [ + 43, + 44 + ] + }, + { + "type": 1, + "atoms": [ + 44, + 45 + ] + }, + { + "type": 2, + "atoms": [ + 45, + 46 + ] + }, + { + "type": 1, + "atoms": [ + 46, + 41 + ] + }, + { + "type": 1, + "atoms": [ + 39, + 47 + ] + }, + { + "type": 1, + "atoms": [ + 47, + 48 + ] + }, + { + "type": 2, + "atoms": [ + 48, + 49 + ] + }, + { + "type": 1, + "atoms": [ + 49, + 50 + ] + }, + { + "type": 1, + "atoms": [ + 50, + 51 + ] + }, + { + "type": 1, + "atoms": [ + 51, + 52 + ] + }, + { + "type": 1, + "atoms": [ + 52, + 48 + ] + }, + { + "type": 2, + "atoms": [ + 51, + 53 + ] + }, + { + "type": 1, + "atoms": [ + 47, + 54 + ] + }, + { + "type": 1, + "atoms": [ + 54, + 55 + ] + }, + { + "type": 1, + "atoms": [ + 55, + 47 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 56 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 57, + 45 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 57, + 58 + ] + }, + { + "type": 1, + "atoms": [ + 58, + 59 + ] + }, + { + "type": 1, + "atoms": [ + 59, + 60 + ] + }, + { + "type": 1, + "atoms": [ + 60, + 61 + ] + }, + { + "type": 1, + "atoms": [ + 61, + 62 + ] + }, + { + "type": 1, + "atoms": [ + 62, + 57 + ] + }, + { + "type": 1, + "atoms": [ + 61, + 63 + ] + }, + { + "type": 1, + "atoms": [ + 61, + 64 + ] + } + ] + } +} marvin.cdx CDXML loader: Not a molecule. Found 1 arrows. *** Try as Reaction *** diff --git a/api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml b/api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml new file mode 100644 index 0000000000..afe61dcc88 --- /dev/null +++ b/api/tests/integration/tests/formats/molecules/cdx/issue_2603.cdxml @@ -0,0 +1,3423 @@ + + + + + + + + + + + + + +NNNFNNONNFONNONHOO \ No newline at end of file diff --git a/core/indigo-core/molecule/CDXCommons.h b/core/indigo-core/molecule/CDXCommons.h index 9f50c97b6c..2cf065425b 100644 --- a/core/indigo-core/molecule/CDXCommons.h +++ b/core/indigo-core/molecule/CDXCommons.h @@ -1266,7 +1266,24 @@ namespace indigo {kCDXBracketUsage_Anypolymer, "Anypolymer"}}; const std::vector kCIPStereochemistryIndexToChar = {'U', 'N', 'R', 'S', 'r', 's', 'u'}; - const std::unordered_map kCIPStereochemistryCharToIndex = {{'U', 0}, {'N', 1}, {'R', 2}, {'S', 3}, {'r', 4}, {'s', 5}, {'u', 6}}; + namespace CIPStereochemistry + { + enum + { + Undetermined = 0, + Symmetric, + Asymmetric_R, + Asymmetric_S, + Pseudoasymmetric_r, + Pseudoasymmetric_s, + Unspecified + }; + } + const std::unordered_map kCIPStereochemistryCharToIndex = { + {'U', CIPStereochemistry::Undetermined}, {'N', CIPStereochemistry::Symmetric}, {'R', CIPStereochemistry::Asymmetric_R}, + {'S', CIPStereochemistry::Asymmetric_S}, {'r', CIPStereochemistry::Pseudoasymmetric_r}, {'s', CIPStereochemistry::Pseudoasymmetric_s}, + {'u', CIPStereochemistry::Unspecified}, + }; const std::vector kCIPBondStereochemistryIndexToChar = {'U', 'N', 'E', 'Z'}; const std::unordered_map kCIPBondStereochemistryCharToIndex = {{'U', 0}, {'N', 1}, {'E', 2}, {'Z', 3}}; diff --git a/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp b/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp index 4815b8ae95..de93903899 100644 --- a/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp +++ b/core/indigo-core/molecule/src/molecule_cdxml_loader.cpp @@ -1184,7 +1184,13 @@ void MoleculeCdxmlLoader::_parseNode(CdxmlNode& node, BaseCDXElement& elem) auto pos_lambda = [&node, this](const std::string& data) { this->parsePos(data, node.pos); }; - auto stereo_lambda = [&node](const std::string& data) { node.stereo = kCIPStereochemistryCharToIndex.at(data.front()); }; + auto stereo_lambda = [&node](const std::string& data) { + const auto it = kCIPStereochemistryCharToIndex.find(data.front()); + if (it != kCIPStereochemistryCharToIndex.end()) + node.stereo = it->second; + else + node.stereo = CIPStereochemistry::Undetermined; + }; auto node_type_lambda = [&node](const std::string& data) { node.type = KNodeTypeNameToInt.at(data);