Skip to content

Commit

Permalink
#1282 SMARTS load/save errors in Indigo. (#1284)
Browse files Browse the repository at this point in the history
Co-authored-by: Aliaksandr Dziarkach <[email protected]>
  • Loading branch information
AliaksandrDziarkach and Aliaksandr Dziarkach authored Sep 22, 2023
1 parent 67d5cf1 commit 229868c
Show file tree
Hide file tree
Showing 16 changed files with 200 additions and 44 deletions.
8 changes: 4 additions & 4 deletions api/c/tests/unit/tests/formats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ TEST_F(IndigoApiFormatsTest, molecule)
EXPECT_EQ(7, indigoCountBonds(obj));

// 3
const string expectedSmarts = "[#6;A]1-[#6;A]=[#6;A]-[#6;A]=[#6;A]-[#6;A]=1-[!#1]";
const string expectedSmarts = "[C]1-[C]=[C]-[C]=[C]-[C]=1-[*]";
obj = indigoLoadStructureFromString(mStr.c_str(), "smarts");
EXPECT_STREQ(expectedSmarts.c_str(), indigoSmarts(obj));
EXPECT_EQ(7, indigoCountAtoms(obj));
EXPECT_EQ(7, indigoCountBonds(obj));

// 4
const string expectedQuery = "[#6]1-[#6]=[#6]-[#6]=[#6]-[#6]=1-[!#1]";
const string expectedQuery = "[#6]1-[#6]=[#6]-[#6]=[#6]-[#6]=1-[*]";
obj = indigoLoadStructureFromString(mStr.c_str(), "query");
EXPECT_STREQ(expectedQuery.c_str(), indigoSmarts(obj));
EXPECT_EQ(7, indigoCountAtoms(obj));
Expand All @@ -83,13 +83,13 @@ TEST_F(IndigoApiFormatsTest, reaction)
{
const string react = "C1=C(*)C=CC=C1>>C1=CC=CC(*)=C1";
const string expected = "C1C=CC=CC=1*>>C1C=C(*)C=CC=1";
const string expected_smarts = "[C]1-[C]=[C]-[C]=[C]-[C]=1-[*]>>[C]1-[C]=[C](-[*])-[C]=[C]-[C]=1";

try
{
int obj = -1;
obj = indigoLoadStructureFromString(react.c_str(), "smarts");
EXPECT_STREQ(expected.c_str(), indigoSmiles(obj));
EXPECT_STREQ(expected.c_str(), indigoCanonicalSmiles(obj));
EXPECT_STREQ(expected_smarts.c_str(), indigoSmarts(obj));
EXPECT_EQ(1, indigoCountReactants(obj));
EXPECT_EQ(1, indigoCountProducts(obj));
EXPECT_EQ(2, indigoCountMolecules(obj));
Expand Down
6 changes: 5 additions & 1 deletion api/tests/integration/common/rendering/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import os
import platform
import sys
Expand Down Expand Up @@ -204,14 +205,17 @@ def checkBitmapSimilarity(filename, ref_filename):
return "%s rendering status: Problem: %s" % (filename, str(e))

channels = ["red", "green", "blue", "alpha"]
with open("%s/out/%s" % (dirname, filename), "rb") as file:
binary_data = file.read()
for i, result in enumerate(results):
if result > (HASH_SIZE**2) * 0.1:
return (
"%s rendering status: Problem: PNG similarity is %s for %s channel"
"%s rendering status: Problem: PNG similarity is %s for %s channel\n%s\n"
% (
filename,
round(1 - (result / float(HASH_SIZE**2)), 2),
channels[i],
base64.b64encode(binary_data),
)
)

Expand Down
2 changes: 1 addition & 1 deletion api/tests/integration/ref/basic/load_structure.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ C1C=CC=CC=1*>>C1C=C(*)C=CC=1
C1C=CC=CC=1* |$;;;;;;A$|

(** #8 **): Smarts, (smarts parameter):
[#8;A;H]-[#6;a]1-,:[#6;a]-,:[#6;a]-,:[#6;a]-,:[#6;a]-,:[#6;a]-,:1
[O;H]-[c]1-,:[c]-,:[c]-,:[c]-,:[c]-,:[c]-,:1

(** #9 **): Smarts, (query parameter):
[OH]C1:C:C:C:C:C:1
Expand Down
17 changes: 17 additions & 0 deletions api/tests/integration/ref/formats/smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,20 @@ CC[C+5]CCCCC
**** Load and Save as Query with component-level grouping ****
([#8].[#6]) is ok. smarts_in==smarts_out
([#8].[#6]).([#8].[#6]) is ok. smarts_in==smarts_out
[!C;!b] is ok. smarts_in==smarts_out
[*] is ok. smarts_in==smarts_out
[*;R1] is ok. smarts_in==smarts_out
[*;R3] is ok. smarts_in==smarts_out
[r] is ok. smarts_in==smarts_out
[r0] is ok. smarts_in==smarts_out
[r1] is ok. smarts_in==smarts_out
[r3] is ok. smarts_in==smarts_out
[v] is ok. smarts_in==smarts_out
[v0] is ok. smarts_in==smarts_out
[v3] is ok. smarts_in==smarts_out
[+0] is ok. smarts_in==smarts_out
[#6]@[#6] is ok. smarts_in==smarts_out
[#9]/[#6] is ok. smarts_in==smarts_out
[#9]/[#6]=[C]/[#17] is ok. smarts_in==smarts_out
[O;H] is ok. smarts_in==smarts_out
[!O;H] is ok. smarts_in==smarts_out
2 changes: 2 additions & 0 deletions api/tests/integration/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,8 @@ def run_analyze_test(args):
f.close()
with stdout_lock:
sys.__stdout__.write(out_message + "\n")
if test_status == "[FAILED]":
sys.__stdout__.write(msg + "\n")
test_result = (root, filename, test_status, msg, tspent)
return test_result

Expand Down
23 changes: 21 additions & 2 deletions api/tests/integration/tests/formats/smarts.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/env python3
import os
import sys

Expand All @@ -23,8 +24,8 @@ def test_smarts_load_save(smarts_in):
print("%s is ok. smarts_in==smarts_out" % smarts_in)
else:
print("smarts_in!=smarts_out")
print(" smarts_in=%s", smarts_in)
print("smarts_out=%s", smarts_out)
print(" smarts_in=%s" % smarts_in)
print("smarts_out=%s" % smarts_out)


molstr = """
Expand Down Expand Up @@ -96,3 +97,21 @@ def test_smarts_load_save(smarts_in):
print("**** Load and Save as Query with component-level grouping ****")
test_smarts_load_save("([#8].[#6])")
test_smarts_load_save("([#8].[#6]).([#8].[#6])")

test_smarts_load_save("[!C;!b]")
test_smarts_load_save("[*]")
test_smarts_load_save("[*;R1]")
test_smarts_load_save("[*;R3]")
test_smarts_load_save("[r]")
test_smarts_load_save("[r0]")
test_smarts_load_save("[r1]")
test_smarts_load_save("[r3]")
test_smarts_load_save("[v]")
test_smarts_load_save("[v0]")
test_smarts_load_save("[v3]")
test_smarts_load_save("[+0]")
test_smarts_load_save("[#6]@[#6]")
test_smarts_load_save("[#9]/[#6]")
test_smarts_load_save("[#9]/[#6]=[C]/[#17]")
test_smarts_load_save("[O;H]")
test_smarts_load_save("[!O;H]")
Binary file modified api/tests/integration/tests/rendering/ref/linux/smarts/0190.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified api/tests/integration/tests/rendering/ref/mac/smarts/0190.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified api/tests/integration/tests/rendering/ref/win/smarts/0190.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 8 additions & 9 deletions core/indigo-core/common/base_cpp/output.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ namespace indigo

virtual void write(const void* data, int size) = 0;
virtual void flush() = 0;
virtual long long tell() const noexcept
{
return 0;
}

virtual void writeByte(byte value);

Expand All @@ -63,18 +67,13 @@ namespace indigo
void printfCR(const char* format, ...);
};

class DLLEXPORT OutputTell
{
virtual long long tell() const noexcept = 0;
};

class DLLEXPORT OutputSeek
{
virtual void seek(long long offset, int from) = 0;
void skip(int count);
};

class DLLEXPORT FileOutput : public Output, public OutputSeek, public OutputTell
class DLLEXPORT FileOutput : public Output, public OutputSeek
{
public:
FileOutput(Encoding filename_encoding, const char* filename);
Expand All @@ -96,7 +95,7 @@ namespace indigo
FILE* _file;
};

class DLLEXPORT ArrayOutput : public Output, public OutputTell
class DLLEXPORT ArrayOutput : public Output
{
public:
explicit ArrayOutput(Array<char>& arr);
Expand All @@ -111,7 +110,7 @@ namespace indigo
Array<char>& _arr;
};

class DLLEXPORT StringOutput : public Output, public OutputTell
class DLLEXPORT StringOutput : public Output
{
public:
StringOutput() = delete;
Expand All @@ -127,7 +126,7 @@ namespace indigo
std::string& _str;
};

class DLLEXPORT StandardOutput : public Output, public OutputTell
class DLLEXPORT StandardOutput : public Output
{
public:
explicit StandardOutput();
Expand Down
2 changes: 1 addition & 1 deletion core/indigo-core/common/gzip/gzip_output.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
namespace indigo
{

class GZipOutput : public Output, OutputTell
class GZipOutput : public Output
{
public:
enum
Expand Down
4 changes: 3 additions & 1 deletion core/indigo-core/molecule/base_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ namespace indigo
_BOND_DOUBLE_OR_AROMATIC = 7,
_BOND_ANY = 8,
_BOND_COORDINATION = 9,
_BOND_HYDROGEN = 10
_BOND_HYDROGEN = 10,
BOND_SMARTS_UP = 11,
BOND_SMARTS_DOWN = 12,
};

enum
Expand Down
5 changes: 5 additions & 0 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ namespace indigo
ATOM_TEMPLATE_SEQID,
ATOM_TEMPLATE_CLASS,
ATOM_PI_BONDED,
ATOM_CHILARITY,

BOND_ORDER,
BOND_TOPOLOGY,
Expand All @@ -117,6 +118,8 @@ namespace indigo
// otherwise: no children
PtrArray<Node> children;

bool artificial; // if true - added by parser to comply restrictions

// Check if node has any constraint of the specific type
bool hasConstraint(int what_type);

Expand All @@ -137,6 +140,8 @@ namespace indigo
bool sureValueBelongs(int what_type, const int* arr, int count);
bool sureValueBelongsInv(int what_type, const int* arr, int count);

bool hasOP_OR();

// Optimize query for faster substructure search
void optimize();

Expand Down
25 changes: 24 additions & 1 deletion core/indigo-core/molecule/src/query_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ bool QueryMolecule::isSaturatedAtom(int idx)
throw Error("not implemented");
}

QueryMolecule::Node::Node(int type_)
QueryMolecule::Node::Node(int type_) : artificial(false)
{
type = (OpType)type_;
}
Expand Down Expand Up @@ -1186,6 +1186,29 @@ bool QueryMolecule::Node::sureValueBelongs(int what_type, const int* arr, int co
}
}

bool QueryMolecule::Node::hasOP_OR()
{
int i;

switch (type)
{
case OP_AND: {
for (i = 0; i < children.size(); i++)
if (children[i]->hasOP_OR())
return true;

return false;
}
case OP_OR: {
return true;
}
case OP_NOT:
return false;
default:
return false;
}
}

QueryMolecule::Atom* QueryMolecule::Atom::sureConstraint(int what_type)
{
int count = 0;
Expand Down
17 changes: 13 additions & 4 deletions core/indigo-core/molecule/src/smiles_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2148,8 +2148,9 @@ void SmilesLoader::_forbidHydrogens()
std::unique_ptr<QueryMolecule::Atom> newatom;
std::unique_ptr<QueryMolecule::Atom> oldatom(_qmol->releaseAtom(i));

newatom.reset(
QueryMolecule::Atom::und(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)), oldatom.release()));
std::unique_ptr<QueryMolecule::Atom> notH(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)));
notH->artificial = true;
newatom.reset(QueryMolecule::Atom::und(notH.release(), oldatom.release()));

_qmol->resetAtom(i, newatom.release());
}
Expand Down Expand Up @@ -2565,15 +2566,21 @@ void SmilesLoader::_readBondSub(Array<char>& bond_str, _BondDesc& bond, std::uni
else if (next == '/')
{
scanner.skip(1);
order = BOND_SINGLE;
if (smarts_mode)
order = BOND_SMARTS_UP;
else
order = BOND_SINGLE;
if (bond.dir == 2)
throw Error("Specificiation of both cis- and trans- bond restriction is not supported yet.");
bond.dir = 1;
}
else if (next == '\\')
{
scanner.skip(1);
order = BOND_SINGLE;
if (smarts_mode)
order = BOND_SMARTS_DOWN;
else
order = BOND_SINGLE;
if (bond.dir == 1)
throw Error("Specificiation of both cis- and trans- bond restriction is not supported yet.");
bond.dir = 2;
Expand Down Expand Up @@ -3301,6 +3308,8 @@ void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _Ato

if (isdigit(scanner.lookNext()))
subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SMALLEST_RING_SIZE, scanner.readUnsigned());
else if (smarts_mode)
subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SMALLEST_RING_SIZE, 1, 100);
else
subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RING_BONDS, 1, 100);
}
Expand Down
Loading

0 comments on commit 229868c

Please sign in to comment.