From cce000c6f153853e4147b6ac59735913f8993a98 Mon Sep 17 00:00:00 2001 From: Matt Thompson Date: Fri, 21 Jun 2024 15:39:19 -0500 Subject: [PATCH] Raise error when encountering empty InChi string (#1898) * Raise error when encountering empty InChi string * Fix test regex, lint/merge --- .pre-commit-config.yaml | 4 +-- openff/toolkit/_tests/test_toolkits.py | 21 +++++++++++++++ openff/toolkit/utils/exceptions.py | 8 ++++++ openff/toolkit/utils/openeye_wrapper.py | 6 +++++ openff/toolkit/utils/rdkit_wrapper.py | 5 ++++ .../custom_plugins/handler_plugins.py | 26 ++++++++++++------- 6 files changed, 59 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1026cb6ba..edea9b7d6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ ci: files: ^openff|(^examples/((?!deprecated).)*$)|^docs|(^utilities/((?!deprecated).)*$) repos: - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 24.4.2 hooks: - id: black - id: black-jupyter @@ -12,7 +12,7 @@ repos: hooks: - id: isort - repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 files: ^openff|(^utilities/((?!deprecated).)*$) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index a01b38eb3..c823c627b 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -33,6 +33,7 @@ ChargeMethodUnavailableError, ChemicalEnvironmentParsingError, ConformerGenerationError, + EmptyInChiError, InChIParseError, IncorrectNumConformersError, IncorrectNumConformersWarning, @@ -842,6 +843,16 @@ def test_from_bad_inchi(self): with pytest.raises(InChIParseError, match="ksbfksfksfksbfks"): Molecule.from_inchi(inchi, toolkit_registry=toolkit) + def test_empty_inchi(self): + """Reproduce Issue #1897""" + with pytest.raises( + EmptyInChiError, + match="failed to generate", + ): + Molecule.from_mapped_smiles("[H:5][S:3]#[N+:2][S:1][H:4]").to_inchi( + toolkit_registry=OpenEyeToolkitWrapper() + ) + @pytest.mark.parametrize( "molecule", get_mini_drug_bank( @@ -2321,6 +2332,16 @@ def test_non_standard_inchi_round_trip(self, molecule): mol2, bond_order_matching=False, toolkit_registry=toolkit ) + def test_empty_inchi(self): + """Reproduce Issue #1897""" + with pytest.raises( + EmptyInChiError, + match="failed to generate", + ): + Molecule.from_mapped_smiles("[H:5][S:3]#[N+:2][S:1][H:4]").to_inchi( + toolkit_registry=RDKitToolkitWrapper(), + ) + def test_smiles_charged(self): """Test RDKitWrapper functions for reading/writing charged SMILES""" toolkit_wrapper = RDKitToolkitWrapper() diff --git a/openff/toolkit/utils/exceptions.py b/openff/toolkit/utils/exceptions.py index 800acdf05..1dd628741 100644 --- a/openff/toolkit/utils/exceptions.py +++ b/openff/toolkit/utils/exceptions.py @@ -135,6 +135,14 @@ class SmilesParsingError(OpenFFToolkitException): """ +class EmptyInChiError(OpenFFToolkitException): + """ + This error is raised when a toolkit returns an empty InChi string, possibly due to bugs + in the underlying InChi code. For more context, see + https://github.com/openforcefield/openff-toolkit/issues/1897 + """ + + class NotAttachedToMoleculeError(OpenFFToolkitException): """Exception for when a component does not belong to a Molecule object, but is queried""" diff --git a/openff/toolkit/utils/openeye_wrapper.py b/openff/toolkit/utils/openeye_wrapper.py index 58ab2904c..06a91a110 100644 --- a/openff/toolkit/utils/openeye_wrapper.py +++ b/openff/toolkit/utils/openeye_wrapper.py @@ -39,6 +39,7 @@ ChargeCalculationError, ChargeMethodUnavailableError, ConformerGenerationError, + EmptyInChiError, GAFFAtomTypeWarning, InChIParseError, InconsistentStereochemistryError, @@ -1916,6 +1917,11 @@ def to_inchi(self, molecule: "Molecule", fixed_hydrogens: bool = False) -> str: else: inchi = oechem.OEMolToSTDInChI(oemol) + if len(inchi) == 0: + raise EmptyInChiError( + "OEChem failed to generate an InChI for the molecule." + ) + return inchi def to_inchikey(self, molecule: "Molecule", fixed_hydrogens: bool = False) -> str: diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index bf9bef15e..e1bd39a18 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -30,6 +30,7 @@ AmbiguousBondChemicalAssignment, ChargeMethodUnavailableError, ConformerGenerationError, + EmptyInChiError, InChIParseError, InvalidAromaticityModelError, MoleculeParseError, @@ -2769,6 +2770,10 @@ def to_inchi(self, molecule: "Molecule", fixed_hydrogens: bool = False): inchi = Chem.MolToInchi(rdmol, options="-FixedH") else: inchi = Chem.MolToInchi(rdmol) + + if len(inchi) == 0: + raise EmptyInChiError("RDKit failed to generate an InChI for the molecule.") + return inchi def to_inchikey(self, molecule: "Molecule", fixed_hydrogens: bool = False) -> str: diff --git a/utilities/test_plugins/custom_plugins/handler_plugins.py b/utilities/test_plugins/custom_plugins/handler_plugins.py index 25f7e1b9d..23629ce37 100644 --- a/utilities/test_plugins/custom_plugins/handler_plugins.py +++ b/utilities/test_plugins/custom_plugins/handler_plugins.py @@ -1,7 +1,12 @@ # noqa: INP001 -from openff.toolkit.typing.engines.smirnoff import ParameterHandler, ParameterIOHandler -from openff.toolkit.typing.engines.smirnoff.parameters import ParameterAttribute, ParameterType, _NonbondedHandler from openff.toolkit import unit +from openff.toolkit.typing.engines.smirnoff import ParameterHandler, ParameterIOHandler +from openff.toolkit.typing.engines.smirnoff.parameters import ( + ParameterAttribute, + ParameterType, + _NonbondedHandler, +) + class CustomHandler(ParameterHandler): _TAGNAME = "CustomHandler" @@ -14,6 +19,7 @@ class WrongSubclass(list): class CustomIOHandler(ParameterIOHandler): _FORMAT = "JSON" + class FOOBuckinghamHandler(_NonbondedHandler): """A custom parameter handler for buckingham interactions.""" @@ -23,18 +29,20 @@ class FOOBuckinghamType(ParameterType): _ELEMENT_NAME = "Atom" # Define unit as a Unit object - a = ParameterAttribute(default=None, - unit=unit.kilojoule_per_mole, - ) + a = ParameterAttribute( + default=None, + unit=unit.kilojoule_per_mole, + ) # Define using a string - b = ParameterAttribute(default=None, - unit="nanometer**-1", - ) + b = ParameterAttribute( + default=None, + unit="nanometer**-1", + ) c = ParameterAttribute( default=None, - unit=unit.kilojoule_per_mole * unit.nanometer ** 6, + unit=unit.kilojoule_per_mole * unit.nanometer**6, ) _TAGNAME = "FOOBuckingham"