Prep for v1.0.7 (#473)

monarch-initiative · Nov 8, 2024 · 50127e5 · 50127e5
2 parents ce5c592 + eb4aa76
commit 50127e5
Show file tree

Hide file tree

Showing 8 changed files with 1,654 additions and 1,178 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ontogpt"
-version = "1.0.6"
+version = "1.0.7"
 description = "OntoGPT"
 authors = ["Chris Mungall <[email protected]>", "J. Harry Caufield <[email protected]>"]
 license = "BSD-3"

diff --git a/src/ontogpt/engines/generic_engine.py b/src/ontogpt/engines/generic_engine.py
@@ -1,4 +1,29 @@
-"""Generic LLM interface engine."""
+"""Generic LLM interface engine.
+
+This module defines a generic engine for interfacing with
+a large language model (LLM) to answer questions based on
+provided templates and instructions.
+
+Classes:
+    Question: A Pydantic model representing a question
+        with various attributes.
+    Instruction: A Pydantic model representing an instruction
+        with a name, text, and template.
+    QuestionCollection: A Pydantic model representing a
+        collection of questions and instructions.
+    GenericEngine: A dataclass inheriting from KnowledgeEngine,
+        responsible for running the engine to process questions and
+        generate answers.
+
+Constants:
+    MAX_TOKENS: The maximum number of tokens to be generated by the LLM.
+
+Methods:
+    GenericEngine.run: Processes a collection of questions using
+        provided templates and instructions, and yields questions with
+        generated answers.
+
+"""
 
 import logging
 from copy import deepcopy

diff --git a/src/ontogpt/engines/knowledge_engine.py b/src/ontogpt/engines/knowledge_engine.py
@@ -1,4 +1,73 @@
-"""Main Knowledge Extractor class."""
+"""Main Knowledge Extractor class.
+
+This module defines the KnowledgeEngine class, an
+abstract base class for extracting knowledge from
+text using knowledge sources and language models.
+It includes methods for chunking text, extracting
+annotations from text and files, loading dictionaries,
+synthesizing and generalizing extraction results,
+mapping terms, and normalizing named entities.
+
+Classes:
+    KnowledgeEngine: Abstract base class for all knowledge engines.
+
+Functions:
+    chunk_text_by_sentence(text: str, window_size=3) -> Iterator[str]:
+        Chunk text into windows of sentences.
+
+    chunk_text_by_char(text: str, window_size=1000) -> Iterator[str]:
+        Chunk text into windows of characters.
+
+    set_api_key(key: str):
+        Set the API key for accessing an external language model.
+
+    extract_from_text(text: str, cls: ClassDefinition = None,
+        object: OBJECT = None) -> ExtractionResult:
+        Abstract method to extract knowledge from text.
+
+    extract_from_file(file: Union[str, Path, TextIO]) -> pydantic.BaseModel:
+        Extract annotations from the given text file.
+
+    load_dictionary(path: Union[str, Path, list]):
+        Load a dictionary from a given path or list.
+
+    synthesize(cls: ClassDefinition = None, object: OBJECT = None) -> ExtractionResult:
+        Abstract method to synthesize extraction results.
+
+    generalize(object: Union[pydantic.BaseModel, dict],
+        examples: List[EXAMPLE], show_prompt: bool) -> ExtractionResult:
+        Abstract method to generalize extraction results.
+
+    map_terms(terms: List[str], ontology: str, show_prompt: bool) -> Dict[str, str]:
+        Abstract method to map terms to ontology.
+
+    _get_openai_api_key():
+        Get the OpenAI API key from the environment.
+
+    get_annotators(cls: ClassDefinition = None) -> List[BasicOntologyInterface]:
+
+    promptable_slots(cls: Optional[ClassDefinition] = None) -> List[SlotDefinition]:
+
+    slot_is_skipped(slot: SlotDefinition) -> bool:
+        Check if a slot is skipped for prompting.
+
+    normalize_named_entity(text: str, range: ElementName) -> str:
+        Ground and normalize named entities to preferred ID prefixes.
+
+    is_valid_identifier(input_id: str, cls: ClassDefinition) -> bool:
+        Check if an identifier is valid for a given class.
+
+    normalize_identifier(input_id: str, cls: ClassDefinition) -> Iterator[str]:
+
+    map_identifier(input_id: str, cls: ClassDefinition) -> Iterator[str]:
+        Map an identifier to a preferred prefix.
+
+    groundings(text: str, cls: ClassDefinition) -> Iterator[str]:
+
+    merge_resultsets(resultset: List[ExtractionResult],
+        unique_fields: List[str]) -> ExtractionResult:
+
+"""
 
 import logging
 import re

diff --git a/src/ontogpt/engines/spires_engine.py b/src/ontogpt/engines/spires_engine.py
@@ -1,5 +1,5 @@
 """
-Main Knowledge Extractor class.
+Main SPIRES Knowledge Extractor class.
 
 This works by recursively constructing structured prompt-completions where
 a pseudo-YAML structure is requested, where the YAML

diff --git a/src/ontogpt/ontex/extractor.py b/src/ontogpt/ontex/extractor.py
@@ -1,4 +1,68 @@
-"""Tools to extract sub-ontologies and reasoner tasks."""
+"""Tools to extract sub-ontologies and reasoner tasks.
+
+This module provides classes and functions to extract sub-ontologies and reasoner tasks from a
+given ontology. These tasks can be used to query or evaluate the ontology using a ReasonerEngine
+object.
+
+Classes:
+    Axiom: Represents an individual logical axiom.
+    Ontology: Represents a collection of axioms.
+    Query: Represents a query.
+    Explanation: Represents a set of axioms that entail some explained axiom.
+    Answer: Represents an individual answer to a query.
+    ObjectAnswer: Represents an answer that is an object, e.g., a class.
+    ClassAnswer: Represents an answer that is an OWL class.
+    InstanceAnswer: Represents an answer that is an OWL individual.
+    BooleanAnswer: Represents an answer that is a boolean, e.g., true or false.
+    AxiomAnswer: Represents an answer that is an axiom.
+    ExampleQueryAnswers: Represents an example query plus all expected answers.
+    Example: Represents an example of a query plus answers in the context of an ontology.
+    LLMReasonMethodType: Enum representing different reasoning method types.
+    Task: Represents a task which is a query on an ontology that has a set of defined answers.
+    OntologyCoherencyTask: Represents a task to determine if an ontology is coherent.
+    EntailedIndirectSuperClassTask: Represents a task to determine the indirect superclasses of a
+        class.
+    EntailedTransitiveSuperClassTask: Represents a task to determine all transitive superclasses of
+        a class.
+    EntailedSubClassOfExpressionTask: Represents a task to determine the subclasses of a class
+        expression.
+    EntailedDirectSuperClassTask: Represents a task to determine the direct superclasses of a class.
+    MostRecentCommonSubsumerTask: Represents a task to determine the most specific common ancestors.
+    TaxonConstraintTask: Represents a task to determine inapplicable classes by taxon.
+    ABoxTask: Represents a task to infer assertions over property chains and transitivity in
+        ABoxes.
+    TaskCollection: Represents a collection of tasks.
+    OntologyExtractor: Extracts Task objects from an ontology.
+
+Functions:
+    TaskCollection.load(file_or_object): Loads a TaskCollection from a file or object.
+    OntologyExtractor.create_task(task_type, parameters, **kwargs): Creates a task of the specified
+        type.
+    OntologyExtractor.create_random_tasks(num_tasks_per_type, methods, abox): Creates random tasks.
+    OntologyExtractor.extract_ontology(terms, roots, predicates, include_abox): Extracts an ontology
+        module.
+    OntologyExtractor.extract_indirect_superclasses_task(subclass, siblings, roots, predicates,
+        select_random, **kwargs): Extracts a task for finding all indirect superclasses of a class.
+    OntologyExtractor.extract_transitive_superclasses_task(subclass, siblings, roots, predicates,
+        select_random, **kwargs): Extracts a task for finding all
+        transitive superclasses of a class.
+    OntologyExtractor.extract_abox_task(subject, siblings, predicate, select_random, **kwargs):
+        Extracts a task for finding all entailed edges.
+    OntologyExtractor.extract_most_recent_common_subsumers_task(subclass1, subclass2, siblings,
+        roots, predicates, select_random, **kwargs): Extracts a task for finding all MRCAs of a pair
+        of classes.
+    OntologyExtractor.extract_subclass_of_expression_task(superclass, predicate, siblings,
+        predicates, select_random, **kwargs): Extracts a task for finding all subclasses of a class
+        expression.
+    OntologyExtractor.extract_incoherent_ontology_task(incoherents, siblings, disjoints,
+        spiked_relationships, roots, select_random, **kwargs): Extracts a task for testing ability
+        to find incoherencies based on disjointness axioms.
+    OntologyExtractor.extract_taxon_constraint_task(term, taxon, siblings, never_in, select_random,
+        **kwargs): Extracts a task for evaluating taxon constraints.
+    OntologyExtractor._axiom(rel, tbox): Creates an axiom from a relationship.
+    OntologyExtractor._name(curie): Returns the name of a CURIE.
+    OntologyExtractor.extract_rbox(): Extracts RBox axioms.
+"""
 
 import base64
 import logging