monarch-initiative · caufieldjh · Aug 5, 2024 · Aug 3, 2024 · Aug 5, 2024 · Aug 5, 2024
diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py
@@ -51,6 +51,7 @@
 
 from ontogpt.io.owl_exporter import OWLExporter
 from ontogpt.io.rdf_exporter import RDFExporter
+from ontogpt.io.csv_exporter import CSVExporter
 from ontogpt.io.json_wrapper import dump_minimal_json
 from ontogpt.io.yaml_wrapper import dump_minimal_yaml
 from ontogpt.templates.core import ExtractionResult
@@ -84,7 +85,7 @@ def write_extraction(
     """Write results of extraction to a given output stream."""
     # Check if this result contains anything writable first
     if results.extracted_object:
-        exporter: Union[MarkdownExporter, HTMLExporter, RDFExporter, OWLExporter]
+        exporter: Union[CSVExporter, MarkdownExporter, HTMLExporter, RDFExporter, OWLExporter]
 
         if cut_input_text:
             truncate_len = 1000
@@ -115,6 +116,12 @@ def write_extraction(
             exporter.export(results, output, knowledge_engine.schemaview)
         elif output_format == "json":
             output.write(dump_minimal_json(results))  # type: ignore
+        elif output_format == "csv":
+            exporter = CSVExporter()
+            exporter.export(results, output, knowledge_engine.schemaview)
+        elif output_format == "tsv":
+            exporter = CSVExporter(sep="\t")
+            exporter.export(results, output, knowledge_engine.schemaview)
         elif output_format == "kgx":
             # TODO: enable passing name without extension,
             # since there will be multiple output files
@@ -187,7 +194,9 @@ def write_extraction(
 output_format_options = click.option(
     "-O",
     "--output-format",
-    type=click.Choice(["json", "yaml", "pickle", "md", "html", "owl", "turtle", "jsonl", "kgx"]),
+    type=click.Choice(  # "csv" and "tsv" are both written by CSVExporter
+        ["json", "yaml", "pickle", "md", "html", "owl", "turtle", "jsonl", "kgx", "csv", "tsv"]
+    ),
     default="yaml",
     help="Output format.",
 )

diff --git a/src/ontogpt/io/csv_exporter.py b/src/ontogpt/io/csv_exporter.py
@@ -0,0 +1,57 @@
+"""CSV exporter class."""
+
+from dataclasses import dataclass
+from io import BytesIO
+from pathlib import Path
+from typing import Optional, TextIO, Union
+from ontogpt.io.exporter import Exporter
+from io import BytesIO, StringIO, TextIOWrapper
+import pandas as pd
+
+from linkml_runtime import SchemaView
+
+from ontogpt.templates.core import ExtractionResult
+
+
+@dataclass
+class CSVExporter(Exporter):
+    """Write an extraction result as delimiter-separated key/value rows.
+
+    Each top-level field of the extracted object becomes one row, keyed by the
+    field name; list-valued fields are expanded to one row per element.
+    """
+
+    # Field delimiter passed to pandas; the CLI uses "\t" for TSV output.
+    sep: str = ","
+
+    def export(
+        self,
+        extraction_output: ExtractionResult,
+        output: Union[str, Path, TextIO, BytesIO],
+        schemaview: Optional[SchemaView],
+    ) -> None:
+        """Serialize ``extraction_output.extracted_object`` to ``output``.
+
+        :param extraction_output: result whose ``extracted_object`` is written
+        :param output: a filesystem path (``str`` or ``Path``), an open text
+            stream, or a ``BytesIO`` buffer that receives UTF-8 encoded text
+        :param schemaview: accepted for signature compatibility; not used here
+        """
+        out_dict = extraction_output.extracted_object.dict()
+        # One object-typed column keeps list values intact in a single cell so
+        # that explode() can expand them; DataFrame.from_dict(orient="index")
+        # would spread multi-element lists over several columns instead.
+        df = pd.DataFrame({"values": list(out_dict.values())}, index=list(out_dict))
+        df = df.explode("values")
+
+        if isinstance(output, (str, Path)):
+            # Paths are opened here, so they are also closed here.
+            # newline="" leaves line endings to the CSV writer.
+            with open(output, "w", encoding="utf-8", newline="") as stream:
+                df.to_csv(path_or_buf=stream, sep=self.sep, header=True)
+        elif isinstance(output, BytesIO):
+            # Encode explicitly rather than wrapping in TextIOWrapper, which
+            # would close the caller's buffer when the wrapper is collected.
+            output.write(df.to_csv(sep=self.sep, header=True).encode("utf-8"))
+        else:
+            df.to_csv(path_or_buf=output, sep=self.sep, header=True)