From 0d938d9f3f6b4b0da41b7f4ad03a3200970b2344 Mon Sep 17 00:00:00 2001 From: Andrei Markin Date: Tue, 11 Jun 2024 13:14:14 +0400 Subject: [PATCH] [gaarf-py] Add support for remote writes for CSV and JSON writers Change-Id: I76c0b3bff5051dff1d314426b4361468ea188f99 --- py/gaarf/io/writers/csv_writer.py | 50 +++++++++++++++----- py/gaarf/io/writers/file_writer.py | 46 ++++++++++++++++++ py/gaarf/io/writers/json_writer.py | 36 +++++++------- py/tests/unit/io/writers/test_csv_writer.py | 13 +++++ py/tests/unit/io/writers/test_file_writer.py | 33 +++++++++++++ py/tests/unit/io/writers/test_json_writer.py | 13 +++++ 6 files changed, 163 insertions(+), 28 deletions(-) create mode 100644 py/gaarf/io/writers/file_writer.py create mode 100644 py/tests/unit/io/writers/test_file_writer.py diff --git a/py/gaarf/io/writers/csv_writer.py b/py/gaarf/io/writers/csv_writer.py index 7e7088b8..983658e6 100644 --- a/py/gaarf/io/writers/csv_writer.py +++ b/py/gaarf/io/writers/csv_writer.py @@ -20,22 +20,41 @@ import os from typing import Literal +import smart_open + from gaarf.io import formatter -from gaarf.io.writers.abs_writer import AbsWriter +from gaarf.io.writers import file_writer from gaarf.report import GaarfReport -class CsvWriter(AbsWriter): +class CsvWriter(file_writer.FileWriter): + """Writes Gaarf Report to CSV. + + Attributes: + destination_folder: Destination where CSV files are stored. + delimiter: CSV delimiter. + quotechar: CSV writer quotechar. + quoting: CSV writer quoting. + """ + def __init__( self, - destination_folder: str = os.getcwd(), + destination_folder: str | os.PathLike = os.getcwd(), delimiter: str = ',', quotechar: str = '"', quoting: Literal[0] = csv.QUOTE_MINIMAL, **kwargs, ) -> None: - super().__init__(**kwargs) - self.destination_folder = destination_folder + """Initializes CsvWriter based on a destination_folder. + + Args: + destination_folder: Destination where CSV files are stored. + delimiter: CSV delimiter. + quotechar: CSV writer quotechar. 
+ quoting: CSV writer quoting. + kwargs: Optional keyword arguments to initialize writer. + """ + super().__init__(destination_folder=destination_folder, **kwargs) self.delimiter = delimiter self.quotechar = quotechar self.quoting = quoting @@ -47,13 +66,22 @@ def __str__(self): ) def write(self, report: GaarfReport, destination: str) -> str: + """Writes Gaarf report to a CSV file. + + Args: + report: Gaarf report. + destination: Base file name report should be written to. + + Returns: + Full path where data are written. + """ report = self.format_for_write(report) destination = formatter.format_extension(destination, new_extension='.csv') - if not os.path.isdir(self.destination_folder): - os.makedirs(self.destination_folder) + self.create_dir() logging.debug('Writing %d rows of data to %s', len(report), destination) - with open( - os.path.join(self.destination_folder, destination), + output_path = os.path.join(self.destination_folder, destination) + with smart_open.open( + output_path, encoding='utf-8', mode='w', ) as file: @@ -65,5 +93,5 @@ def write(self, report: GaarfReport, destination: str) -> str: ) writer.writerow(report.column_names) writer.writerows(report.results) - logging.debug('Writing to %s is completed', destination) - return f'[CSV] - at {destination}' + logging.debug('Writing to %s is completed', output_path) + return f'[CSV] - at {output_path}' diff --git a/py/gaarf/io/writers/file_writer.py b/py/gaarf/io/writers/file_writer.py new file mode 100644 index 00000000..e72a6366 --- /dev/null +++ b/py/gaarf/io/writers/file_writer.py @@ -0,0 +1,46 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module for writing data to a file.""" + +import os + +from gaarf.io.writers.abs_writer import AbsWriter + + +class FileWriter(AbsWriter): + """Writes Gaarf Report to a local or remote file. + + Attributes: + destination_folder: Destination where output file is stored. + """ + + def __init__( + self, + destination_folder: str | os.PathLike = os.getcwd(), + **kwargs: str, + ) -> None: + """Initializes FileWriter based on destination folder.""" + super().__init__(**kwargs) + self.destination_folder = str(destination_folder) + + def create_dir(self) -> None: + """Creates destination folder unless it exists or is remote.""" + if ( + not os.path.isdir(self.destination_folder) + and '://' not in self.destination_folder + ): + os.makedirs(self.destination_folder) + + def write(self) -> None: + return diff --git a/py/gaarf/io/writers/json_writer.py b/py/gaarf/io/writers/json_writer.py index 1f59267e..6551324d 100644 --- a/py/gaarf/io/writers/json_writer.py +++ b/py/gaarf/io/writers/json_writer.py @@ -19,45 +19,47 @@ import logging import os +import smart_open + import gaarf from gaarf.io import formatter -from gaarf.io.writers import abs_writer +from gaarf.io.writers import file_writer -class JsonWriter(abs_writer.AbsWriter): +class JsonWriter(file_writer.FileWriter): """Writes Gaarf Report to JSON. Attributes: - destination_folder: A local folder where JSON files are stored. + destination_folder: Destination where JSON files are stored. 
""" def __init__( - self, destination_folder: str = os.getcwd(), **kwargs: str + self, destination_folder: str | os.PathLike = os.getcwd(), **kwargs: str ) -> None: """Initializes JsonWriter based on a destination_folder. Args: - destination_folder: A local folder where JSON files are stored. - Returns: Description of return. + destination_folder: A local folder where JSON files are stored. + kwargs: Optional keyword arguments to initialize writer. """ - super().__init__(**kwargs) - self.destination_folder = destination_folder + super().__init__(destination_folder=destination_folder, **kwargs) def write(self, report: gaarf.report.GaarfReport, destination: str) -> str: """Writes Gaarf report to a JSON file. Args: - report: Gaarf report. - destination: Base file name report should be written to. + report: Gaarf report. + destination: Base file name report should be written to. + + Returns: + Base filename where data are written. """ report = self.format_for_write(report) destination = formatter.format_extension(destination, new_extension='.json') - if not os.path.isdir(self.destination_folder): - os.makedirs(self.destination_folder) + self.create_dir() logging.debug('Writing %d rows of data to %s', len(report), destination) - with open( - os.path.join(self.destination_folder, destination), 'w', encoding='utf-8' - ) as f: + output_path = os.path.join(self.destination_folder, destination) + with smart_open.open(output_path, 'w', encoding='utf-8') as f: json.dump(report.to_list(row_type='dict'), f) - logging.debug('Writing to %s is completed', destination) - return f'[JSON] - at {destination}' + logging.debug('Writing to %s is completed', output_path) + return f'[JSON] - at {output_path}' diff --git a/py/tests/unit/io/writers/test_csv_writer.py b/py/tests/unit/io/writers/test_csv_writer.py index c828c4be..fe2560d2 100644 --- a/py/tests/unit/io/writers/test_csv_writer.py +++ b/py/tests/unit/io/writers/test_csv_writer.py @@ -1,3 +1,16 @@ +# Copyright 2024 Google LLC +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import annotations import pytest diff --git a/py/tests/unit/io/writers/test_file_writer.py b/py/tests/unit/io/writers/test_file_writer.py new file mode 100644 index 00000000..03f3dc95 --- /dev/null +++ b/py/tests/unit/io/writers/test_file_writer.py @@ -0,0 +1,33 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pathlib + +import pytest + +from gaarf.io.writers import file_writer + + +class TestFileWriter: + def test_create_dir_from_local_path_creates_folder(self, tmp_path): + destination_folder = tmp_path / 'destination_folder' + writer = file_writer.FileWriter(destination_folder=destination_folder) + writer.create_dir() + assert destination_folder.is_dir() + + def test_create_dir_from_remote_path_does_not_create_folder(self): + destination_folder = 'gs://fake-bucket' + writer = file_writer.FileWriter(destination_folder=destination_folder) + writer.create_dir() + expected_path = pathlib.Path(destination_folder) + assert not expected_path.is_dir() diff --git a/py/tests/unit/io/writers/test_json_writer.py b/py/tests/unit/io/writers/test_json_writer.py index e72e6d6d..b4e5b281 100644 --- a/py/tests/unit/io/writers/test_json_writer.py +++ b/py/tests/unit/io/writers/test_json_writer.py @@ -1,3 +1,16 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import annotations import json