From 4eefc65f922f13b4a9c6fb11262c6e364f4138b0 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Sun, 27 Oct 2024 16:09:36 -0400 Subject: [PATCH] Apply docstrings --- README.md | 2 +- src/cifkit/data/radius_handler.py | 4 +- src/cifkit/models/cif.py | 85 +++++++++++++++- src/cifkit/models/cif_ensemble.py | 159 +++++++++++++++++++++++++++--- 4 files changed, 232 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index a4b0bb0..6343df9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # cifkit -![Integration Tests](https://github.com/bobleesj/cifkit/actions/workflows/python-run-pytest.yml/badge.svg) +[![CI](https://github.com/bobleesj/cifkit/actions/workflows/tests-on-pr.yml/badge.svg?branch=main)](https://github.com/bobleesj/cifkit/actions/workflows/tests-on-pr.yml) [![codecov](https://codecov.io/gh/bobleesj/cifkit/graph/badge.svg?token=AN2YAC337A)](https://codecov.io/gh/bobleesj/cifkit) ![Python - Version](https://img.shields.io/pypi/pyversions/quacc) [![PyPi version](https://img.shields.io/pypi/v/cifkit.svg)](https://pypi.python.org/pypi/cifkit) diff --git a/src/cifkit/data/radius_handler.py b/src/cifkit/data/radius_handler.py index 483cb56..80579c8 100644 --- a/src/cifkit/data/radius_handler.py +++ b/src/cifkit/data/radius_handler.py @@ -34,7 +34,7 @@ def get_CIF_pauling_radius(elements: list[str]) -> dict: def get_radius_values_per_element( elements: list[str], shortest_bond_distances -) -> dict: +) -> dict[str : dict[str:float]]: """Merge CIF and Pauling radius data with CIF refined radius data.""" is_radius_data_available = get_is_radius_data_available(elements) @@ -61,7 +61,7 @@ def get_radius_values_per_element( def compute_radius_sum( radius_values: dict[str : dict[str:float]], is_radius_data_available: bool -): +) -> dict[str : dict[str:float]]: """Compute the sum of two radii.""" if not is_radius_data_available: diff --git a/src/cifkit/models/cif.py b/src/cifkit/models/cif.py index 7352431..4f40d34 100644 --- 
a/src/cifkit/models/cif.py +++ b/src/cifkit/models/cif.py @@ -198,12 +198,13 @@ def _log_info(self, message): logging.info(formatted_message) def _preprocess(self): - """Preprocess each .cif file and check any error.""" + """Preprocess each .cif file before initializing and separate files with + error.""" self._log_info(CifLog.PREPROCESSING.value) edit_cif_file_based_on_db(self.file_path) def _load_data(self): - """Load data from the .cif file and process it.""" + """Load data from the .cif file and extract attributes.""" self._log_info(CifLog.LOADING_DATA.value) self._block = get_cif_block(self.file_path) self._parse_cif_data() @@ -265,8 +266,7 @@ def _generate_supercell(self) -> None: self.supercell_atom_count = get_cell_atom_count(self.supercell_points) def compute_connections(self, cutoff_radius=10.0) -> None: - """Computes various connection parameters for the crystal structure, - including connection network, shortest distances, bond counts, and + """Compute connection network, shortest distances, bond counts, and coordination numbers (CN). These prperties are lazily loaded to avoid unnecessary computation during the initialization and pre-processing step. @@ -462,6 +462,8 @@ def shortest_bond_pair_distance(self): Returns ------- dict[tuple[str, str], float] + Dictionary where each key is a tuple of element symbols and the float value + is the distance between the pair of elements in Angstroms. Examples -------- @@ -506,11 +508,86 @@ def shortest_site_pair_distance(self): @property @ensure_connections def radius_values(self): + """Retrieve CIF radius, CIF_refined radius, and Pauling CN12 radius. + This property uses lazily loaded connections to compute these distances + if they are not already available because the CIF radius values are + determined using the shortest bonding pair from + shortest_bond_pair_distance. 
+ + Returns + ------- + dict[str, dict[str, float]] + Dictionary where each key is an atomic label and the value is a dictionary + containing the CIF radius, CIF_refined radius, and Pauling CN12 radius in + Angstroms. + + Examples + -------- + >>> cif.radius_values + { + "In": { + "CIF_radius": 1.624, + "CIF_radius_refined": 1.328, + "Pauling_radius_CN12": 1.66, + }, + "Rh": { + "CIF_radius": 1.345, + "CIF_radius_refined": 1.369, + "Pauling_radius_CN12": 1.342, + }, + "U": { + "CIF_radius": 1.377, + "CIF_radius_refined": 1.614, + "Pauling_radius_CN12": 1.516, + }, + } + """ return self._radius_values @property @ensure_connections def radius_sum(self): + """Retrieve the sum of CIF radius, CIF_refined radius, and Pauling CN12 + radius for the shortest bonding pairs of elements. + + Returns + ------- + dict[str, dict[str, float]] + Dictionary where each key is a radius type and the value is a dictionary + with the key being a bond pair of elements and the value being the total + radius in Angstroms. + + Examples + -------- + >>> cif.radius_sum + { + "CIF_radius_sum": { + "In-In": 3.248, + "In-Rh": 2.969, + "In-U": 3.001, + "Rh-Rh": 2.69, + "Rh-U": 2.722, + "U-U": 2.754, + }, + "CIF_radius_refined_sum": { + "In-In": 2.657, + "In-Rh": 2.697, + "In-U": 2.943, + "Rh-Rh": 2.737, + "Rh-U": 2.983, + "U-U": 3.229, + }, + "Pauling_radius_sum": { + "In-In": 3.32, + "In-Rh": 3.002, + "In-U": 3.176, + "Rh-Rh": 2.684, + "Rh-U": 2.858, + "U-U": 3.032, + }, + } + """ + return self._radius_sum @property diff --git a/src/cifkit/models/cif_ensemble.py b/src/cifkit/models/cif_ensemble.py index 702535f..bf49f21 100644 --- a/src/cifkit/models/cif_ensemble.py +++ b/src/cifkit/models/cif_ensemble.py @@ -19,6 +19,35 @@ def __init__( preprocess=True, logging_enabled=False, ) -> None: + """Initialize a CifEnsemble object, containing a collection of Cif objects. + + Parameters + ---------- + cif_dir_path : str + Path to the folder containing .cif file(s). 
+ add_nested_files : bool, optional + Option to include .cif files contained in sub-directories within cif_dir_path, + by default False + preprocess : bool, optional + Option to edit .cif files before initializing each .cif into Cif object, + by default True + logging_enabled : bool, optional + Option to log while pre-processing Cif objects, by default False + + Attributes + ---------- + dir_path: str + Path to the folder containing .cif files + file_paths: list[str] + List of file paths to .cif files + cifs: list[Cif] + List of Cif objects + file_count: int + Number of .cif files in the folder + logging_enabled: bool + Option to log while pre-processing Cif objects + """ + # Process each file, handling exceptions that may occur self.logging_enabled = logging_enabled file_paths = get_file_paths( @@ -68,37 +97,80 @@ def _get_unique_property_values(self, property_name: str): @property def unique_formulas(self) -> set[str]: - """Get unique formulas from all .cif files in the folder.""" + """Get unique formulas from all .cif files in the folder. + + Returns + ------- + set[str] + Unique formulas. + + Examples + -------- + >>> cif_ensemble.unique_formulas + {"EuIr2Ge2", "CeRu2Ge2", "LaRu2Ge2", "Mo"} + """ return self._get_unique_property_values("formula") @property def unique_structures(self) -> set[str]: - """Get unique structures from all .cif files in the folder.""" + """Get unique structures from all .cif files in the folder. + + Examples + -------- + >>> cif_ensemble.unique_structures + {"CeAl2Ga2", "W"} + """ return self._get_unique_property_values("structure") @property def unique_tags(self) -> set[str]: - """Get unique formulas from all .cif files in the folder.""" + """Get unique tags from all .cif files in the folder. 
+ + Examples + -------- + >>> cif_ensemble.unique_tags + {"hex", "rt", "rt_hex", ""} + """ return self._get_unique_property_values("tag") @property def unique_space_group_names(self) -> set[str]: - """Get unique space groups from all .cif files in the folder.""" + """Get unique space group names from all .cif files in the folder. + Examples + -------- + >>> cif_ensemble.unique_space_group_names + {"I4/mmm", "Im-3m"} + """ return self._get_unique_property_values("space_group_name") @property def unique_space_group_numbers(self) -> set[str]: - """Get unique space groups from all .cif files in the folder.""" + """Get unique space group numbers from all .cif files in the folder. + Examples + -------- + >>> cif_ensemble.unique_space_group_numbers + {139, 229} + """ return self._get_unique_property_values("space_group_number") @property def unique_site_mixing_types(self) -> set[int]: - """Get unique site mixing types from all .cif files in the folder.""" + """Get unique site mixing types from all .cif files in the folder. + Examples + -------- + >>> cif_ensemble.unique_site_mixing_types + {"deficiency_without_atomic_mixing", "full_occupancy"} + """ return self._get_unique_property_values("site_mixing_type") @property def unique_composition_types(self) -> set[int]: - """Get unique composition types from all .cif files in the folder.""" + """Get unique composition types from all .cif files in the folder. + Examples + -------- + >>> cif_ensemble.unique_composition_types + {1, 3} + """ return self._get_unique_property_values("composition_type") def _get_unique_property_values_from_set(self, property_name: str): @@ -109,17 +181,46 @@ def _get_unique_property_values_from_set(self, property_name: str): @property def unique_elements(self) -> set[str]: - """Get unique elements from all .cif files in the folder.""" + """Get unique elements from all .cif files in the folder. 
+ + Examples + -------- + >>> cif_ensemble.unique_elements + { + "Ce", + "Eu", + "Ge", + "Ir", + "La", + "Mo", + "Ru", + } + """ + return self._get_unique_property_values_from_set("unique_elements") @property def CN_unique_values_by_min_dist_method(self) -> set[str]: + """Get unique coordination number values by the minimum distance method. + + Returns + ------- + set[str] + Unique coordination number values by minimum distance method from all .cif files. + """ return self._get_unique_property_values_from_set( "CN_unique_values_by_min_dist_method" ) @property def CN_unique_values_by_best_methods(self) -> set[str]: + """Get unique coordination number values by the best methods. + + Returns + ------- + set[str] + Unique coordination number values by best methods from all .cif files. + """ return self._get_unique_property_values_from_set( "CN_unique_values_by_best_methods" ) @@ -334,15 +435,51 @@ def filter_by_supercell_count( def move_cif_files( self, file_paths: set[str], to_directory_path: str ) -> None: - """Move a set of CIF files to a destination directory.""" + """Move a set of CIF files to a destination directory. + + Parameters + ---------- + file_paths : set[str] + Set of file paths to CIF files. + to_directory_path : str + Destination directory path. + + Examples + -------- + >>> file_paths = { + "tests/data/cif/ensemble_test/300169.cif", + "tests/data/cif/ensemble_test/300170.cif", + } + >>> dest_dir_path = "tests/data/cif/ensemble_new_dir" + >>> cif_ensemble_test.move_cif_files(file_paths, dest_dir_path) + """ move_files(to_directory_path, list(file_paths)) def copy_cif_files( self, file_paths: set[str], to_directory_path: str ) -> None: - """Copy a set of CIF files to a destination directory.""" + """Copy a set of CIF files to a destination directory. + + Parameters + ---------- + file_paths : set[str] + Set of file paths to CIF files. + to_directory_path : str + Destination directory path. 
+ + Examples + -------- + >>> file_paths = { + "tests/data/cif/ensemble_test/300169.cif", + "tests/data/cif/ensemble_test/300170.cif", + } + >>> dest_dir_path = "tests/data/cif/ensemble_new_dir" + >>> cif_ensemble_test.copy_cif_files(file_paths, dest_dir_path) + """ copy_files(to_directory_path, list(file_paths)) - + + # FIXME: refactor this section to maintain DRY principle + def generate_structure_histogram(self, display=False, output_dir=None): plot_histogram( "structure",