diff --git a/src/assess_gtfs/cleaners.py b/src/assess_gtfs/cleaners.py
index ae3944d..ee8e760 100644
--- a/src/assess_gtfs/cleaners.py
+++ b/src/assess_gtfs/cleaners.py
@@ -3,8 +3,18 @@
 from typing import Union
 
 import numpy as np
+from gtfs_kit.cleaners import clean_ids as clean_ids_gk
+from gtfs_kit.cleaners import (
+    clean_route_short_names as clean_route_short_names_gk,
+)
+from gtfs_kit.cleaners import clean_times as clean_times_gk
+from gtfs_kit.cleaners import drop_zombies as drop_zombies_gk
 
-from assess_gtfs.utils.defence import _check_iterable, _gtfs_defence
+from assess_gtfs.utils.defence import (
+    _check_iterable,
+    _gtfs_defence,
+    _type_defence,
+)
 
 
 def drop_trips(gtfs, trip_id: Union[str, list, np.ndarray]) -> None:
@@ -175,3 +185,63 @@ def clean_multiple_stop_fast_travel_warnings(
         ~gtfs.multiple_stops_invalid["trip_id"].isin(trip_ids)
     ]
     return None
+
+
+def core_cleaners(
+    gtfs,
+    clean_ids: bool = True,
+    clean_times: bool = True,
+    clean_route_short_names: bool = True,
+    drop_zombies: bool = True,
+) -> None:
+    """Clean the gtfs with the core cleaners of gtfs-kit.
+
+    The source code for the cleaners, along with detailed descriptions of the
+    cleaning they are performing, can be found here:
+    https://github.com/mrcagney/gtfs_kit/blob/master/gtfs_kit/cleaners.py
+
+    All credit for these cleaners goes to the creators of the gtfs_kit package.
+    HOMEPAGE: https://github.com/mrcagney/gtfs_kit
+
+    Parameters
+    ----------
+    gtfs : GtfsInstance
+        The gtfs to clean
+    clean_ids : bool, optional
+        Whether or not to use clean_ids, by default True
+    clean_times : bool, optional
+        Whether or not to use clean_times, by default True
+    clean_route_short_names : bool, optional
+        Whether or not to use clean_route_short_names, by default True
+    drop_zombies : bool, optional
+        Whether or not to use drop_zombies, by default True
+
+    Returns
+    -------
+    None
+
+    """
+    # defences
+    _gtfs_defence(gtfs, "gtfs")
+    _type_defence(clean_ids, "clean_ids", bool)
+    _type_defence(clean_times, "clean_times", bool)
+    _type_defence(clean_route_short_names, "clean_route_short_names", bool)
+    _type_defence(drop_zombies, "drop_zombies", bool)
+    # cleaning
+    if clean_ids:
+        clean_ids_gk(gtfs.feed)
+    if clean_times:
+        clean_times_gk(gtfs.feed)
+    if clean_route_short_names:
+        clean_route_short_names_gk(gtfs.feed)
+    if drop_zombies:
+        try:
+            drop_zombies_gk(gtfs.feed)
+        except KeyError:
+            warnings.warn(
+                UserWarning(
+                    "The drop_zombies cleaner was unable to operate on "
+                    "clean_feed as the trips table has no shape_id column"
+                )
+            )
+    return None
diff --git a/src/assess_gtfs/gtfs_utils.py b/src/assess_gtfs/gtfs_utils.py
index 9e6568b..3067b1a 100644
--- a/src/assess_gtfs/gtfs_utils.py
+++ b/src/assess_gtfs/gtfs_utils.py
@@ -475,3 +475,36 @@ def convert_pandas_to_plotly(
     if return_html:
         return fig.to_html(full_html=False)
     return fig
+
+
+def _function_pipeline(
+    gtfs, func_map: dict, operations: Union[dict, type[None]]
+) -> None:
+    """Iterate through and act on a functional pipeline."""
+    _gtfs_defence(gtfs, "gtfs")
+    _type_defence(func_map, "func_map", dict)
+    _type_defence(operations, "operations", (dict, type(None)))
+    if operations:
+        for key in operations.keys():
+            if key not in func_map.keys():
+                raise KeyError(
+                    f"'{key}' function passed to 'operations' is not a "
+                    "known operation. Known operations include: "
+                    f"{func_map.keys()}"
+                )
+        for operation in operations:
+            # check value is dict or none (for kwargs)
+            _type_defence(
+                operations[operation],
+                f"operations[{operation}]",
+                (dict, type(None)),
+            )
+            operations[operation] = (
+                {} if operations[operation] is None else operations[operation]
+            )
+            func_map[operation](gtfs=gtfs, **operations[operation])
+    # if no operations passed, carry out all operations
+    else:
+        for operation in func_map:
+            func_map[operation](gtfs=gtfs)
+    return None
diff --git a/src/assess_gtfs/multi_validation.py b/src/assess_gtfs/multi_validation.py
index 18a0488..23685d0 100644
--- a/src/assess_gtfs/multi_validation.py
+++ b/src/assess_gtfs/multi_validation.py
@@ -160,12 +160,12 @@ def save_feeds(
             inst.save(path, overwrite=overwrite)
         return None
 
-    def clean_feeds(self, clean_kwargs: Union[dict, None] = None) -> None:
+    def clean_feeds(self, cleansers: Union[dict, None] = None) -> None:
         """Clean each of the feeds in the MultiGtfsInstance.
 
         Parameters
         ----------
-        clean_kwargs : Union[dict, None], optional
+        cleansers : Union[dict, None], optional
             The kwargs to pass to GtfsInstance.clean_feed() for each Gtfs in
             the MultiGtfsInstance, by default None
 
@@ -175,28 +175,26 @@ def clean_feeds(self, clean_kwargs: Union[dict, None] = None) -> None:
         """
         # defences
-        _type_defence(clean_kwargs, "clean_kwargs", (dict, type(None)))
-        if isinstance(clean_kwargs, type(None)):
-            clean_kwargs = {}
+        _type_defence(cleansers, "cleansers", (dict, type(None)))
+        if isinstance(cleansers, type(None)):
+            cleansers = {}
         # clean GTFS instances
         progress = tqdm(
             zip(self.paths, self.instances), total=len(self.instances)
         )
         for path, inst in progress:
             progress.set_description(f"Cleaning GTFS from path {path}")
-            inst.clean_feed(**clean_kwargs)
+            inst.clean_feed(cleansers=cleansers)
         return None
 
-    def is_valid(
-        self, validation_kwargs: Union[dict, None] = None
-    ) -> pd.DataFrame:
+    def is_valid(self, validators: Union[dict, None] = None) -> pd.DataFrame:
         """Validate each of the feeds in the MultiGtfsInstance.
 
Parameters ---------- - validation_kwargs : Union[dict, None], optional - The kwargs to pass to GtfsInstance.is_valid() for each Gtfs in - the MultiGtfsInstance, by default None + validators : Union[dict, None], optional + The kwargs to pass to GtfsInstance.is_valid(validators) for each + Gtfs in the MultiGtfsInstance, by default None Returns ------- @@ -206,18 +204,16 @@ def is_valid( """ # defences - _type_defence( - validation_kwargs, "validation_kwargs", (dict, type(None)) - ) - if isinstance(validation_kwargs, type(None)): - validation_kwargs = {} + _type_defence(validators, "validators", (dict, type(None))) + if isinstance(validators, type(None)): + validators = {} # clean GTFS instances progress = tqdm( zip(self.paths, self.instances), total=len(self.instances) ) for path, inst in progress: progress.set_description(f"Validating GTFS from path {path}") - inst.is_valid(**validation_kwargs) + inst.is_valid(validators=validators) # concat all validation tables into one tables = [] diff --git a/src/assess_gtfs/validation.py b/src/assess_gtfs/validation.py index 8f30185..955f055 100644 --- a/src/assess_gtfs/validation.py +++ b/src/assess_gtfs/validation.py @@ -18,12 +18,10 @@ from plotly.graph_objects import Figure as PlotlyFigure from pretty_html_table import build_table +import assess_gtfs.cleaners as cleaners +import assess_gtfs.validators as gtfs_validators from assess_gtfs.calendar import create_calendar_from_dates -from assess_gtfs.cleaners import ( - clean_consecutive_stop_fast_travel_warnings, - clean_multiple_stop_fast_travel_warnings, -) -from assess_gtfs.gtfs_utils import filter_gtfs +from assess_gtfs.gtfs_utils import _function_pipeline, filter_gtfs from assess_gtfs.report.report_utils import TemplateHTML, _set_up_report_dir from assess_gtfs.routes import ( get_saved_route_type_lookup, @@ -40,10 +38,29 @@ _is_expected_filetype, _type_defence, ) -from assess_gtfs.validators import ( - validate_travel_between_consecutive_stops, - validate_travel_over_multiple_stops, -) + +# THESE MAPPINGS CAN NOT BE MOVED TO CONSTANTS AS THEY INTRODUCE DEPENDENCY +# ISSUES. +# TODO: Update these once further cleaners/validators are merged +CLEAN_FEED_FUNCTION_MAP = { + "core_cleaners": cleaners.core_cleaners, + "clean_consecutive_stop_fast_travel_warnings": ( + cleaners.clean_consecutive_stop_fast_travel_warnings + ), + "clean_multiple_stop_fast_travel_warnings": ( + cleaners.clean_multiple_stop_fast_travel_warnings + ), +} + +VALIDATE_FEED_FUNC_MAP = { + "core_validation": gtfs_validators.core_validation, + "validate_travel_between_consecutive_stops": ( + gtfs_validators.validate_travel_between_consecutive_stops + ), + "validate_travel_over_multiple_stops": ( + gtfs_validators.validate_travel_over_multiple_stops + ), +} def _get_intermediate_dates( @@ -313,15 +330,13 @@ def get_gtfs_files(self) -> list: self.file_list = file_list return self.file_list - def is_valid(self, far_stops: bool = False) -> pd.DataFrame: + def is_valid(self, validators: dict = None) -> pd.DataFrame: """Check a feed is valid with `gtfs_kit`. Parameters ---------- - far_stops : bool, optional - Whether or not to perform validation for far stops (both - between consecutive stops and over multiple stops), by default - False. + validators : dict, optional + A dictionary of function name to kwargs mappings. Returns ------- @@ -329,10 +344,14 @@ def is_valid(self, far_stops: bool = False) -> pd.DataFrame: Table of errors, warnings & their descriptions. 
""" - self.validity_df = self.feed.validate() - if far_stops: - validate_travel_between_consecutive_stops(self) - validate_travel_over_multiple_stops(self) + _type_defence(validators, "validators", (dict, type(None))) + # create validity df + self.validity_df = pd.DataFrame( + columns=["type", "message", "table", "rows"] + ) + _function_pipeline( + gtfs=self, func_map=VALIDATE_FEED_FUNC_MAP, operations=validators + ) return self.validity_df def print_alerts(self, alert_type: str = "error") -> None: @@ -383,36 +402,27 @@ def print_alerts(self, alert_type: str = "error") -> None: return None - def clean_feed( - self, validate: bool = False, fast_travel: bool = False - ) -> None: - """Attempt to clean feed using `gtfs_kit`. + def clean_feed(self, cleansers: dict = None) -> None: + """Clean the gtfs feed. Parameters ---------- - validate: bool, optional - Whether or not to validate the dataframe before cleaning, by - default False. - fast_travel: bool, optional - Whether or not to clean warnings related to fast travel, by default - False. + cleansers : dict, optional + A mapping of cleansing functions and kwargs, by default None + + Returns + ------- + None """ - _type_defence(fast_travel, "fast_travel", bool) - _type_defence(validate, "valiidate", bool) - if validate: - self.is_valid(far_stops=fast_travel) - try: - # In cases where shape_id is missing, keyerror is raised. - # https://developers.google.com/transit/gtfs/reference#shapestxt - # shows that shapes.txt is optional file. - self.feed = self.feed.clean() - if fast_travel: - clean_consecutive_stop_fast_travel_warnings(self) - clean_multiple_stop_fast_travel_warnings(self) - except KeyError: - # TODO: Issue 74 - Improve this to clean feed when KeyError raised - print("KeyError. Feed was not cleaned.") + # DEV NOTE: Opting not to allow for validation in clean_feed(). + # .is_valid() should be used before hand. + # DEV NOTE 2: Use of param name 'cleansers' is to avoid conflicts + _type_defence(cleansers, "cleansers", (dict, type(None))) + _function_pipeline( + gtfs=self, func_map=CLEAN_FEED_FUNCTION_MAP, operations=cleansers + ) + return None def _produce_stops_map( self, what_geoms: str, is_filtered: bool, crs: Union[int, str] @@ -1386,7 +1396,7 @@ def html_report( report_dir: Union[str, pathlib.Path] = "outputs", overwrite: bool = False, summary_type: str = "mean", - extended_validation: bool = False, + extended_validation: bool = True, clean_feed: bool = True, ) -> None: """Generate a HTML report describing the GTFS data. 
@@ -1403,7 +1413,7 @@ def html_report( default "mean" extended_validation : bool, optional Whether or not to create extended reports for gtfs validation - errors/warnings, by default False + errors/warnings, by default True clean_feed : bool, optional Whether or not to clean the feed before validating, by default True @@ -1431,10 +1441,11 @@ def html_report( date = datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%Y") # feed evaluation + self.is_valid() if clean_feed: - self.clean_feed(validate=True, fast_travel=True) - # re-validate to clean any newly raised errors/warnings - validation_dataframe = self.is_valid(far_stops=True) + self.clean_feed() + # re-validate to clean any newly raised errors/warnings + validation_dataframe = self.is_valid() # create extended reports if requested if extended_validation: diff --git a/src/assess_gtfs/validators.py b/src/assess_gtfs/validators.py index eef9be4..ee97ca0 100644 --- a/src/assess_gtfs/validators.py +++ b/src/assess_gtfs/validators.py @@ -264,3 +264,13 @@ def validate_travel_over_multiple_stops(gtfs: "GtfsInstance") -> None: ) return far_stops_df + + +def core_validation(gtfs: "GtfsInstance"): + """Carry out the main validators of gtfs-kit.""" + _gtfs_defence(gtfs, "gtfs") + validation_df = gtfs.feed.validate() + gtfs.validity_df = pd.concat( + [validation_df, gtfs.validity_df], axis=0 + ).reset_index(drop=True) + # diff --git a/tests/test_cleaners.py b/tests/test_cleaners.py index c54ef6d..d97780b 100644 --- a/tests/test_cleaners.py +++ b/tests/test_cleaners.py @@ -9,6 +9,7 @@ from assess_gtfs.cleaners import ( clean_consecutive_stop_fast_travel_warnings, clean_multiple_stop_fast_travel_warnings, + core_cleaners, drop_trips, ) from assess_gtfs.validation import GtfsInstance @@ -136,7 +137,7 @@ def test_clean_consecutive_stop_fast_travel_warnings_on_pass( self, gtfs_fixture, _EXPECTED_NEWPORT_VALIDITY_DF ): """General tests for clean_consecutive_stop_fast_travel_warnings().""" - gtfs_fixture.is_valid(far_stops=True) + gtfs_fixture.is_valid() pd.testing.assert_frame_equal( _EXPECTED_NEWPORT_VALIDITY_DF, gtfs_fixture.validity_df ) @@ -200,7 +201,7 @@ def test_clean_multiple_stop_fast_travel_warnings_on_pass( self, gtfs_fixture, _EXPECTED_NEWPORT_VALIDITY_DF ): """General tests for clean_multiple_stop_fast_travel_warnings().""" - gtfs_fixture.is_valid(far_stops=True) + gtfs_fixture.is_valid() pd.testing.assert_frame_equal( _EXPECTED_NEWPORT_VALIDITY_DF, gtfs_fixture.validity_df ) @@ -230,11 +231,97 @@ def test_clean_multiple_stop_fast_travel_warnings_on_pass( 3: [], }, } + # test validation; test gtfs with no warnings clean_multiple_stop_fast_travel_warnings( - gtfs=gtfs_fixture, validate=False + gtfs=gtfs_fixture, validate=True ) gtfs_fixture.is_valid() assert expected_validation == gtfs_fixture.validity_df.to_dict(), ( "Validation table is not as expected after cleaning consecutive " "stop fast travel warnings" ) + + +class TestCoreCleaner(object): + """Tests for core_cleaners(). + + Notes + ----- + There are no passing tests for this function as it relies on function from + gtfs-kit which have already been tested. 
+ + """ + + @pytest.mark.parametrize( + ( + "clean_ids, clean_times, clean_route_short_names, drop_zombies, " + "raises, match" + ), + [ + ( + 1, + True, + True, + True, + TypeError, + r".*expected .*bool.* Got .*int.*", + ), + ( + True, + dict(), + True, + True, + TypeError, + r".*expected .*bool.* Got .*dict.*", + ), + ( + True, + True, + "test string", + True, + TypeError, + r".*expected .*bool.* Got .*str.*", + ), + ( + True, + True, + True, + 2.12, + TypeError, + r".*expected .*bool.* Got .*float.*", + ), + ], + ) + def test_core_claners_defence( + self, + gtfs_fixture, + clean_ids, + clean_times, + clean_route_short_names, + drop_zombies, + raises, + match, + ): + """Defensive tests for core_cleaners.""" + with pytest.raises(raises, match=match): + gtfs_fixture.is_valid() + core_cleaners( + gtfs_fixture, + clean_ids, + clean_times, + clean_route_short_names, + drop_zombies, + ) + + def test_core_cleaners_drop_zombies_warns(self, gtfs_fixture): + """Test that warnings are emitted when shape_id isn't present in... + + trips. + """ + gtfs_fixture.feed.trips.drop("shape_id", axis=1, inplace=True) + with pytest.warns( + UserWarning, + match=r".*drop_zombies cleaner was unable to operate.*", + ): + gtfs_fixture.is_valid(validators={"core_validation": None}) + gtfs_fixture.clean_feed() diff --git a/tests/test_gtfs_utils.py b/tests/test_gtfs_utils.py index 7d589db..212abaf 100644 --- a/tests/test_gtfs_utils.py +++ b/tests/test_gtfs_utils.py @@ -12,13 +12,14 @@ from assess_gtfs.gtfs_utils import ( _add_validation_row, + _function_pipeline, _validate_datestring, bbox_filter_gtfs, convert_pandas_to_plotly, filter_gtfs, filter_gtfs_around_trip, ) -from assess_gtfs.validation import GtfsInstance +from assess_gtfs.validation import VALIDATE_FEED_FUNC_MAP, GtfsInstance # location of GTFS test fixture GTFS_FIX_PTH = os.path.join("tests", "data", "newport-20230613_gtfs.zip") @@ -270,7 +271,7 @@ def test__add_validation_row_defence(self): def test__add_validation_row_on_pass(self): """General tests for _add_validation_row().""" gtfs = GtfsInstance(gtfs_pth=GTFS_FIX_PTH) - gtfs.is_valid() + gtfs.is_valid(validators={"core_validation": {}}) _add_validation_row( gtfs=gtfs, _type="warning", message="test", table="stops" @@ -366,6 +367,45 @@ def test_convert_pandas_to_plotly_on_pass(self, test_df): ) +class TestFunctionPipeline(object): + """Tests for _function_pipeline. + + Notes + ----- + Not testing on pass here as better cases can be found in the tests for + GtfsInstance's is_valid() and clean_feed() methods. + + """ + + @pytest.mark.parametrize( + "operations, raises, match", + [ + # invalid type for 'validators' + (True, TypeError, ".*expected .*dict.*. 
Got .*bool.*"), + # invalid validator + ( + {"not_a_valid_validator": None}, + KeyError, + ( + r"'not_a_valid_validator' function passed to 'operations'" + r" is not a known operation.*" + ), + ), + # invalid type for kwargs for validator + ( + {"core_validation": pd.DataFrame()}, + TypeError, + ".* expected .*dict.*NoneType.*", + ), + ], + ) + def test_function_pipeline_defence(self, operations, raises, match): + """Defensive test for _function_pipeline.""" + gtfs = GtfsInstance(GTFS_FIX_PTH) + with pytest.raises(raises, match=match): + _function_pipeline(gtfs, VALIDATE_FEED_FUNC_MAP, operations) + + class Test_ValidateDatestring(object): """Tests for _validate_datestring.""" diff --git a/tests/test_multi_validation.py b/tests/test_multi_validation.py index 2ecb83e..7762d9e 100644 --- a/tests/test_multi_validation.py +++ b/tests/test_multi_validation.py @@ -262,30 +262,23 @@ def test_save_feeds(self, multi_gtfs_paths, tmp_path): def test_clean_feeds_defences(self, multi_gtfs_fixture): """Defensive tests for .clean_feeds().""" - with pytest.raises(TypeError, match=".*clean_kwargs.*dict.*bool"): + with pytest.raises(TypeError, match=".*cleansers.*dict.*bool"): multi_gtfs_fixture.clean_feeds(True) def test_clean_feeds_on_pass(self, multi_gtfs_fixture): """General tests for .clean_feeds().""" - # check with far stops logic first - extra_valid_df = multi_gtfs_fixture.is_valid( - validation_kwargs={"far_stops": True} - ) - n = 14 - n_out = len(extra_valid_df) - assert n_out == n, f"Expected extra_valid_df of len {n}, found {n_out}" # validate and do quick check on validity_df valid_df = multi_gtfs_fixture.is_valid() - n = 12 + n = 14 n_out = len(valid_df) - assert n_out == n, f"Expected valid_df of len {n}, found {n_out}" + assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" # clean feed multi_gtfs_fixture.clean_feeds() # ensure cleaning has occured - new_valid = multi_gtfs_fixture.is_valid() - n = 11 - n_out = len(new_valid) - assert n_out == n, f"Expected valid_df of len {n}, found {n_out}" + new_valid = multi_gtfs_fixture.is_valid( + validators={"core_validation": {}} + ) + assert len(new_valid) == 12 assert np.array_equal( list(new_valid.iloc[4][["type", "table"]].values), ["error", "routes"], @@ -293,23 +286,18 @@ def test_clean_feeds_on_pass(self, multi_gtfs_fixture): def test_is_valid_defences(self, multi_gtfs_fixture): """Defensive tests for .is_valid().""" - with pytest.raises(TypeError, match=".*validation_kwargs.*dict.*bool"): + with pytest.raises(TypeError, match=".*validators.*dict.*bool"): multi_gtfs_fixture.is_valid(True) def test_is_valid_on_pass(self, multi_gtfs_fixture): """General tests for is_valid().""" valid_df = multi_gtfs_fixture.is_valid() - n = 12 + n = 14 n_out = len(valid_df) - assert n_out == n, f"Expected valid_df of len {n}, found {n_out}" + assert n_out == n, f"Expected validity_df of len {n}, found {n_out}" assert np.array_equal( list(valid_df.iloc[4][["type", "message"]].values), - ( - [ - "error", - "Invalid route_type; maybe has extra space characters", - ] - ), + (["warning", "Fast Travel Between Consecutive Stops"]), ) assert hasattr( multi_gtfs_fixture, "validity_df" @@ -317,17 +305,6 @@ def test_is_valid_on_pass(self, multi_gtfs_fixture): assert isinstance( multi_gtfs_fixture.validity_df, pd.DataFrame ), "validity_df not a df" - # run is valid but with fast travel logic - n = 14 - extra_valid_df = multi_gtfs_fixture.is_valid( - validation_kwargs={"far_stops": True} - ) - n_out = len(extra_valid_df) - assert n_out == n, f"Expected 
extra_valid_df of len {n}, found {n_out}" - assert np.array_equal( - list(extra_valid_df.iloc[4][["type", "message"]].values), - (["warning", "Fast Travel Between Consecutive Stops"]), - ) def test_validate_empty_feeds(self, multi_gtfs_fixture): """Tests for validate_empty_feeds.""" diff --git a/tests/test_validation.py b/tests/test_validation.py index 94ed676..b4c7b3c 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -25,12 +25,19 @@ @pytest.fixture(scope="function") # some funcs expect cleaned feed others dont -def gtfs_fixture(): +def newp_gtfs_fixture(): """Fixture for test funcs expecting a valid feed object.""" gtfs = GtfsInstance(gtfs_pth=GTFS_FIX_PTH) return gtfs +@pytest.fixture(scope="function") +def chest_gtfs_fixture(): + """Fixture for test funcs expecting a valid feed object.""" + gtfs = GtfsInstance(here("tests/data/chester-20230816-small_gtfs.zip")) + return gtfs + + class TestGtfsInstance(object): """Tests related to the GtfsInstance class.""" @@ -115,7 +122,7 @@ def test_init_on_pass(self): without_pth.to_dict() == with_pth.to_dict() ), "Failed to get route type lookup correctly" - def test_get_gtfs_files(self, gtfs_fixture): + def test_get_gtfs_files(self, newp_gtfs_fixture): """Assert files that make up the GTFS.""" expected_files = [ # smaller filter has resulted in a GTFS with no calendar dates / @@ -131,40 +138,77 @@ def test_get_gtfs_files(self, gtfs_fixture): "calendar.txt", "routes.txt", ] - foundf = gtfs_fixture.get_gtfs_files() + foundf = newp_gtfs_fixture.get_gtfs_files() assert ( foundf == expected_files ), f"GTFS files not as expected. Expected {expected_files}," "found: {foundf}" - def test_is_valid(self, gtfs_fixture): - """Assertions about validity_df table.""" - gtfs_fixture.is_valid() - assert isinstance( - gtfs_fixture.validity_df, pd.core.frame.DataFrame - ), f"Expected DataFrame. Found: {type(gtfs_fixture.validity_df)}" - shp = gtfs_fixture.validity_df.shape - assert shp == ( - 8, - 4, - ), f"Attribute `validity_df` expected a shape of (8,4). Found: {shp}" - exp_cols = pd.Index(["type", "message", "table", "rows"]) - found_cols = gtfs_fixture.validity_df.columns - assert ( - found_cols == exp_cols - ).all(), f"Expected columns {exp_cols}. Found: {found_cols}" + def test_clean_feed_defence(self, newp_gtfs_fixture): + """Check defensive behaviours of clean_feed().""" + with pytest.raises( + TypeError, match=r".*expected .*dict.* Got .*int.*" + ): + fixt = newp_gtfs_fixture + fixt.is_valid(validators={"core_validation": None}) + fixt.clean_feed(cleansers=1) + + @pytest.mark.parametrize( + "which, validators, shape", + [ + # only core validation + ("n", {"core_validation": None}, (8, 4)), + # fast travel validators + ( + "c", + { + "core_validation": None, + "validate_travel_between_consecutive_stops": None, + "validate_travel_over_multiple_stops": None, + }, + (6, 4), + ), + # all validators + ("n", None, (8, 4)), + ], + ) + def test_is_valid_on_pass( + self, newp_gtfs_fixture, chest_gtfs_fixture, which, validators, shape + ): + """Tests/assertions for is_valid() while passing. + + Notes + ----- + These tests are mostly to assure that the validators are being + identified and run, and that the validation df returned is as expected. + + I will be refraining from over testing here as it would essentially be + testing the validators again, which occurs in test_validators.py. + + Tests for validators with kwargs would be useful, once they are added. + + """ + # Bypassing any defensive checks for wich. 
+ # Correct inputs are assured as tese are internal tests. + if which.lower().strip() == "n": + fixture = newp_gtfs_fixture + else: + fixture = chest_gtfs_fixture + df = fixture.is_valid(validators=validators) + assert isinstance(df, pd.DataFrame), "is_valid() failed to return df" + assert shape == df.shape, "validity_df not as expected" @pytest.mark.sanitycheck - def test_trips_unmatched_ids(self, gtfs_fixture): + def test_trips_unmatched_ids(self, newp_gtfs_fixture): """Tests to evaluate gtfs-klt's reaction to invalid IDs in trips. Parameters ---------- - gtfs_fixture : GtfsInstance + newp_gtfs_fixture : GtfsInstance a GtfsInstance test fixure """ - feed = gtfs_fixture.feed + feed = newp_gtfs_fixture.feed # add row to tripas table with invald trip_id, route_id, service_id feed.trips = pd.concat( @@ -205,16 +249,16 @@ def test_trips_unmatched_ids(self, gtfs_fixture): assert len(new_valid) == 11, "Validation table not expected size" @pytest.mark.sanitycheck - def test_routes_unmatched_ids(self, gtfs_fixture): + def test_routes_unmatched_ids(self, newp_gtfs_fixture): """Tests to evaluate gtfs-klt's reaction to invalid IDs in routes. Parameters ---------- - gtfs_fixture : GtfsInstance + newp_gtfs_fixture : GtfsInstance a GtfsInstance test fixure """ - feed = gtfs_fixture.feed + feed = newp_gtfs_fixture.feed # add row to tripas table with invald trip_id, route_id, service_id feed.routes = pd.concat( @@ -244,12 +288,12 @@ def test_routes_unmatched_ids(self, gtfs_fixture): assert len(new_valid) == 10, "Validation table not expected size" @pytest.mark.sanitycheck - def test_unmatched_service_id_behaviour(self, gtfs_fixture): + def test_unmatched_service_id_behaviour(self, newp_gtfs_fixture): """Tests to evaluate gtfs-klt's reaction to invalid IDs in calendar. Parameters ---------- - gtfs_fixture : GtfsInstance + newp_gtfs_fixture : GtfsInstance a GtfsInstance test fixure Notes @@ -260,7 +304,7 @@ def test_unmatched_service_id_behaviour(self, gtfs_fixture): calendar table contains duplicate service_ids. """ - feed = gtfs_fixture.feed + feed = newp_gtfs_fixture.feed original_error_count = len(feed.validate()) # introduce a dummy row with a non matching service_id @@ -296,25 +340,25 @@ def test_unmatched_service_id_behaviour(self, gtfs_fixture): len(new_valid[new_valid.message == "Undefined service_id"]) == 1 ), "gtfs-kit failed to identify missing service_id" - def test_print_alerts_defence(self, gtfs_fixture): + def test_print_alerts_defence(self, newp_gtfs_fixture): """Check defensive behaviour of print_alerts().""" with pytest.raises( AttributeError, match=r"is None, did you forget to use `self.is_valid()`?", ): - gtfs_fixture.print_alerts() + newp_gtfs_fixture.print_alerts() - gtfs_fixture.is_valid() + newp_gtfs_fixture.is_valid() with pytest.warns( UserWarning, match="No alerts of type doesnt_exist were found." 
): - gtfs_fixture.print_alerts(alert_type="doesnt_exist") + newp_gtfs_fixture.print_alerts(alert_type="doesnt_exist") @patch("builtins.print") # testing print statements - def test_print_alerts_single_case(self, mocked_print, gtfs_fixture): + def test_print_alerts_single_case(self, mocked_print, newp_gtfs_fixture): """Check alerts print as expected without truncation.""" - gtfs_fixture.is_valid() - gtfs_fixture.print_alerts() + newp_gtfs_fixture.is_valid() + newp_gtfs_fixture.print_alerts() # fixture contains single error fun_out = mocked_print.mock_calls assert fun_out == [ @@ -322,11 +366,11 @@ def test_print_alerts_single_case(self, mocked_print, gtfs_fixture): ], f"Expected a print about invalid route type. Found {fun_out}" @patch("builtins.print") - def test_print_alerts_multi_case(self, mocked_print, gtfs_fixture): + def test_print_alerts_multi_case(self, mocked_print, newp_gtfs_fixture): """Check multiple alerts are printed as expected.""" - gtfs_fixture.is_valid() + newp_gtfs_fixture.is_valid() # fixture contains several warnings - gtfs_fixture.print_alerts(alert_type="warning") + newp_gtfs_fixture.print_alerts(alert_type="warning") fun_out = mocked_print.mock_calls assert fun_out == [ call("Unrecognized column agency_noc"), @@ -338,7 +382,7 @@ def test_print_alerts_multi_case(self, mocked_print, gtfs_fixture): call("Unrecognized column vehicle_journey_code"), ], f"Expected print statements about GTFS warnings. Found: {fun_out}" - def test_viz_stops_defence(self, tmpdir, gtfs_fixture): + def test_viz_stops_defence(self, tmpdir, newp_gtfs_fixture): """Check defensive behaviours of viz_stops().""" tmp = os.path.join(tmpdir, "somefile.html") with pytest.raises( @@ -348,12 +392,12 @@ def test_viz_stops_defence(self, tmpdir, gtfs_fixture): "Got " ), ): - gtfs_fixture.viz_stops(out_pth=True) + newp_gtfs_fixture.viz_stops(out_pth=True) with pytest.raises( TypeError, match="`geoms` expected . Got ", ): - gtfs_fixture.viz_stops(out_pth=tmp, geoms=38) + newp_gtfs_fixture.viz_stops(out_pth=tmp, geoms=38) with pytest.raises( ValueError, match=re.escape( @@ -361,7 +405,7 @@ def test_viz_stops_defence(self, tmpdir, gtfs_fixture): "['point', 'hull']. Got foobar: " ), ): - gtfs_fixture.viz_stops(out_pth=tmp, geoms="foobar") + newp_gtfs_fixture.viz_stops(out_pth=tmp, geoms="foobar") with pytest.raises( TypeError, match=re.escape( @@ -369,28 +413,28 @@ def test_viz_stops_defence(self, tmpdir, gtfs_fixture): "" ), ): - gtfs_fixture.viz_stops(out_pth=tmp, geom_crs=1.1) + newp_gtfs_fixture.viz_stops(out_pth=tmp, geom_crs=1.1) # check missing stop_id results in an informative error message - gtfs_fixture.feed.stops.drop("stop_id", axis=1, inplace=True) + newp_gtfs_fixture.feed.stops.drop("stop_id", axis=1, inplace=True) with pytest.raises( KeyError, match="The stops table has no 'stop_code' column. While " "this is an optional field in a GTFS file, it " "raises an error through the gtfs-kit package.", ): - gtfs_fixture.viz_stops(out_pth=tmp, filtered_only=False) + newp_gtfs_fixture.viz_stops(out_pth=tmp, filtered_only=False) @patch("builtins.print") - def test_viz_stops_point(self, mock_print, tmpdir, gtfs_fixture): + def test_viz_stops_point(self, mock_print, tmpdir, newp_gtfs_fixture): """Check behaviour of viz_stops when plotting point geom.""" tmp = os.path.join(tmpdir, "points.html") - gtfs_fixture.viz_stops(out_pth=pathlib.Path(tmp)) + newp_gtfs_fixture.viz_stops(out_pth=pathlib.Path(tmp)) assert os.path.exists( tmp ), f"{tmp} was expected to exist but it was not found." 
# check behaviour when parent directory doesn't exist no_parent_pth = os.path.join(tmpdir, "notfound", "points1.html") - gtfs_fixture.viz_stops( + newp_gtfs_fixture.viz_stops( out_pth=pathlib.Path(no_parent_pth), create_out_parent=True ) assert os.path.exists( @@ -405,7 +449,7 @@ def test_viz_stops_point(self, mock_print, tmpdir, gtfs_fixture): "to 'out_pth'. Path defaulted to .html" ), ): - gtfs_fixture.viz_stops(out_pth=pathlib.Path(tmp1)) + newp_gtfs_fixture.viz_stops(out_pth=pathlib.Path(tmp1)) # need to use regex for the first print statement, as tmpdir will # change. start_pat = re.compile(r"Creating parent directory:.*") @@ -418,20 +462,22 @@ def test_viz_stops_point(self, mock_print, tmpdir, gtfs_fixture): write_pth ), f"Map should have been written to {write_pth} but was not found." - def test_viz_stops_hull(self, tmpdir, gtfs_fixture): + def test_viz_stops_hull(self, tmpdir, newp_gtfs_fixture): """Check viz_stops behaviour when plotting hull geom.""" tmp = os.path.join(tmpdir, "hull.html") - gtfs_fixture.viz_stops(out_pth=pathlib.Path(tmp), geoms="hull") + newp_gtfs_fixture.viz_stops(out_pth=pathlib.Path(tmp), geoms="hull") assert os.path.exists(tmp), f"Map file not found at {tmp}." # assert file created when not filtering the hull tmp1 = os.path.join(tmpdir, "filtered_hull.html") - gtfs_fixture.viz_stops(out_pth=tmp1, geoms="hull", filtered_only=False) + newp_gtfs_fixture.viz_stops( + out_pth=tmp1, geoms="hull", filtered_only=False + ) assert os.path.exists(tmp1), f"Map file not found at {tmp1}." - def test__create_map_title_text_defence(self, gtfs_fixture): + def test__create_map_title_text_defence(self, newp_gtfs_fixture): """Test the defences for _create_map_title_text().""" # CRS without m or km units - gtfs_hull = gtfs_fixture.feed.compute_convex_hull() + gtfs_hull = newp_gtfs_fixture.feed.compute_convex_hull() gdf = GeoDataFrame({"geometry": gtfs_hull}, index=[0], crs="epsg:4326") with pytest.raises(ValueError), pytest.warns(UserWarning): _create_map_title_text(gdf=gdf, units="m", geom_crs=4326) @@ -504,7 +550,7 @@ def test__convert_multi_index_to_single(self): expected_cols.remove(col) assert len(expected_cols) == 0, "Not all expected cols in output cols" - def test__order_dataframe_by_day_defence(self, gtfs_fixture): + def test__order_dataframe_by_day_defence(self, newp_gtfs_fixture): """Test __order_dataframe_by_day defences.""" with pytest.raises( TypeError, @@ -513,7 +559,7 @@ def test__order_dataframe_by_day_defence(self, gtfs_fixture): "Got " ), ): - (gtfs_fixture._order_dataframe_by_day(df="test")) + (newp_gtfs_fixture._order_dataframe_by_day(df="test")) with pytest.raises( TypeError, match=re.escape( @@ -522,12 +568,12 @@ def test__order_dataframe_by_day_defence(self, gtfs_fixture): ), ): ( - gtfs_fixture._order_dataframe_by_day( + newp_gtfs_fixture._order_dataframe_by_day( df=pd.DataFrame(), day_column_name=5 ) ) - def test_get_route_modes(self, gtfs_fixture, mocker): + def test_get_route_modes(self, newp_gtfs_fixture, mocker): """Assertions about the table returned by get_route_modes().""" patch_scrape_lookup = mocker.patch( "assess_gtfs.validation.scrape_route_type_lookup", @@ -536,25 +582,28 @@ def test_get_route_modes(self, gtfs_fixture, mocker): {"route_type": ["3"], "desc": ["Mocked bus"]} ), ) - gtfs_fixture.get_route_modes() + newp_gtfs_fixture.get_route_modes() # check mocker was called assert ( patch_scrape_lookup.called ), "mocker.patch `patch_scrape_lookup` was not called." 
- found = gtfs_fixture.route_mode_summary_df["desc"][0] + found = newp_gtfs_fixture.route_mode_summary_df["desc"][0] assert found == "Mocked bus", f"Expected 'Mocked bus', found: {found}" assert isinstance( - gtfs_fixture.route_mode_summary_df, pd.core.frame.DataFrame - ), f"Expected pd df. Found: {type(gtfs_fixture.route_mode_summary_df)}" + newp_gtfs_fixture.route_mode_summary_df, pd.core.frame.DataFrame + ), ( + f"Expected pd df. Found: " + f"{type(newp_gtfs_fixture.route_mode_summary_df)}" + ) exp_cols = pd.Index(["route_type", "desc", "n_routes", "prop_routes"]) - found_cols = gtfs_fixture.route_mode_summary_df.columns + found_cols = newp_gtfs_fixture.route_mode_summary_df.columns assert ( found_cols == exp_cols ).all(), f"Expected columns are different. Found: {found_cols}" - def test__preprocess_trips_and_routes(self, gtfs_fixture): + def test__preprocess_trips_and_routes(self, newp_gtfs_fixture): """Check the outputs of _pre_process_trips_and_route() (test data).""" - returned_df = gtfs_fixture._preprocess_trips_and_routes() + returned_df = newp_gtfs_fixture._preprocess_trips_and_routes() assert isinstance(returned_df, pd.core.frame.DataFrame), ( "Expected DF for _preprocess_trips_and_routes() return," f"found {type(returned_df)}" @@ -588,13 +637,13 @@ def test__preprocess_trips_and_routes(self, gtfs_fixture): f"Found {returned_df.shape}", ) - def test_summarise_trips_defence(self, gtfs_fixture): + def test_summarise_trips_defence(self, newp_gtfs_fixture): """Defensive checks for summarise_trips().""" with pytest.raises( TypeError, match="Each item in `summ_ops`.*. Found : np.mean", ): - gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"]) + newp_gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"]) # case where is function but not exported from numpy def dummy_func(): @@ -608,18 +657,18 @@ def dummy_func(): " : dummy_func" ), ): - gtfs_fixture.summarise_trips(summ_ops=[np.min, dummy_func]) + newp_gtfs_fixture.summarise_trips(summ_ops=[np.min, dummy_func]) # case where a single non-numpy func is being passed with pytest.raises( NotImplementedError, match="`summ_ops` expects numpy functions only.", ): - gtfs_fixture.summarise_trips(summ_ops=dummy_func) + newp_gtfs_fixture.summarise_trips(summ_ops=dummy_func) with pytest.raises( TypeError, match="`summ_ops` expects a numpy function.*. Found ", ): - gtfs_fixture.summarise_trips(summ_ops=38) + newp_gtfs_fixture.summarise_trips(summ_ops=38) # cases where return_summary are not of type boolean with pytest.raises( TypeError, @@ -627,7 +676,7 @@ def dummy_func(): "`return_summary` expected . Got " ), ): - gtfs_fixture.summarise_trips(return_summary=5) + newp_gtfs_fixture.summarise_trips(return_summary=5) with pytest.raises( TypeError, match=re.escape( @@ -635,15 +684,15 @@ def dummy_func(): "'str'>" ), ): - gtfs_fixture.summarise_trips(return_summary="true") + newp_gtfs_fixture.summarise_trips(return_summary="true") - def test_summarise_routes_defence(self, gtfs_fixture): + def test_summarise_routes_defence(self, newp_gtfs_fixture): """Defensive checks for summarise_routes().""" with pytest.raises( TypeError, match="Each item in `summ_ops`.*. 
Found : np.mean", ): - gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"]) + newp_gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"]) # case where is function but not exported from numpy def dummy_func(): @@ -657,18 +706,18 @@ def dummy_func(): " : dummy_func" ), ): - gtfs_fixture.summarise_routes(summ_ops=[np.min, dummy_func]) + newp_gtfs_fixture.summarise_routes(summ_ops=[np.min, dummy_func]) # case where a single non-numpy func is being passed with pytest.raises( NotImplementedError, match="`summ_ops` expects numpy functions only.", ): - gtfs_fixture.summarise_routes(summ_ops=dummy_func) + newp_gtfs_fixture.summarise_routes(summ_ops=dummy_func) with pytest.raises( TypeError, match="`summ_ops` expects a numpy function.*. Found ", ): - gtfs_fixture.summarise_routes(summ_ops=38) + newp_gtfs_fixture.summarise_routes(summ_ops=38) # cases where return_summary are not of type boolean with pytest.raises( TypeError, @@ -676,38 +725,27 @@ def dummy_func(): "`return_summary` expected . Got " ), ): - gtfs_fixture.summarise_routes(return_summary=5) + newp_gtfs_fixture.summarise_routes(return_summary=5) with pytest.raises( TypeError, match=re.escape( "`return_summary` expected . Got " ), ): - gtfs_fixture.summarise_routes(return_summary="true") + newp_gtfs_fixture.summarise_routes(return_summary="true") - @patch("builtins.print") - def test_clean_feed_defence(self, mock_print, gtfs_fixture): - """Check defensive behaviours of clean_feed().""" - # Simulate condition where shapes.txt has no shape_id - gtfs_fixture.feed.shapes.drop("shape_id", axis=1, inplace=True) - gtfs_fixture.clean_feed() - fun_out = mock_print.mock_calls - assert fun_out == [ - call("KeyError. Feed was not cleaned.") - ], f"Expected print statement about KeyError. Found: {fun_out}." 
- - def test_summarise_trips_on_pass(self, gtfs_fixture): + def test_summarise_trips_on_pass(self, newp_gtfs_fixture): """Assertions about the outputs from summarise_trips().""" - gtfs_fixture.summarise_trips() + newp_gtfs_fixture.summarise_trips() # tests the daily_routes_summary return schema assert isinstance( - gtfs_fixture.daily_trip_summary, pd.core.frame.DataFrame + newp_gtfs_fixture.daily_trip_summary, pd.core.frame.DataFrame ), ( "Expected DF for daily_summary," - f"found {type(gtfs_fixture.daily_trip_summary)}" + f"found {type(newp_gtfs_fixture.daily_trip_summary)}" ) - found_ds = gtfs_fixture.daily_trip_summary.columns + found_ds = newp_gtfs_fixture.daily_trip_summary.columns exp_cols_ds = pd.Index( [ "day", @@ -726,13 +764,13 @@ def test_summarise_trips_on_pass(self, gtfs_fixture): # tests the self.dated_route_counts return schema assert isinstance( - gtfs_fixture.dated_trip_counts, pd.core.frame.DataFrame + newp_gtfs_fixture.dated_trip_counts, pd.core.frame.DataFrame ), ( "Expected DF for dated_route_counts," - f"found {type(gtfs_fixture.dated_trip_counts)}" + f"found {type(newp_gtfs_fixture.dated_trip_counts)}" ) - found_drc = gtfs_fixture.dated_trip_counts.columns + found_drc = newp_gtfs_fixture.dated_trip_counts.columns exp_cols_drc = pd.Index(["date", "route_type", "trip_count", "day"]) assert ( @@ -753,8 +791,8 @@ def test_summarise_trips_on_pass(self, gtfs_fixture): ) found_df = ( - gtfs_fixture.daily_trip_summary[ - gtfs_fixture.daily_trip_summary["day"] == "friday" + newp_gtfs_fixture.daily_trip_summary[ + newp_gtfs_fixture.daily_trip_summary["day"] == "friday" ] .sort_values(by="route_type", ascending=True) .reset_index(drop=True) @@ -770,24 +808,26 @@ def test_summarise_trips_on_pass(self, gtfs_fixture): # test that the dated_trip_counts can be returned expected_size = (504, 4) - found_size = gtfs_fixture.summarise_trips(return_summary=False).shape + found_size = newp_gtfs_fixture.summarise_trips( + return_summary=False + ).shape assert expected_size == found_size, ( "Size of date_route_counts not as expected. 
" "Expected {expected_size}" ) - def test_summarise_routes_on_pass(self, gtfs_fixture): + def test_summarise_routes_on_pass(self, newp_gtfs_fixture): """Assertions about the outputs from summarise_routes().""" - gtfs_fixture.summarise_routes() + newp_gtfs_fixture.summarise_routes() # tests the daily_routes_summary return schema assert isinstance( - gtfs_fixture.daily_route_summary, pd.core.frame.DataFrame + newp_gtfs_fixture.daily_route_summary, pd.core.frame.DataFrame ), ( "Expected DF for daily_summary," - f"found {type(gtfs_fixture.daily_route_summary)}" + f"found {type(newp_gtfs_fixture.daily_route_summary)}" ) - found_ds = gtfs_fixture.daily_route_summary.columns + found_ds = newp_gtfs_fixture.daily_route_summary.columns exp_cols_ds = pd.Index( [ "day", @@ -806,13 +846,13 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): # tests the self.dated_route_counts return schema assert isinstance( - gtfs_fixture.dated_route_counts, pd.core.frame.DataFrame + newp_gtfs_fixture.dated_route_counts, pd.core.frame.DataFrame ), ( "Expected DF for dated_route_counts," - f"found {type(gtfs_fixture.dated_route_counts)}" + f"found {type(newp_gtfs_fixture.dated_route_counts)}" ) - found_drc = gtfs_fixture.dated_route_counts.columns + found_drc = newp_gtfs_fixture.dated_route_counts.columns exp_cols_drc = pd.Index(["date", "route_type", "day", "route_count"]) assert ( @@ -833,8 +873,8 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): ) found_df = ( - gtfs_fixture.daily_route_summary[ - gtfs_fixture.daily_route_summary["day"] == "friday" + newp_gtfs_fixture.daily_route_summary[ + newp_gtfs_fixture.daily_route_summary["day"] == "friday" ] .sort_values(by="route_type", ascending=True) .reset_index(drop=True) @@ -850,13 +890,15 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): # test that the dated_route_counts can be returned expected_size = (504, 4) - found_size = gtfs_fixture.summarise_routes(return_summary=False).shape + found_size = newp_gtfs_fixture.summarise_routes( + return_summary=False + ).shape assert expected_size == found_size, ( "Size of date_route_counts not as expected. " "Expected {expected_size}" ) - def test__plot_summary_defences(self, tmp_path, gtfs_fixture): + def test__plot_summary_defences(self, tmp_path, newp_gtfs_fixture): """Test defences for _plot_summary().""" # test defences for checks summaries exist with pytest.raises( @@ -866,7 +908,7 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): " Did you forget to call '.summarise_trips()' first?" ), ): - gtfs_fixture._plot_summary(which="trip", target_column="mean") + newp_gtfs_fixture._plot_summary(which="trip", target_column="mean") with pytest.raises( AttributeError, @@ -875,9 +917,11 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): " Did you forget to call '.summarise_routes()' first?" ), ): - gtfs_fixture._plot_summary(which="route", target_column="mean") + newp_gtfs_fixture._plot_summary( + which="route", target_column="mean" + ) - gtfs_fixture.summarise_routes() + newp_gtfs_fixture.summarise_routes() # test parameters that are yet to be tested options = ["v", "h"] @@ -888,7 +932,7 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): f"{options}. Got i: " ), ): - gtfs_fixture._plot_summary( + newp_gtfs_fixture._plot_summary( which="route", target_column="route_count_mean", orientation="i", @@ -903,7 +947,7 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): " given to 'img_type'. 
Path defaulted to .png" ), ): - gtfs_fixture._plot_summary( + newp_gtfs_fixture._plot_summary( which="route", target_column="route_count_mean", save_image=True, @@ -919,15 +963,17 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): "['trip', 'route']. Got tester: " ), ): - gtfs_fixture._plot_summary(which="tester", target_column="tester") + newp_gtfs_fixture._plot_summary( + which="tester", target_column="tester" + ) - def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): + def test__plot_summary_on_pass(self, newp_gtfs_fixture, tmp_path): """Test plotting a summary when defences are passed.""" - current_fixture = gtfs_fixture + current_fixture = newp_gtfs_fixture current_fixture.summarise_routes() # test returning a html string - test_html = gtfs_fixture._plot_summary( + test_html = newp_gtfs_fixture._plot_summary( which="route", target_column="route_count_mean", return_html=True, @@ -935,7 +981,7 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): assert type(test_html) is str, "Failed to return HTML for the plot" # test returning a plotly figure - test_image = gtfs_fixture._plot_summary( + test_image = newp_gtfs_fixture._plot_summary( which="route", target_column="route_count_mean" ) assert ( @@ -943,8 +989,8 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): ), "Failed to return plotly.graph_objects.Figure type" # test returning a plotly for trips - gtfs_fixture.summarise_trips() - test_image = gtfs_fixture._plot_summary( + newp_gtfs_fixture.summarise_trips() + test_image = newp_gtfs_fixture._plot_summary( which="trip", target_column="trip_count_mean" ) assert ( @@ -952,7 +998,7 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): ), "Failed to return plotly.graph_objects.Figure type" # test saving plots in html and png format - gtfs_fixture._plot_summary( + newp_gtfs_fixture._plot_summary( which="route", target_column="mean", width=1200, @@ -981,7 +1027,7 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): assert counts["html"] == 1, "Failed to save plot as HTML" assert counts["png"] == 1, "Failed to save plot as png" - def test__create_extended_repeated_pair_table(self, gtfs_fixture): + def test__create_extended_repeated_pair_table(self, newp_gtfs_fixture): """Test _create_extended_repeated_pair_table().""" test_table = pd.DataFrame( { @@ -1000,17 +1046,19 @@ def test__create_extended_repeated_pair_table(self, gtfs_fixture): } ).to_dict() - returned_table = gtfs_fixture._create_extended_repeated_pair_table( - table=test_table, - join_vars=["trip_name", "trip_abbrev"], - original_rows=[0], - ).to_dict() + returned_table = ( + newp_gtfs_fixture._create_extended_repeated_pair_table( + table=test_table, + join_vars=["trip_name", "trip_abbrev"], + original_rows=[0], + ).to_dict() + ) assert ( expected_table == returned_table ), "_create_extended_repeated_pair_table() failed" - def test_html_report_defences(self, gtfs_fixture, tmp_path): + def test_html_report_defences(self, newp_gtfs_fixture, tmp_path): """Test the defences whilst generating a HTML report.""" with pytest.raises( ValueError, @@ -1019,15 +1067,15 @@ def test_html_report_defences(self, gtfs_fixture, tmp_path): "['mean', 'min', 'max', 'median']. 
Got test_sum: " ), ): - gtfs_fixture.html_report( + newp_gtfs_fixture.html_report( report_dir=tmp_path, overwrite=True, summary_type="test_sum", ) - def test_html_report_on_pass(self, gtfs_fixture, tmp_path): + def test_html_report_on_pass(self, newp_gtfs_fixture, tmp_path): """Test that a HTML report is generated if defences are passed.""" - gtfs_fixture.html_report(report_dir=pathlib.Path(tmp_path)) + newp_gtfs_fixture.html_report(report_dir=pathlib.Path(tmp_path)) # assert that the report has been completely generated assert os.path.exists( @@ -1062,33 +1110,35 @@ def test_html_report_on_pass(self, gtfs_fixture, tmp_path): ("invalid_ext.txt", "invalid_ext.zip", True), ], ) - def test_save(self, tmp_path, gtfs_fixture, path, final_path, warns): + def test_save(self, tmp_path, newp_gtfs_fixture, path, final_path, warns): """Test the .save() methohd of GtfsInstance().""" complete_path = os.path.join(tmp_path, path) expected_path = os.path.join(tmp_path, final_path) if warns: # catch UserWarning from invalid file extension with pytest.warns(UserWarning): - gtfs_fixture.save(complete_path) + newp_gtfs_fixture.save(complete_path) else: with does_not_raise(): - gtfs_fixture.save(complete_path, overwrite=True) + newp_gtfs_fixture.save(complete_path, overwrite=True) assert os.path.exists(expected_path), "GTFS not saved correctly" - def test_save_overwrite(self, tmp_path, gtfs_fixture): + def test_save_overwrite(self, tmp_path, newp_gtfs_fixture): """Test the .save()'s method of GtfsInstance overwrite feature.""" # original save save_pth = f"{tmp_path}/test_save.zip" - gtfs_fixture.save(save_pth, overwrite=True) + newp_gtfs_fixture.save(save_pth, overwrite=True) assert os.path.exists(save_pth), "GTFS not saved at correct path" # test saving without overwrite enabled with pytest.raises( FileExistsError, match="File already exists at path.*" ): - gtfs_fixture.save(f"{tmp_path}/test_save.zip", overwrite=False) + newp_gtfs_fixture.save( + f"{tmp_path}/test_save.zip", overwrite=False + ) # test saving with overwrite enabled raises no errors with does_not_raise(): - gtfs_fixture.save(f"{tmp_path}/test_save.zip", overwrite=True) + newp_gtfs_fixture.save(f"{tmp_path}/test_save.zip", overwrite=True) assert os.path.exists(save_pth), "GTFS save not found" @pytest.mark.parametrize( @@ -1111,14 +1161,14 @@ def test_filter_to_date(self, date, expected_len): len(gtfs.feed.stop_times) == expected_len ), "GTFS not filtered to singular date as expected" - def test_filter_to_bbox(self, gtfs_fixture): + def test_filter_to_bbox(self, newp_gtfs_fixture): """Small tests for the shallow wrapper filter_to_bbox().""" assert ( - len(gtfs_fixture.feed.stop_times) == 7765 + len(newp_gtfs_fixture.feed.stop_times) == 7765 ), "feed.stop_times is an unexpected size" - gtfs_fixture.filter_to_bbox( + newp_gtfs_fixture.filter_to_bbox( [-2.985535, 51.551459, -2.919617, 51.606077] ) assert ( - len(gtfs_fixture.feed.stop_times) == 217 + len(newp_gtfs_fixture.feed.stop_times) == 217 ), "GTFS not filtered to bbox as expected" diff --git a/tests/test_validators.py b/tests/test_validators.py index f90e084..fdc78b6 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -41,7 +41,7 @@ def test_validate_travel_between_consecutive_stops( self, gtfs_fixture, _EXPECTED_CHESTER_VALIDITY_DF ): """General tests for validating travel between consecutive stops.""" - gtfs_fixture.is_valid(far_stops=False) + gtfs_fixture.is_valid(validators={"core_validation": {}}) validate_travel_between_consecutive_stops(gtfs=gtfs_fixture) # This 
assertion should not contain the final row of the chester # fixture, which is created on validate_travel_over_multiple_stops() @@ -61,7 +61,7 @@ def test_validate_travel_over_multiple_stops( self, gtfs_fixture, _EXPECTED_CHESTER_VALIDITY_DF ): """General tests for validate_travel_over_multiple_stops().""" - gtfs_fixture.is_valid(far_stops=False) + gtfs_fixture.is_valid(validators={"core_validation": {}}) validate_travel_over_multiple_stops(gtfs=gtfs_fixture) pd.testing.assert_frame_equal( _EXPECTED_CHESTER_VALIDITY_DF, gtfs_fixture.validity_df
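
Finally, a minimal sketch of the `func_map`/`operations` contract that `_function_pipeline` enforces and that the defensive tests above exercise. `flag_feed` and `editor_note` are hypothetical names used purely for illustration; only the pipeline call itself reflects the patched API.

```python
# Illustration of the _function_pipeline dispatch contract; flag_feed and
# editor_note are hypothetical and not part of the package.
from assess_gtfs.gtfs_utils import _function_pipeline
from assess_gtfs.validation import GtfsInstance


def flag_feed(gtfs, note: str = "seen") -> None:
    """Hypothetical operation: attach a note to the GtfsInstance."""
    gtfs.editor_note = note


gtfs = GtfsInstance(gtfs_pth="tests/data/newport-20230613_gtfs.zip")
_function_pipeline(
    gtfs=gtfs,
    func_map={"flag_feed": flag_feed},
    operations={"flag_feed": {"note": "ran via pipeline"}},
)
assert gtfs.editor_note == "ran via pipeline"
```

Keys missing from `func_map` raise the KeyError asserted in `TestFunctionPipeline`, and kwargs values that are neither `dict` nor `None` fail `_type_defence`; `clean_feed()` and `is_valid()` rely on exactly this behaviour.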