diff --git a/CHANGELOG.md b/CHANGELOG.md index b3f3091..468795d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ **Under development** +- feat: add municipality information to households and activities - chore: update to `eqasim-java` commit `ece4932` - feat: vehicles and vehicle types are now always generated - feat: read vehicles data from zip files diff --git a/synthesis/output.py b/synthesis/output.py index 1c47962..84c52a3 100644 --- a/synthesis/output.py +++ b/synthesis/output.py @@ -62,23 +62,6 @@ def execute(context): output_prefix = context.config("output_prefix") output_formats = context.config("output_formats") - # Prepare households - df_households = context.stage("synthesis.population.enriched").rename( - columns = { "household_income": "income" } - ).drop_duplicates("household_id") - - df_households = df_households[[ - "household_id", - "car_availability", "bike_availability", - "number_of_vehicles", "number_of_bikes", - "income", - "census_household_id" - ]] - if "csv" in output_formats: - df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") - if "parquet" in output_formats: - df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) - # Prepare persons df_persons = context.stage("synthesis.population.enriched").rename( columns = { "has_license": "has_driving_license" } @@ -106,9 +89,29 @@ def execute(context): df_activities["preceding_trip_index"] = df_activities["following_trip_index"].shift(1) df_activities.loc[df_activities["is_first"], "preceding_trip_index"] = -1 df_activities["preceding_trip_index"] = df_activities["preceding_trip_index"].astype(int) + # Prepare spatial data sets + df_locations = context.stage("synthesis.population.spatial.locations")[[ + "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" + ]] + df_activities = pd.merge(df_activities, df_locations[[ + "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" + ]], how = "left", on = ["person_id", "activity_index"]) + + # Prepare spatial activities + df_spatial = gpd.GeoDataFrame(df_activities[[ + "person_id", "household_id", "activity_index", + "iris_id", "commune_id","departement_id","region_id", + "preceding_trip_index", "following_trip_index", + "purpose", "start_time", "end_time", + "is_first", "is_last", "geometry" + ]], crs = df_locations.crs) + df_spatial = df_spatial.astype({'purpose': 'str', "departement_id": 'str'}) + + # Write activities df_activities = df_activities[[ "person_id", "household_id", "activity_index", + "iris_id", "commune_id","departement_id","region_id", "preceding_trip_index", "following_trip_index", "purpose", "start_time", "end_time", "is_first", "is_last" @@ -119,6 +122,25 @@ def execute(context): if "parquet" in output_formats: df_activities.to_parquet("%s/%sactivities.parquet" % (output_path, output_prefix)) + # Prepare households + df_households = context.stage("synthesis.population.enriched").rename( + columns = { "household_income": "income" } + ).drop_duplicates("household_id") + + df_households = pd.merge(df_households,df_activities[df_activities["purpose"] == "home"][["household_id", + "iris_id", "commune_id","departement_id","region_id"]].drop_duplicates("household_id"),how="left") + df_households = df_households[[ + "household_id","iris_id", "commune_id", "departement_id","region_id", + "car_availability", "bike_availability", + "number_of_vehicles", "number_of_bikes", + "income", + "census_household_id" + ]] + if "csv" in output_formats: + df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + if "parquet" in output_formats: + df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) + # Prepare trips df_trips = context.stage("synthesis.population.trips").rename( columns = { @@ -170,18 +192,7 @@ def execute(context): df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix)) df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix)) - # Prepare spatial data sets - df_locations = context.stage("synthesis.population.spatial.locations")[[ - "person_id", "activity_index", "geometry" - ]] - - df_activities = pd.merge(df_activities, df_locations[[ - "person_id", "activity_index", "geometry" - ]], how = "left", on = ["person_id", "activity_index"]) - # Write spatial activities - df_spatial = gpd.GeoDataFrame(df_activities, crs = df_locations.crs) - df_spatial["purpose"] = df_spatial["purpose"].astype(str) if "gpkg" in output_formats: path = "%s/%sactivities.gpkg" % (output_path, output_prefix) df_spatial.to_file(path, driver = "GPKG") @@ -194,7 +205,7 @@ def execute(context): df_spatial_homes = df_spatial[ df_spatial["purpose"] == "home" ].drop_duplicates("household_id")[[ - "household_id", "geometry" + "household_id","iris_id", "commune_id","departement_id","region_id", "geometry" ]] if "gpkg" in output_formats: path = "%s/%shomes.gpkg" % (output_path, output_prefix) diff --git a/synthesis/population/spatial/locations.py b/synthesis/population/spatial/locations.py index 5277fd1..2397e09 100644 --- a/synthesis/population/spatial/locations.py +++ b/synthesis/population/spatial/locations.py @@ -9,6 +9,7 @@ def configure(context): context.stage("synthesis.population.activities") context.stage("synthesis.population.sampled") + context.stage("data.spatial.iris") def execute(context): df_home = context.stage("synthesis.population.spatial.home.locations") @@ -57,4 +58,10 @@ def execute(context): assert not df_locations["geometry"].isna().any() df_locations = gpd.GeoDataFrame(df_locations, crs = df_home.crs) + # add municipalities + df_iris = context.stage("data.spatial.iris") + df_iris = gpd.GeoDataFrame(df_iris, crs = df_home.crs) + + df_locations = gpd.sjoin(df_locations,df_iris,how="left") + return df_locations diff --git a/tests/test_determinism.py b/tests/test_determinism.py index e6ca821..e2755d7 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -68,8 +68,8 @@ def _test_determinism(index, data_path, tmpdir): synpp.run(stages, config, working_directory = cache_path) REFERENCE_CSV_HASHES = { - "ile_de_france_activities.csv": "e520003e1876a9542ff1a955a6efcfdc", - "ile_de_france_households.csv": "709ce7ded8a2487e6691d4fb3374754b", + "ile_de_france_activities.csv": "53c44fb4026d2037729ee8ff1c8fb93f", + "ile_de_france_households.csv": "ca2a29ef13467326f937638f1ff8be1a", "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", @@ -77,9 +77,9 @@ def _test_determinism(index, data_path, tmpdir): } REFERENCE_GPKG_HASHES = { - "ile_de_france_activities.gpkg": "9cf9a5fd8927c709927f7a940f86efbf", + "ile_de_france_activities.gpkg": "884eec1fd0c29904284eb4362ff89be1", "ile_de_france_commutes.gpkg": "5a4180390a69349cc655c07c5671e8d3", - "ile_de_france_homes.gpkg": "033d1aa7a5350579cbd5e8213b9736f2", + "ile_de_france_homes.gpkg": "a85e973f0e2f51031cd60170d351845e", "ile_de_france_trips.gpkg": "d0aec4033cfc184bf1b91ae13a537ef8", }