From f65bd1cf1a7132c4e84d9233e7654a5ae6da70f0 Mon Sep 17 00:00:00 2001
From: Patrick Schmitt
Date: Wed, 25 Sep 2024 15:03:45 +0200
Subject: [PATCH 1/3] adapt ranking of pixels for elevation bands

---
 oggm/sandbox/distribute_2d.py | 54 +++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/oggm/sandbox/distribute_2d.py b/oggm/sandbox/distribute_2d.py
index 19bac4a92..c1a850dd5 100644
--- a/oggm/sandbox/distribute_2d.py
+++ b/oggm/sandbox/distribute_2d.py
@@ -99,7 +99,9 @@ def add_smoothed_glacier_topo(gdir, outline_offset=-40,
 
 @entity_task(log, writes=['gridded_data'])
 def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
-                          elevation_weight=1.003):
+                          ranking_variables=None,
+                          ranking_variables_weights=None,
+                          ):
     """Assigns glacier grid points to flowline elevation bands and ranks them.
 
     Creates two variables in gridded_data.nc:
@@ -123,18 +125,25 @@ def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
     topo_variable : str
         the topography to read from `gridded_data.nc` (could be smoothed, or
         smoothed differently).
-    elevation_weight : float
-        how much weight to give to the elevation of the grid point versus the
-        thickness. Arbitrary number, might be tuned differently.
+    ranking_variables : list
+        A list of distributed variables used for ranking the pixels of each
+        elevation band. Any variable stored in 'gridded_data' can be used
+        (e.g. 'topo_smoothed', 'slope', 'dis_from_border',
+        'distributed_thickness', 'aspect'). Default is ['distributed_thickness'].
+    ranking_variables_weights : list
+        A list of weights corresponding to ranking_variables (must be in the
+        same order!). Larger absolute values give more weight to the
+        corresponding variable; a negative weight inverts the variable's
+        ranking. Default is [1].
     """
     # We need quite a few data from the gridded dataset
     with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
+        ds_grid = ds.load()
         topo_data = ds[topo_variable].data.copy()
         glacier_mask = ds.glacier_mask.data == 1
         topo_data_flat = topo_data[glacier_mask]
         band_index = topo_data * np.nan  # container
         per_band_rank = topo_data * np.nan  # container
-        distrib_thick = ds.distributed_thickness.data
+        weighting_per_pixel = topo_data * 0.  # container
 
     # For the flowline we need the model flowlines only
     fls = gdir.read_pickle('model_flowlines')
@@ -173,15 +182,36 @@ def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed',
     # assert np.nanmin(band_index) == 0
     assert np.all(np.isfinite(band_index[glacier_mask]))
 
-    # Ok now assign within band using ice thickness weighted by elevation
-    # We rank the pixels within one band by elevation, but also add
-    # a penalty is added to higher elevation grid points
-    min_alt = np.nanmin(topo_data)
-    weighted_thick = ((topo_data - min_alt + 1) * elevation_weight) * distrib_thick
+    # Ok now we rank the pixels within each band. The variables to consider
+    # can be defined by the user. All variables are normalized (to the range
+    # 0-1), multiplied by their weight and summed up. If a weight is negative,
+    # the normalized variable is inverted (0 becomes 1 and vice versa).
+    if ranking_variables is None:
+        # the default variables to use
+        ranking_variables = ['distributed_thickness']
+    if ranking_variables_weights is None:
+        # the default weights to use
+        ranking_variables_weights = [1]
+    assert len(ranking_variables) == len(ranking_variables_weights)
+
+    # normalize all values to the range of 0 to 1
+    def min_max_normalization(data):
+        data = np.where(glacier_mask, data, np.nan)
+        return (data - np.nanmin(data)) / (np.nanmax(data) - np.nanmin(data))
+
+    for var, var_weight in zip(ranking_variables,
+                               ranking_variables_weights):
+        normalized_var = min_max_normalization(ds_grid[var].data)
+        # for negative weights we invert the normalized variable
+        # (the smallest becomes the largest and vice versa)
+        if var_weight < 0:
+            normalized_var = 1 - normalized_var
+        weighting_per_pixel += normalized_var * np.abs(var_weight)
+
+    # now rank the pixels of each band individually
     for band_id in np.unique(np.sort(band_index[glacier_mask])):
-        # We work per band here
         is_band = band_index == band_id
-        per_band_rank[is_band] = mstats.rankdata(weighted_thick[is_band])
+        per_band_rank[is_band] = mstats.rankdata(weighting_per_pixel[is_band])
 
     with ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc:
         vn = 'band_index'
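
A note on the ranking scheme introduced above: the following is a minimal, self-contained sketch of the normalize-weight-sum-rank logic, using made-up arrays for a single band (the variable values and the weight choice are illustrative assumptions, not OGGM defaults):

    import numpy as np
    from scipy.stats import mstats

    # synthetic stand-ins for two gridded variables plus a glacier mask
    glacier_mask = np.array([True, True, True, True, False])
    thickness = np.array([120., 80., 200., 50., np.nan])
    dis_from_border = np.array([300., 100., 500., 50., np.nan])

    def min_max_normalization(data):
        # restrict to glacier pixels, then scale to the range [0, 1]
        data = np.where(glacier_mask, data, np.nan)
        return (data - np.nanmin(data)) / (np.nanmax(data) - np.nanmin(data))

    # hypothetical choice: rank mainly by thickness, and prefer pixels
    # close to the glacier border (the negative weight inverts the ordering)
    ranking_variables = [thickness, dis_from_border]
    ranking_variables_weights = [1, -0.5]

    weighting_per_pixel = np.zeros_like(thickness)
    for var, var_weight in zip(ranking_variables, ranking_variables_weights):
        normalized_var = min_max_normalization(var)
        if var_weight < 0:
            normalized_var = 1 - normalized_var
        weighting_per_pixel += normalized_var * np.abs(var_weight)

    # rank the glacier pixels, as done per band in the task above
    print(mstats.rankdata(weighting_per_pixel[glacier_mask]))  # [3. 2. 4. 1.]

In the task itself the weighted sum is computed once for the whole grid and then ranked separately within each elevation band; a corresponding (hypothetical) call would be assign_points_to_band(gdir, ranking_variables=['distributed_thickness', 'dis_from_border'], ranking_variables_weights=[1, -0.5]).
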
From eeda03304f2eb9b8b7260bb5954231fa625a62de Mon Sep 17 00:00:00 2001
From: Patrick Schmitt
Date: Thu, 26 Sep 2024 09:04:02 +0200
Subject: [PATCH 2/3] add compute_fl_diagnostics_quantiles with tests

---
 oggm/core/flowline.py     | 77 +++++++++++++++++++++++++++++++++++++++
 oggm/tasks.py             |  1 +
 oggm/tests/test_models.py | 74 +++++++++++++++++++++++++++++++++++++
 3 files changed, 152 insertions(+)

diff --git a/oggm/core/flowline.py b/oggm/core/flowline.py
index f84c4d433..1cfd5bfff 100644
--- a/oggm/core/flowline.py
+++ b/oggm/core/flowline.py
@@ -4592,3 +4592,80 @@ def clean_merged_flowlines(gdir, buffer=None):
 
     # Finally write the flowlines
     gdir.write_pickle(allfls, 'model_flowlines')
+
+
+@entity_task(log)
+def compute_fl_diagnostics_quantiles(gdir,
+                                     input_filesuffixes,
+                                     quantiles=0.5,
+                                     output_filesuffix='_median',
+                                    ):
+    """Compute quantile fl_diagnostics (e.g. median flowline of projections)
+
+    This function takes a number of fl_diagnostic files and computes
+    quantiles from them. This can be useful, for example, for calculating a
+    median flowline of different GCM projections which use the same scenario
+    (e.g. ssp126).
+
+    Parameters
+    ----------
+    gdir : :py:class:`oggm.GlacierDirectory`
+        the glacier directory to process
+    input_filesuffixes : list
+        a list of fl_diagnostic filesuffixes which should be considered for
+        the computation
+    quantiles : float or list
+        The quantiles to compute. Can be a float for a single quantile or a
+        list of floats for several quantiles.
+        Default is 0.5 (the median).
+    output_filesuffix : str
+        The output_filesuffix of the resulting fl_diagnostics.
+        Default is '_median'.
+    """
+
+    # if quantiles is a list with a single element, use that element directly
+    if isinstance(quantiles, list):
+        if len(quantiles) == 1:
+            quantiles = quantiles[0]
+
+    # get the filepaths of all fl_diagnostic files
+    all_fl_diag_paths= []
+    for filesuffix in input_filesuffixes:
+        all_fl_diag_paths.append(gdir.get_filepath('fl_diagnostics',
+                                                   filesuffix=filesuffix))
+
+    # assume all files have the same number of flowlines; extract the base
+    # structure and the fl_ids from the first file
+    with xr.open_dataset(all_fl_diag_paths[0]) as ds:
+        ds_base = ds.load()
+        fl_ids = ds.flowlines.data
+
+    # helper function to open all fl_diag files as one with open_mfdataset
+    def add_filename_coord(ds):
+        filepath = ds.encoding["source"]
+        filename = os.path.basename(filepath).split(".")[0]
+        return ds.expand_dims({'filename': [filename]})
+
+    # now loop through all flowlines and compute the quantiles per flowline
+    fl_diag_dss = []
+    for fl_id in fl_ids:
+        with xr.open_mfdataset(all_fl_diag_paths,
+                               preprocess=add_filename_coord,
+                               group=f'fl_{fl_id}') as ds_fl:
+            with warnings.catch_warnings():
+                # ignore RuntimeWarnings raised for all-NaN slices
+                warnings.simplefilter("ignore", category=RuntimeWarning)
+                fl_diag_dss.append(ds_fl.load().quantile(quantiles,
+                                                         dim='filename'))
+
+    # write everything back into the same group structure as fl_diagnostics
+    output_filepath = gdir.get_filepath('fl_diagnostics',
+                                        filesuffix=output_filesuffix)
+    encode = {}
+    for v in fl_diag_dss[0]:
+        encode[v] = {'zlib': True, 'complevel': 5}
+
+    ds_base.to_netcdf(output_filepath, 'w')
+    for fl_id, ds in zip(fl_ids, fl_diag_dss):
+        ds.to_netcdf(output_filepath, 'a', group='fl_{}'.format(fl_id),
+                     encoding=encode)
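
The core pattern of the function above is to open the same per-flowline netCDF group of all input files as one dataset, stacked along a new 'filename' dimension, and to reduce that dimension with .quantile(). A minimal sketch of this pattern outside of OGGM, assuming three hypothetical files that each contain a group 'fl_0':

    import os
    import xarray as xr

    paths = ['fld_run1.nc', 'fld_run2.nc', 'fld_run3.nc']  # hypothetical

    def add_filename_coord(ds):
        # tag each dataset with the file it came from, as a new dimension
        filename = os.path.basename(ds.encoding['source']).split('.')[0]
        return ds.expand_dims({'filename': [filename]})

    # open the 'fl_0' group of all files as one dataset, stacked along the
    # new 'filename' dimension ...
    with xr.open_mfdataset(paths, preprocess=add_filename_coord,
                           group='fl_0') as ds_fl:
        # ... load (quantile needs the reduced dim in memory) and collapse
        # the file dimension to the requested quantiles
        quants = ds_fl.load().quantile([0.05, 0.5, 0.95], dim='filename')
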
+ """ + + # if quantiles is a list with one element, only use this + if isinstance(quantiles, list): + if len(quantiles) == 1: + quantiles = quantiles[0] + + # get filepath of all fl_diagnostic files + all_fl_diag_paths= [] + for filesuffix in input_filesuffixes: + all_fl_diag_paths.append(gdir.get_filepath('fl_diagnostics', + filesuffix=filesuffix)) + + # assume all have the same number of flowlines, extract the base structure + # and number of fl_ids from the first file + with xr.open_dataset(all_fl_diag_paths[0]) as ds: + ds_base = ds.load() + fl_ids = ds.flowlines.data + + # help function to open all fl_diag files as one with open_mfdataset + def add_filename_coord(ds): + filepath = ds.encoding["source"] + filename = os.path.basename(filepath).split(".")[0] + return ds.expand_dims({'filename': [filename]}) + + # now loop through all flowlines and save back in the same structure + fl_diag_dss = [] + for fl_id in fl_ids: + with xr.open_mfdataset(all_fl_diag_paths, + preprocess=lambda ds: add_filename_coord(ds), + group=f'fl_{fl_id}') as ds_fl: + with warnings.catch_warnings(): + # ignore warnings for NaNs + warnings.simplefilter("ignore", category=RuntimeWarning) + fl_diag_dss.append(ds_fl.load().quantile(quantiles, + dim='filename')) + + # for writing into the nice output structure + output_filepath = gdir.get_filepath('fl_diagnostics', + filesuffix=output_filesuffix) + encode = {} + for v in fl_diag_dss[0]: + encode[v] = {'zlib': True, 'complevel': 5} + + ds_base.to_netcdf(output_filepath, 'w') + for fl_id, ds in zip(fl_ids, fl_diag_dss): + ds.to_netcdf(output_filepath, 'a', group='fl_{}'.format(fl_id), + encoding=encode) diff --git a/oggm/tasks.py b/oggm/tasks.py index 39f931115..a261859ff 100644 --- a/oggm/tasks.py +++ b/oggm/tasks.py @@ -62,6 +62,7 @@ from oggm.core.flowline import run_from_climate_data from oggm.core.flowline import run_constant_climate from oggm.core.flowline import run_with_hydro +from oggm.core.flowline import compute_fl_diagnostics_quantiles from oggm.core.dynamic_spinup import run_dynamic_spinup from oggm.core.dynamic_spinup import run_dynamic_melt_f_calibration from oggm.utils import copy_to_basedir diff --git a/oggm/tests/test_models.py b/oggm/tests/test_models.py index 7b444a810..6e6b4fb21 100644 --- a/oggm/tests/test_models.py +++ b/oggm/tests/test_models.py @@ -3198,6 +3198,80 @@ def test_elevation_feedback(self, hef_gdir): plt.legend() plt.show() + @pytest.mark.slow + def test_fl_diag_quantiles(self, hef_gdir): + cfg.PARAMS['store_fl_diagnostics'] = True + + # conduct three runs from which to calculate the quantiles + output_suffixes = ['_run1', '_run2', '_run3'] + for i, output_suffix in enumerate(output_suffixes): + run_random_climate(hef_gdir, y0=1985-i*5, nyears=10, + output_filesuffix=output_suffix, seed=i) + + # only calculate the median + workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles, + hef_gdir, + input_filesuffixes=output_suffixes, + quantiles=0.5, + output_filesuffix='_median' + ) + + # calculate 5th and 95th quantiles together + workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles, + hef_gdir, + input_filesuffixes=output_suffixes, + quantiles=[0.05, 0.95], + output_filesuffix='_iqr' + ) + + ft = 'fl_diagnostics' + with xr.open_dataset( + hef_gdir.get_filepath(ft, filesuffix=output_suffixes[0])) as ds: + fl_ids = ds.flowlines.data + + for fl_id in fl_ids: + # open data of current flowline + ds_runs = [] + for output_suffix in output_suffixes: + fp = hef_gdir.get_filepath(ft, filesuffix=output_suffix) + with 
diff --git a/oggm/tests/test_models.py b/oggm/tests/test_models.py
index 7b444a810..6e6b4fb21 100644
--- a/oggm/tests/test_models.py
+++ b/oggm/tests/test_models.py
@@ -3198,6 +3198,80 @@ def test_elevation_feedback(self, hef_gdir):
             plt.legend()
             plt.show()
 
+    @pytest.mark.slow
+    def test_fl_diag_quantiles(self, hef_gdir):
+        cfg.PARAMS['store_fl_diagnostics'] = True
+
+        # conduct three runs from which to calculate the quantiles
+        output_suffixes = ['_run1', '_run2', '_run3']
+        for i, output_suffix in enumerate(output_suffixes):
+            run_random_climate(hef_gdir, y0=1985-i*5, nyears=10,
+                               output_filesuffix=output_suffix, seed=i)
+
+        # only calculate the median
+        workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
+                                     hef_gdir,
+                                     input_filesuffixes=output_suffixes,
+                                     quantiles=0.5,
+                                     output_filesuffix='_median')
+
+        # calculate the 5th and 95th quantiles in one go
+        workflow.execute_entity_task(tasks.compute_fl_diagnostics_quantiles,
+                                     hef_gdir,
+                                     input_filesuffixes=output_suffixes,
+                                     quantiles=[0.05, 0.95],
+                                     output_filesuffix='_iqr')
+
+        ft = 'fl_diagnostics'
+        with xr.open_dataset(
+                hef_gdir.get_filepath(ft, filesuffix=output_suffixes[0])) as ds:
+            fl_ids = ds.flowlines.data
+
+        for fl_id in fl_ids:
+            # open the data of the current flowline
+            ds_runs = []
+            for output_suffix in output_suffixes:
+                fp = hef_gdir.get_filepath(ft, filesuffix=output_suffix)
+                with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
+                    ds_runs.append(ds.load())
+            fp = hef_gdir.get_filepath(ft, filesuffix='_median')
+            with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
+                ds_median = ds.load()
+            fp = hef_gdir.get_filepath(ft, filesuffix='_iqr')
+            with xr.open_dataset(fp, group=f'fl_{fl_id}') as ds:
+                ds_iqr = ds.load()
+
+            # the median flowline should always lie between the smallest and
+            # largest values of the individual runs
+            variables_to_check = ['volume_m3', 'area_m2', 'thickness_m']
+            for var in variables_to_check:
+                var_das = []
+                for ds_run in ds_runs:
+                    var_das.append(ds_run[var])
+                var_stack = xr.concat(var_das, dim='runs')
+
+                var_min = var_stack.min(dim='runs')
+                var_max = var_stack.max(dim='runs')
+
+                var_median = ds_median[var]
+                is_median_above_min = (var_median >= var_min).all()
+                is_median_below_max = (var_median <= var_max).all()
+
+                assert is_median_above_min
+                assert is_median_below_max
+
+                # the median should lie between the 5th and 95th quantiles
+                var_5th = ds_iqr.loc[{'quantile': 0.05}][var]
+                var_95th = ds_iqr.loc[{'quantile': 0.95}][var]
+
+                is_median_larger_than_5th_q = (var_median >= var_5th).all()
+                is_median_smaller_than_95th_q = (var_median <= var_95th).all()
+
+                assert is_median_larger_than_5th_q
+                assert is_median_smaller_than_95th_q
+
 
 @pytest.mark.usefixtures('with_class_wd')
 class TestDynamicSpinup:

From 3f72ea20bb9f02e438689d9626d9b41042767b42 Mon Sep 17 00:00:00 2001
From: Patrick Schmitt
Date: Thu, 26 Sep 2024 09:10:22 +0200
Subject: [PATCH 3/3] pep8

---
 oggm/core/flowline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/oggm/core/flowline.py b/oggm/core/flowline.py
index 1cfd5bfff..40ee2ac27 100644
--- a/oggm/core/flowline.py
+++ b/oggm/core/flowline.py
@@ -4599,7 +4599,7 @@ def compute_fl_diagnostics_quantiles(gdir,
                                      input_filesuffixes,
                                      quantiles=0.5,
                                      output_filesuffix='_median',
-                                    ):
+                                     ):
     """Compute quantile fl_diagnostics (e.g. median flowline of projections)
 
     This function takes a number of fl_diagnostic files and computes
     quantiles from them. This can be useful, for example, for calculating a
     median flowline of different GCM projections which use the same scenario
     (e.g. ssp126).
@@ -4629,7 +4629,7 @@ def compute_fl_diagnostics_quantiles(gdir,
         quantiles = quantiles[0]
 
     # get the filepaths of all fl_diagnostic files
-    all_fl_diag_paths= []
+    all_fl_diag_paths = []
     for filesuffix in input_filesuffixes:
         all_fl_diag_paths.append(gdir.get_filepath('fl_diagnostics',
                                                    filesuffix=filesuffix))
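
To close the loop, a short sketch of how such a quantile file can be read back, assuming the '_median' filesuffix and a glacier directory gdir as in the test above; each flowline lives in its own 'fl_{i}' group, mirroring regular fl_diagnostics files:

    import xarray as xr

    fp = gdir.get_filepath('fl_diagnostics', filesuffix='_median')
    with xr.open_dataset(fp, group='fl_0') as ds:
        ds = ds.load()

    # e.g. the median total volume evolution of the first flowline
    volume_median = ds.volume_m3.sum(dim='dis_along_flowline')
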