diff --git a/.github/workflows/create_test_conda_env.yml b/.github/workflows/create_test_conda_env.yml index f740e344..ce5de814 100644 --- a/.github/workflows/create_test_conda_env.yml +++ b/.github/workflows/create_test_conda_env.yml @@ -59,8 +59,8 @@ jobs: python --version $CONDA/envs/fre-cli/bin/python --version - # run pylint - pylint -ry --ignored-modules netCDF4 fre/ || echo "pylint returned non-zero exit code. preventing workflow from dying with this echo." + # run pylint, ignored modules avoid warnings arising from code internal to those modules + pylint --max-args 6 -ry --ignored-modules netCDF4,cmor fre/ || echo "pylint returned non-zero exit code. preventing workflow from dying with this echo." - name: Install Sphinx and Build Documentation run: | diff --git a/fre/cmor/README.md b/fre/cmor/README.md index 0dad659b..8f5179c6 100644 --- a/fre/cmor/README.md +++ b/fre/cmor/README.md @@ -1,83 +1,81 @@ -old usage notes at the top of `cmor_mixer.py` - -# Before start this script in common way run these 2 command in terminal where you are going to execute this script: -# module load python/3.9 -# conda activate cmor - -# another possible runs without any preparation in terminal: -# /home/san/anaconda/envs/cmor_dev/bin/python -# /app/spack/v0.15/linux-rhel7-x86_64/gcc-4.8.5/python/3.7.7-d6cyi6ophaei6arnmzya2kn6yumye2yl/bin/python - - -# How to run it (simple examples): -# ~/fms_yaml_tools/CMOR_3/cmor_mixer.py -# -d /archive/oar.gfdl.cmip6/CM4/warsaw_201710_om4_v1.0.1/CM4_1pctCO2_C/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr -# -l /home/san/CMOR_3/GFDL-CM4_1pctCO2_C_CMOR-Amon.lst -# -r /home/san/CMOR/cmor/cmip6-cmor-tables/Tables/CMIP6_Amon.json -# -p /home/san/CMOR/cmor/Test/CMOR_input_CM4_1pctCO2_C.json - -# ~/fms_yaml_tools/cmor_mixer.py -# -d /archive/Fabien.Paulot/ESM4/H2/ESM4_amip_D1_soilC_adj/gfdl.ncrc3-intel16-prod-openmp/pp/land/ts/monthly/5yr -# -l /home/san/CMOR_3/GFDL-ESM4_amip_CMOR-landCML.lst -# -r /home/san/CMOR/cmor/cmip6-cmor-tables/Tables/CMIP6_Lmon.json -# -p /home/san/CMOR/cmor/Test/CMOR_input_ESM4_amip.json - -# ~/fms_yaml_tools/cmor_mixer.py -# -d /archive/oar.gfdl.cmip6/CM4/warsaw_201710_om4_v1.0.1/CM4_historical/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr -# -l /home/san/CMOR_3/GFDL-CM4_historical_CMOR-Amon.lst -# -r /home/san/CMOR/cmor/cmip6-cmor-tables/Tables/Atmos_Monthly.json -# -p /home/san/CMOR/cmor/Test/CMOR_input_CM4_historical.json - -# ~/fms_yaml_tools/cmor_mixer.py -# -d /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_cmip/ts/daily/5yr -# -l /home/san/CMOR_3/GFDL-ESM4_CMOR-day_historical.lst -# -r /home/san/CMOR/cmor/cmip6-cmor-tables/Tables/CMIP6_day.json -# -p /home/san/CMOR/cmor/Test/CMOR_input_ESM4_historical.json -# -o /net2/san - -# ~/fms_yaml_tools/cmor_mixer.py -# -d /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/6hr/5yr -# -l /home/san/CMOR_3/GFDL-ESM4_CMOR-6hr.lst -# -r /home/san/CMOR/cmor/cmip6-cmor-tables/Tables/CMIP6_6hrPlev.json -# -p /home/san/CMOR/cmor/Test/CMOR_input_ESM4_historical.json - -# ~/fms_yaml_tools/cmor_mixer.py -# -d /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_cmip/ts/3hr/5yr -# -l /home/san/CMOR_3/GFDL_ESM4_historical_CMOR-3hr.lst -# -r /home/san/CMOR/cmor/cmip6-cmor-tables/Tables/CMIP6_3hr.json -# -p /home/san/CMOR/cmor/Test/CMOR_input_ESM4_historical.json -# -o /net2/san - -# Additional tables containing in /home/san/CMIP6_work/cmor/cmip6-cmor-tables/Tables: 
-# CMIP6_CV.json
-# CMIP6_formula_terms.json
-# CMIP6_grids.json
-# CMIP6_coordinate.json
-
-# Detailed description of program is placed at
-# https://docs.google.com/document/d/1HPetcUyrVXDwCBIyWheZ_2JzOz7ZHi1y3vmIlcErYeA/edit?pli=1
-
-# Keep in mind rule for input ../cmor/cmip6-cmor-tables/Tables/*.json:
-# output variables can not contain "_" in out_name, though name (and standard_name) itself can have it; example:
-# "alb_sfc": {
-#     "frequency": "mon",
-#     "modeling_realm": "atmos",
-#     "standard_name": "alb_sfc",
-#     "units": "percent",
-#     "cell_methods": "area: time: mean",
-#     "long_name": "surface albedo",
-#     "comment": "",
-#     "dimensions": "longitude latitude time",
-#     "out_name": "albsfc",
-#     "type": "real",
-#     "positive": "",
-#     "valid_min": "",
-#     "valid_max": "",
-#     "ok_min_mean_abs": "",
-#     "ok_max_mean_abs": ""
-# }
-
-# Problems with standard CMOR library:
-# - monthly variable "enth_conv_col" produces error - CMOR expects 4 dimensions but it has only 3;
-# - variable /archive/oar.gfdl.cmip6/CM4/warsaw_201710_om4_v1.0.1/CM4_historical/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_cmip/ts/3hr/5yr/atmos_cmip.1965010100-1969123123.clt.nc
-#   is not readable.
+UNDER CONSTRUCTION: old usage notes at the top of `cmor_mixer.py`, re-rigged for markdown and CMIP7.
+
+at PP/AN, module load the latest `fre-cli` that's been pushed to the main branch:
+```
+> module load fre/canopy
+> which fre
+  /home/fms/local/opt/fre-commands/canopy/bin/fre
+```
+
+alternatively, with access to conda:
+```
+> conda activate /nbhome/fms/conda/envs/fre-cli
+> which fre
+  /nbhome/fms/conda/envs/fre-cli/bin/fre
+```
+
+this subtool's help, and command-specific `run` help:
+```
+> fre cmor --help
+  Usage: fre cmor [OPTIONS] COMMAND [ARGS]...
+
+    - access fre cmor subcommands
+
+  Options:
+    --help  Show this message and exit.
+
+  Commands:
+    run  Rewrite climate model output
+
+
+# subtool command-specific help, e.g. for run
+> fre cmor run --help
+  Usage: fre cmor run [OPTIONS]
+
+    Rewrite climate model output
+
+  Options:
+    -d, --indir TEXT         Input directory  [required]
+    -l, --varlist TEXT       Variable list  [required]
+    -r, --table_config TEXT  Table configuration  [required]
+    -p, --exp_config TEXT    Experiment configuration  [required]
+    -o, --outdir TEXT        Output directory  [required]
+    --help                   Show this message and exit.
+```
+
+
+the tool requires configuration in the form of variable tables and conventions to work appropriately.
+clone the following repository and list the directory contents to get a sense of what
+the code needs from you in order to work. a few examples are shown in the output below; the CV file is of particular interest/necessity.
+```
+> git clone https://github.com/PCMDI/cmip6-cmor-tables.git fre/tests/test_files/cmip6-cmor-tables
+> ls fre/tests/test_files/cmip6-cmor-tables/Tables
+...
+  CMIP6_CV.json
+  CMIP6_formula_terms.json
+  CMIP6_grids.json
+  CMIP6_coordinate.json
+  CMIP6_input_example.json
+...
+  CMIP6_3hr.json
+...
+  CMIP6_Efx.json
+...
+  CMIP6_IyrGre.json
+...
+```
+
+
+Simple example call(s) using fre-cli, run from the root directory of this repository. note the line-continuation characters, included for readability;
+you may wish to avoid them when copy/pasting.
+``` +> fre cmor run \ + -d fre/tests/test_files/ocean_sos_var_file \ + -l fre/tests/test_files/varlist \ + -r fre/tests/test_files/cmip6-cmor-tables/Tables/CMIP6_Omon.json \ + -p fre/tests/test_files/CMOR_input_example.json \ + -o fre/tests/test_files/outdir +``` + + + diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 988cdd76..dd0556f2 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -1,42 +1,52 @@ -#!/usr/bin/env python ''' -see README.md for CMORmixer.py usage +python module housing the metadata processing routines utilizing the cmor module, in addition to +click API entry points +see README.md for additional information on `fre cmor run` (cmor_mixer.py) usage ''' -# TODO : reconcile 'lst' variable names with 'list' in variable names -# as this is confusing to read and ambiguous to interpret -# probably good to avoid the word 'list' in the names -# variable ierr is unused... what is it and what does it do? -# commented out until further investigation done - import os import json +import subprocess +from pathlib import Path import netCDF4 as nc import click import cmor +# ----- \start consts + +# ----- \end consts + +### ------ helper functions ------ ### def copy_nc(in_nc, out_nc): - ''' copy a net-cdf file, presumably ''' - print('\n\n----- START copy_nc call -----') + ''' + copy target input netcdf file in_nc to target out_nc. I have to think this is not a trivial copy + operation, as if it were, using shutil's copy would be sufficient. accepts two arguments + in_nc: string, path to an input netcdf file we wish to copy + out_nc: string, an output path to copy the targeted input netcdf file to + ''' print(f'(copy_nc) in_nc: {in_nc}') print(f'(copy_nc) out_nc: {out_nc}') + # input file dsin = nc.Dataset(in_nc) - # output file - dsout = nc.Dataset(out_nc, - "w") #, format = "NETCDF3_CLASSIC") + # output file, same exact data_model as input file. + # note- totally infuriating... + # the correct value for the format arg is netCDF4.Dataset.data_model + # and NOT netCDF4.Dataset.disk_format + dsout = nc.Dataset(out_nc, "w", + format = dsin.data_model) #Copy dimensions for dname, the_dim in dsin.dimensions.items(): - dsout.createDimension(dname, len(the_dim) if not the_dim.isunlimited() else None) + dsout.createDimension( dname, + len(the_dim) if not the_dim.isunlimited() else None ) - # Copy variables + # Copy variables and attributes for v_name, varin in dsin.variables.items(): out_var = dsout.createVariable(v_name, varin.datatype, varin.dimensions) - # Copy variable attributes out_var.setncatts({k: varin.getncattr(k) for k in varin.ncattrs()}) out_var[:] = varin[:] dsout.setncatts({a:dsin.getncattr(a) for a in dsin.ncattrs()}) @@ -44,365 +54,543 @@ def copy_nc(in_nc, out_nc): # close up dsin.close() dsout.close() - print('----- END copy_nc call -----\n\n') -def netcdf_var (proj_table_vars, var_lst, nc_fl, gfdl_var, - cmip_input_json, cmor_table_vars_file): - ''' PLACEHOLDER DESCRIPTION ''' - print('\n\n----- START netcdf_var call -----') +def get_var_filenames(indir, var_filenames = None): + ''' + appends files ending in .nc located within indir to list var_filenames accepts two arguments + indir: string, representing a path to a directory containing files ending in .nc extension + var_filenames: list of strings, empty or non-empty, to append discovered filenames to. the + object pointed to by the reference var_filenames is manipulated, and so need + not be returned. 
+ ''' + if var_filenames is None: + var_filenames = [] + var_filenames_all = os.listdir(indir) + print(f'(get_var_filenames) var_filenames_all={var_filenames_all}') + for var_file in var_filenames_all: + if var_file.endswith('.nc'): + var_filenames.append(var_file) + #print(f"(get_var_filenames) var_filenames = {var_filenames}") + if len(var_filenames) < 1: + raise ValueError(f'target directory had no files with .nc ending. indir =\n {indir}') + var_filenames.sort() - # NetCDF all time periods - var_j = var_lst[gfdl_var] - print( "(netcdf_var) input data: " ) - print(f"(netcdf_var) var_lst = {var_lst}" ) - print(f"(netcdf_var) nc_fl = {nc_fl}" ) - print(f"(netcdf_var) gfdl_var = {gfdl_var} ==> {var_j}" ) - # open the input file - ds = nc.Dataset(nc_fl,'a') +def get_iso_datetimes(var_filenames, iso_datetime_arr = None): + ''' + appends iso datetime strings found amongst filenames to iso_datetime_arr. + var_filenames: non-empty list of strings representing filenames. some of which presumably + contain datetime strings + iso_datetime_arr: list of strings, empty or non-empty, representing datetimes found in + var_filenames entries. the objet pointed to by the reference + iso_datetime_arr is manipulated, and so need-not be returned + ''' + if iso_datetime_arr is None: + iso_datetime_arr = [] + for filename in var_filenames: + iso_datetime = filename.split(".")[1] + if iso_datetime not in iso_datetime_arr: + iso_datetime_arr.append( + filename.split(".")[1] ) + iso_datetime_arr.sort() + #print(f"(get_iso_datetimes) Available dates: {iso_datetime_arr}") + if len(iso_datetime_arr) < 1: + raise ValueError('ERROR: iso_datetime_arr has length 0!') - # determine the vertical dimension - vert_dim=0#vert_dim = None - for name, variable in ds.variables.items(): - if name == gfdl_var: - dims = variable.dimensions - for dim in dims: - if ds[dim].axis and ds[dim].axis == "Z": - vert_dim = dim - print(f"(netcdf_var) Vertical dimension: {vert_dim}") +def check_dataset_for_ocean_grid(ds): + ''' + checks netCDF4.Dataset ds for ocean grid origin, and throws an error if it finds one. accepts + one argument. this function has no return. + ds: netCDF4.Dataset object containing variables with associated dimensional information. + ''' + #print(f'(check_dataset_for_ocean_grid) {ds}') + #print(f'(check_dataset_for_ocean_grid) {ds.variables}') + #print(f'(check_dataset_for_ocean_grid) {ds.variables.keys()}') + if "xh" in list(ds.variables.keys()): + raise NotImplementedError( + "'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.") - # initialize CMOR - cmor.setup() +def get_vertical_dimension(ds,target_var): + ''' + determines the vertical dimensionality of target_var within netCDF4 Dataset ds. accepts two + arguments and returns an object represnting the vertical dimensions assoc with the target_var. + ds: netCDF4.Dataset object containing variables with associated dimensional information. + target_var: string, representating a variable contained within the netCDF4.Dataset ds - # read experiment configuration file - cmor.dataset_json(cmip_input_json) - print(f"(netcdf_var) cmip_input_json = {cmip_input_json}") - print(f"(netcdf_var) cmor_table_vars_file = {cmor_table_vars_file}") + ''' + vert_dim = 0 + for name, variable in ds.variables.items(): + # not the var we are looking for? move on. + if name != target_var: + continue + dims = variable.dimensions + for dim in dims: + # if it is not a vertical axis, move on. 
+ if not (ds[dim].axis and ds[dim].axis == "Z"): + continue + vert_dim = dim + return vert_dim + + +def create_tmp_dir(outdir): + ''' + creates a tmp_dir based on targeted output directory root. returns the name of the tmp dir. + accepts one argument: + outdir: string, representing the final output directory root for the cmor modules netcdf + file output. tmp_dir will be slightly different depending on the output directory + targeted + ''' + print(f"(cmorize_target_var_files) outdir = {outdir}") + tmp_dir = None + if any( [ outdir == "/local2", + outdir.find("/work") != -1, + outdir.find("/net" ) != -1 ] ): + print(f'(cmorize_target_var_files) using /local /work /net ( tmp_dir = {outdir}/ )') + tmp_dir = "{outdir}/" + else: + print('(cmorize_target_var_files) NOT using /local /work /net (tmp_dir = outdir/tmp/ )') + tmp_dir = f"{outdir}/tmp/" + try: + os.makedirs(tmp_dir, exist_ok=True) + except Exception as exc: + raise OSError('problem creating temp output directory. stop.') from exc + return tmp_dir - # load variable list (CMOR table) - cmor.load_table(cmor_table_vars_file) - var_list = list(ds.variables.keys()) - print(f"(netcdf_var) list of variables: {var_list}") - # Define lat and lon dimensions - # Assume input file is lat/lon grid - if "xh" in var_list: - raise NotImplementedError( - "'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.") - # read the input units - var = ds[gfdl_var][:] - var_dim = len(var.shape) +### ------ BULK ROUTINES ------ ### +def rewrite_netcdf_file_var ( proj_table_vars = None, + local_var = None, + netcdf_file = None, + target_var = None, + json_exp_config = None, + json_table_config = None):#, tmp_dir = None ): + ''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + ''' + print('\n\n-------------------------- START rewrite_netcdf_file_var call -----') + print( "(rewrite_netcdf_file_var) input data: " ) + print(f"(rewrite_netcdf_file_var) local_var = {local_var}" ) + print(f"(rewrite_netcdf_file_var) target_var = {target_var}") - # Check var_dim, vert_dim - if var_dim not in [3, 4]: - raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") - if var_dim == 4 and vert_dim not in [ "plev30", "plev19", "plev8", - "height2m", "level", "lev", "levhalf"] : - raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') + # open the input file + print(f"(rewrite_netcdf_file_var) opening {netcdf_file}" ) + ds = nc.Dataset(netcdf_file,'a') + + # ocean grids are not implemented yet. + print( '(rewrite_netcdf_file_var) checking input netcdf file for oceangrid condition') + check_dataset_for_ocean_grid(ds) - print(f"(netcdf_var) var_dim = {var_dim}, var_lst[gfdl_var] = {var_j}") - print(f"(netcdf_var) gfdl_var = {gfdl_var}") - units = proj_table_vars["variable_entry"] [var_j] ["units"] - #units = proj_table_vars["variable_entry"] [gfdl_var] ["units"] - print(f"(netcdf_var) var_dim = {var_dim}, units={units}") - # "figure out the names of this dimension names programmatically !!!" 
+ # figure out the dimension names programmatically TODO + # Define lat and lon dimensions + # Assume input file is lat/lon grid lat = ds["lat"][:] lon = ds["lon"][:] lat_bnds = ds["lat_bnds"][:] lon_bnds = ds["lon_bnds"][:] - cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") - cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") - # Define time and time_bnds dimensions + ## Define time #time = ds["time"][:] + + # read in time_coords + units time_coords = ds["time"][:] time_coord_units = ds["time"].units + print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}") + + # read in time_bnds , if present time_bnds = [] - print(f"(netcdf_var) time_coord_units = {time_coord_units}") - print(f"(netcdf_var) time_bnds = {time_bnds}") try: - print( f"(netcdf_var) Executing cmor.axis('time', \n" - f"(netcdf_var) coord_vals = \n{time_coords}, \n" - f"(netcdf_var) cell_bounds = {time_bnds}, units = {time_coord_units}) " ) - time_bnds = ds["time_bnds"][:] + #print(f"(rewrite_netcdf_file_var) time_bnds = {time_bnds}") + except ValueError: + print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... moving on") + + + + + + # read the input... units? + var = ds[target_var][:] + + + # determine the vertical dimension by looping over netcdf variables + vert_dim = get_vertical_dimension(ds,target_var) #0#vert_dim = None + print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") + + + # Check var_dim, vert_dim + var_dim = len(var.shape) + if var_dim not in [3, 4]: + raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") + + # check for vert_dim error condition. if pass, assign lev for later use. + lev = None + if var_dim == 4: + if vert_dim not in [ "plev30", "plev19", "plev8", + "height2m", "level", "lev", "levhalf"] : + raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') + lev = ds[vert_dim] + + print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") + + + + # now we set up the cmor module object + # initialize CMOR + cmor.setup( + netcdf_file_action = cmor.CMOR_PRESERVE, + set_verbosity = cmor.CMOR_QUIET, #default is CMOR_NORMAL + exit_control = cmor.CMOR_NORMAL, + logfile = None, + create_subdirectories = 1 + ) + + # read experiment configuration file + cmor.dataset_json(json_exp_config) + print(f"(rewrite_netcdf_file_var) json_exp_config = {json_exp_config}") + print(f"(rewrite_netcdf_file_var) json_table_config = {json_table_config}") + + # load variable list (CMOR table) + cmor.load_table(json_table_config) + + #units = proj_table_vars["variable_entry"] [local_var] ["units"] + units = proj_table_vars["variable_entry"] [target_var] ["units"] + print(f"(rewrite_netcdf_file_var) units={units}") + + cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") + cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") + try: + print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" + f"(rewrite_netcdf_file_var) coord_vals = \n{time_coords}, \n" + f"(rewrite_netcdf_file_var) cell_bounds = time_bnds, units = {time_coord_units}) ") cmor_time = cmor.axis("time", coord_vals = time_coords, cell_bounds = time_bnds, units = time_coord_units) #cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) except ValueError as exc: - print(f"(netcdf_var) WARNING exception raised... 
exc={exc}") - print( "(netcdf_var) grabbing time_bnds didnt work... trying without it") - print( "(netcdf_var) cmor_time = cmor.axis('time', " + print(f"(rewrite_netcdf_file_var) WARNING exception raised... exc={exc}") + print( "(rewrite_netcdf_file_var) cmor_time = cmor.axis('time', " "coord_vals = time_coords, units = time_coord_units)") cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) - # Set the axes + # initializations save_ps = False ps = None - #ierr = None + ierr_ap, ierr_b = None, None ips = None + + # set axes for 3-dim case if var_dim == 3: axes = [cmor_time, cmor_lat, cmor_lon] - print(f"(netcdf_var) axes = {axes}") - + print(f"(rewrite_netcdf_file_var) axes = {axes}") + # set axes for 4-dim case elif var_dim == 4: if vert_dim in ["plev30", "plev19", "plev8", "height2m"]: - lev = ds[vert_dim] cmor_lev = cmor.axis( vert_dim, coord_vals = lev[:], units = lev.units ) - axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] - - elif vert_dim in ["level", "lev"]: - lev = ds[vert_dim] + elif vert_dim in ["level", "lev", "levhalf"]: # find the ps file nearby - ps_file = nc_fl.replace(f'.{gfdl_var}.nc', '.ps.nc') + ps_file = netcdf_file.replace(f'.{target_var}.nc', '.ps.nc') ds_ps = nc.Dataset(ps_file) - ps = ds_ps['ps'][:] - - cmor_lev = cmor.axis("alternate_hybrid_sigma", - coord_vals = lev[:], units = lev.units, - cell_bounds = ds[vert_dim+"_bnds"] ) - axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] - #ierr = cmor.zfactor( zaxis_id = cmor_lev, - # zfactor_name = "ap", - # axis_ids = [cmor_lev, ], - # zfactor_values = ds["ap"][:], - # zfactor_bounds = ds["ap_bnds"][:], - # units = ds["ap"].units ) - #ierr = cmor.zfactor( zaxis_id = cmor_lev, - # zfactor_name = "b", - # axis_ids = [cmor_lev, ], - # zfactor_values = ds["b"][:], - # zfactor_bounds = ds["b_bnds"][:], - # units = ds["b"].units ) - ips = cmor.zfactor( zaxis_id = cmor_lev, + ps = ds_ps['ps'][:].copy() + ds_ps.close() + + # assign lev_half specifics + if vert_dim == "lev_half": + ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, + zfactor_name = "ap_half", + axis_ids = [cmor_lev, ], + zfactor_values = ds["ap_bnds"][:], + units = ds["ap_bnds"].units ) + ierr_b = cmor.zfactor( zaxis_id = cmor_lev, + zfactor_name = "b_half", + axis_ids = [cmor_lev, ], + zfactor_values = ds["b_bnds"][:], + units = ds["b_bnds"].units ) + cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", + coord_vals = lev[:], + units = lev.units ) + else: + ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, + zfactor_name = "ap", + axis_ids = [cmor_lev, ], + zfactor_values = ds["ap"][:], + zfactor_bounds = ds["ap_bnds"][:], + units = ds["ap"].units ) + ierr_b = cmor.zfactor( zaxis_id = cmor_lev, + zfactor_name = "b", + axis_ids = [cmor_lev, ], + zfactor_values = ds["b"][:], + zfactor_bounds = ds["b_bnds"][:], + units = ds["b"].units ) + cmor_lev = cmor.axis( "alternate_hybrid_sigma", + coord_vals = lev[:], + units = lev.units, + cell_bounds = ds[vert_dim+"_bnds"] ) + + print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}') + print(f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}') + ips = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ps", - axis_ids = [cmor_time, cmor_lat, cmor_lon], - units = "Pa" ) + axis_ids = [cmor_time, cmor_lat, cmor_lon], + units = "Pa" ) save_ps = True + # assign axes at end of 4-dim case + axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] - elif vert_dim == "levhalf": - lev = ds[vert_dim] - - # find the ps file nearby - ps_file = nc_fl.replace(f'.{gfdl_var}.nc', 
'.ps.nc') - ds_ps = nc.Dataset(ps_file) - ps = ds_ps['ps'][:] - - #print("Calling cmor.zfactor, len,vals = ",lev.shape,",",lev[:]) - cmor_lev = cmor.axis("alternate_hybrid_sigma_half", - coord_vals = lev[:], units = lev.units ) - axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] - #ierr = cmor.zfactor( zaxis_id = cmor_lev, - # zfactor_name = "ap_half", - # axis_ids = [cmor_lev, ], - # zfactor_values = ds["ap_bnds"][:], - # units = ds["ap_bnds"].units ) - #ierr = cmor.zfactor( zaxis_id = cmor_lev, - # zfactor_name = "b_half", - # axis_ids = [cmor_lev, ], - # zfactor_values = ds["b_bnds"][:], - # units = ds["b_bnds"].units ) - ips = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = "ps", - axis_ids = [cmor_time, cmor_lat, cmor_lon], - units = "Pa" ) - save_ps = True - # read the positive attribute - var = ds[gfdl_var][:] - positive = proj_table_vars["variable_entry"] [var_j] ["positive"] - print(f"(netcdf_var) var_lst[{gfdl_var}] = {var_j}, positive = {positive}") + # read positive attribute and create cmor_var? can this return none? TODO + positive = proj_table_vars["variable_entry"] [target_var] ["positive"] + print(f"(rewrite_netcdf_file_var) positive = {positive}") + cmor_var = cmor.variable(target_var, units, axes, positive = positive) # Write the output to disk - #cmor_var = cmor.variable(var_lst[gfdl_var], units, axes) - cmor_var = cmor.variable(var_j, units, axes, positive = positive) + #var = ds[target_var][:] #was this ever needed? why? cmor.write(cmor_var, var) if save_ps: - if ips is not None and ps is not None: - cmor.write(ips, ps, store_with = cmor_var) + if any( [ ips is None, ps is None ] ): + print( 'WARNING: ps or ips is None!, but save_ps is True!') + print(f'ps = {ps}, ips = {ips}') + print( 'skipping ps writing!') else: - print('WARNING: ps or ips is None!') - print(f'ps = {ps}') - print(f'ips = {ips}') - filename = cmor.close(cmor_var, file_name = True) - print(f"(netcdf_var) filename = {filename}") - cmor.close() - - print('----- END netcdf_var call -----\n\n') + cmor.write(ips, ps, store_with = cmor_var) + cmor.close(ips, file_name = True, preserve = False) + filename = cmor.close(cmor_var, file_name = True, preserve = False) + print(f"(rewrite_netcdf_file_var) returned by cmor.close: filename = {filename}") + #cmor.close() + ds.close() + + print('-------------------------- END rewrite_netcdf_file_var call -----\n\n') return filename -def gfdl_to_pcmdi_var( proj_table_vars, var_lst, dir2cmor, gfdl_var, iso_datetime_arr, - cmip_input_json, cmor_table_vars_file, cmip_output, name_of_set ): - ''' processes a target directory/file ''' - print('\n\n----- START gfdl_to_pcmdi_var call -----') - #print( "(gfdl_to_pcmdi_var) GFDL Variable : PCMDI Variable ") - - print(f"(gfdl_to_pcmdi_var) (gfdl_var:var_lst[gfdl_var]) => {gfdl_var}:{var_lst[gfdl_var]}") - print(f"(gfdl_to_pcmdi_var) Processing Directory/File: {gfdl_var}") - # why is nc_fls an empty dict here? see below line - nc_fls = {} - - print(f"(gfdl_to_pcmdi_var) cmip_output = {cmip_output}") - if any( [ cmip_output == "/local2", - cmip_output.find("/work") != -1, - cmip_output.find("/net" ) != -1 ] ): - print('(gfdl_to_pcmdi_var) using /local /work /net ( tmp_dir = cmip_output/ )') - tmp_dir = "{cmip_output}/" - else: - print('(gfdl_to_pcmdi_var) NOT using /local /work /net (tmp_dir = cmip_output/tmp/ )') - tmp_dir = f"{cmip_output}/tmp/" - try: - os.makedirs(tmp_dir, exist_ok=True) - except Exception as exc: - raise OSError('problem creating temp output directory. 
stop.') from exc - print(f'(gfdl_to_pcmdi_var) will use tmp_dir={tmp_dir}') +def cmorize_target_var_files( indir = None, target_var = None, local_var = None, + iso_datetime_arr = None, name_of_set = None, + json_exp_config = None, outdir = None, + proj_table_vars = None, json_table_config = None ): + ''' processes a target directory/file + this routine is almost entirely exposed data movement before/after calling + rewrite_netcdf_file_var it is also the most hopelessly opaque routine in this entire dang macro. + this badboy right here accepts... lord help us... !!!NINE!!! arguments, NINE. + indir: string, path to target directories containing netcdf files to cmorize + target_var: string, name of variable inside the netcdf file to cmorize + local_var: string, value of the variable name in the filename, right before the .nc + extension. often identical to target_var but not always. + iso_datetime_arr: list of strings, each one a unique ISO datetime string found in targeted + netcdf filenames + name_of_set: string, representing the post-processing component (GFDL convention) of the + targeted files. + json_exp_config: see cmor_run_subtool arg desc + outdir: string, path to output directory root to move the cmor module output to, including + the whole directory structure + proj_table_vars: an opened json file object, read from json_table_config + json_table_config: see cmor_run_subtool arg desc + + ''' + print('\n\n-------------------------- START cmorize_target_var_files call -----') + print(f"(cmorize_target_var_files) local_var = {local_var} to be used for file-targeting.") + print(f"(cmorize_target_var_files) target_var = {target_var} to be used for reading the data " + "from the file") + print(f"(cmorize_target_var_files) outdir = {outdir}") + + + #determine a tmp dir for working on files. + tmp_dir = create_tmp_dir( outdir ) + print(f'(cmorize_target_var_files) will use tmp_dir={tmp_dir}') + + print("\n\n==== begin (???) mysterious file movement ====================================") # loop over sets of dates, each one pointing to a file - #for i in range(len(iso_datetime_arr)): + nc_fls = {} for i, iso_datetime in enumerate(iso_datetime_arr): - print("\n\n==== begin (???) mysterious file movement ====================================") + # why is nc_fls a filled list/array/object thingy here? see above line - nc_fls[i] = f"{dir2cmor}/{name_of_set}.{iso_datetime}.{gfdl_var}.nc" - if not os.path.exists(nc_fls[i]): - print (f"(gfdl_to_pcmdi_var) input file(s) {nc_fls[i]} does not exist. Moving on.") - continue #return # return? continue. - - # create a copy of the input file in the work directory - nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{gfdl_var}.nc" - print(f"(gfdl_to_pcmdi_var) nc_file_work = {nc_file_work}") + #nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{target_var}.nc" + nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{local_var}.nc" + print(f"(cmorize_target_var_files) input file = {nc_fls[i]}") + if not Path(nc_fls[i]).exists(): + print ("(cmorize_target_var_files) input file(s) not found. 
Moving on.") + continue + + + # create a copy of the input file with local var name into the work directory + #nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{target_var}.nc" + nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{local_var}.nc" + print(f"(cmorize_target_var_files) nc_file_work = {nc_file_work}") copy_nc( nc_fls[i], nc_file_work) - # copy ps also, if it's there - nc_ps_file_work = '' - nc_ps_file = nc_fls[i].replace(f'.{gfdl_var}.nc', '.ps.nc') - if os.path.exists(nc_ps_file): - print(f"(gfdl_to_pcmdi_var) nc_ps_file = {nc_ps_file}") - nc_ps_file_work = nc_file_work.replace(f'.{gfdl_var}.nc', '.ps.nc') - print(f"(gfdl_to_pcmdi_var) nc_ps_file_work = {nc_ps_file_work}") + # if the ps file exists, we'll copy it to the work directory too + nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', '.ps.nc') + nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', '.ps.nc') + if Path(nc_ps_file).exists(): + print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}") copy_nc(nc_ps_file, nc_ps_file_work) - # main CMOR actions: - print ("(gfdl_to_pcmdi_var) calling netcdf_var()") - local_file_name = netcdf_var(proj_table_vars, var_lst, nc_file_work, gfdl_var, - cmip_input_json, cmor_table_vars_file) - filename = f"{cmip_output}{cmip_output[:cmip_output.find('/')]}/{local_file_name}" - print(f"(gfdl_to_pcmdi_var) source file = {nc_fls[i]}") - print(f"(gfdl_to_pcmdi_var) filename = {filename}") - - filedir = filename[:filename.rfind("/")] - print(f"(gfdl_to_pcmdi_var) filedir = {filedir}") + # now we have a file in our targets, point CMOR to the configs and the input file(s) + print ("(cmorize_target_var_files) calling rewrite_netcdf_file_var") + gotta_go_back_here=os.getcwd()+'/' + os.chdir(gotta_go_back_here+tmp_dir) # this is unavoidable, cmor module FORCES write to CWD + local_file_name = rewrite_netcdf_file_var( proj_table_vars , + local_var , + gotta_go_back_here + nc_file_work , + target_var , + gotta_go_back_here + json_exp_config , + gotta_go_back_here + json_table_config)#, +# gotta_go_back_here + tmp_dir ) + os.chdir(gotta_go_back_here) + assert Path( gotta_go_back_here+tmp_dir+local_file_name ).exists() + #assert False + + # now that CMOR has rewritten things... we can take our post-rewriting actions + # the final output filename will be... + print(f'(cmorize_target_var_files) local_file_name={local_file_name}') + filename =f"{outdir}/{local_file_name}" + print(f"(cmorize_target_var_files) filename = {filename}") + + # the final output file directory will be... + filedir = Path(filename).parent + print(f"(cmorize_target_var_files) filedir = {filedir}") try: + print(f'(cmorize_target_var_files) attempting to create filedir={filedir}') os.makedirs(filedir) except FileExistsError: - print(f'(gfdl_to_pcmdi_var) WARNING: directory {filedir} already exists!') + print(f'(cmorize_target_var_files) WARNING: directory {filedir} already exists!') # hmm.... this is making issues for pytest - mv_cmd = f"mv {os.getcwd()}/{local_file_name} {filedir}" - print(f"(gfdl_to_pcmdi_var) mv_cmd = {mv_cmd}") - os.system(mv_cmd) + mv_cmd = f"mv {tmp_dir}{local_file_name} {filedir}" + print(f"(cmorize_target_var_files) moving files...\n {mv_cmd}") + subprocess.run(mv_cmd, shell=True, check=True) + # ------ refactor this into function? TODO + # ------ what is the use case for this logic really?? 
filename_no_nc = filename[:filename.rfind(".nc")] chunk_str = filename_no_nc[-6:] if not chunk_str.isdigit(): + print(f'(cmorize_target_var_files) WARNING: chunk_str is not a digit: ' + f'chunk_str = {chunk_str}') filename_corr = "{filename[:filename.rfind('.nc')]}_{iso_datetime}.nc" mv_cmd = f"mv {filename} {filename_corr}" - print(f"(gfdl_to_pcmdi_var) mv_cmd = {mv_cmd}") - os.system(mv_cmd) - - print("====== end (???) mysterious file movement ====================================\n\n") - - if os.path.exists(nc_file_work): - print(f'(gfdl_to_pcmdi_var) removing: nc_file_work={nc_file_work}') - os.remove(nc_file_work) - if os.path.exists(nc_ps_file_work): - print(f'(gfdl_to_pcmdi_var) removing: nc_ps_file_work={nc_ps_file_work}') - os.remove(nc_ps_file_work) - - print('----- END var2process call -----\n\n') - - - - -def cmor_run_subtool( indir = None, varlist = None, - table_config = None, exp_config = None , outdir = None): - ''' primary steering function for the cmor_mixer tool, i.e - essentially main ''' - print('\n\n----- START _cmor_run_subtool call -----') - + print(f"(cmorize_target_var_files) moving files, strange chunkstr logic...\n {mv_cmd}") + subprocess.run(mv_cmd, shell=True, check=True) + # ------ end refactor this into function? + + # delete files in work dirs + if Path(nc_file_work).exists(): + Path(nc_file_work).unlink() + + if Path(nc_ps_file_work).exists(): + Path(nc_ps_file_work).unlink() + + + + + +def cmor_run_subtool( indir = None, + json_var_list = None, + json_table_config = None, + json_exp_config = None , + outdir = None): + ''' + primary steering function for the cmor_mixer tool, i.e essentially main. Accepts five args: + indir: string, directory containing netCDF files. keys specified in json_var_list are local + variable names used for targeting specific files + json_var_list: string, path pointing to a json file containing directory of key/value + pairs. the keys are the "local" names used in the filename, and the + values pointed to by those keys are strings representing the name of the + variable contained in targeted files. the key and value are often the same, + but it is not required. + json_table_config: json file containing CMIP-compliant per-variable/metadata for specific + MIP table. The MIP table can generally be identified by the specific + filename (e.g. "Omon") + json_exp_config: json file containing other configuration details (FILL IN TO DO #TODO) + outdir: string, directory root that will contain the full output and output directory + structure generated by the cmor module upon request. 
+ ''' + if None in [indir, json_var_list, json_table_config, json_exp_config, outdir]: + raise ValueError(f'all input arguments are required!\n' + '[indir, json_var_list, json_table_config, json_exp_config, outdir] = \n' + f'[{indir}, {json_var_list}, {json_table_config}, ' + '{json_exp_config}, {outdir}]' ) # open CMOR table config file + print('(cmor_run_subtool) getting table variables from json_table_config') try: - proj_table_vars = json.load( open( table_config, "r", - encoding = "utf-8" ) ) + with open( json_table_config, "r", encoding = "utf-8") as table_config_file: + proj_table_vars=json.load(table_config_file) + except Exception as exc: raise FileNotFoundError( - f'ERROR: table_config file cannot be opened.\n' - f' table_config = {table_config}' ) from exc + f'ERROR: json_table_config file cannot be opened.\n' + f' json_table_config = {json_table_config}' ) from exc # open input variable list + print('(cmor_run_subtool) opening variable list json_var_list') try: - gfdl_var_lst = json.load( open( varlist, "r", - encoding = "utf-8" ) ) + with open( json_var_list, "r", encoding = "utf-8" ) as var_list_file: + var_list = json.load( var_list_file ) + except Exception as exc: raise FileNotFoundError( - f'ERROR: varlist file cannot be opened.\n' - f' varlist = {varlist}' ) from exc + f'ERROR: json_var_list file cannot be opened.\n' + f' json_var_list = {json_var_list}' ) from exc - # examine input files to obtain available date ranges + # examine input directory to obtain a list of input file targets var_filenames = [] - var_filenames_all = os.listdir(indir) - print(f'(cmor_run_subtool) var_filenames_all={var_filenames_all}') - for var_file in var_filenames_all: - if var_file.endswith('.nc'): - var_filenames.append(var_file) - var_filenames.sort() - print(f"(cmor_run_subtool) var_filenames = {var_filenames}") - - - # name_of_set == component label, which is not relevant for CMOR/CMIP - name_of_set = var_filenames[0].split(".")[0] - print(f"(cmor_run_subtool) component label is name_of_set = {name_of_set}") + get_var_filenames(indir, var_filenames) + print(f"(cmor_run_subtool) found filenames = \n {var_filenames}") + # examine input files to obtain target date ranges iso_datetime_arr = [] - for filename in var_filenames: - iso_datetime_arr.append( - filename.split(".")[1] ) - iso_datetime_arr.sort() - print(f"(cmor_run_subtool) Available dates: {iso_datetime_arr}") - if len(iso_datetime_arr) < 1: - raise ValueError('ERROR: iso_datetime_arr has length 0!') + get_iso_datetimes(var_filenames, iso_datetime_arr) + print(f"(cmor_run_subtool) found iso datetimes = \n {iso_datetime_arr}") - # process each variable separately - for gfdl_var in gfdl_var_lst: - if gfdl_var_lst[gfdl_var] in proj_table_vars["variable_entry"]: - gfdl_to_pcmdi_var( - proj_table_vars, gfdl_var_lst, - indir, gfdl_var, iso_datetime_arr, - exp_config, table_config, - outdir, name_of_set ) - else: - print(f"(cmor_run_subtool) WARNING: Skipping variable {gfdl_var} ...") - print( "(cmor_run_subtool) ... it's not found in CMOR variable group") - print('----- END _cmor_run_subtool call -----\n\n') + # name_of_set == component label... + # which is not relevant for CMOR/CMIP... or is it? 
+ name_of_set = var_filenames[0].split(".")[0] + print(f"(cmor_run_subtool) setting name_of_set = {name_of_set}") + + # loop over entries in the json_var_list, read into var_list + for local_var in var_list: + + # if its not in the table configurations variable_entry list, skip + if var_list[local_var] not in proj_table_vars["variable_entry"]: + print(f"(cmor_run_subtool) WARNING: skipping local_var={local_var} /" + f" target_var={target_var}") + print( "(cmor_run_subtool) ... target_var not found in CMOR variable group") + continue + + # it is in there, get the name of the data inside the netcdf file. + target_var=var_list[local_var] # often equiv to local_var but not necessarily. + if local_var != target_var: + print(f'(cmor_run_subtool) WARNING: local_var == {local_var} ' + f'!= {target_var} == target_var') + print(f'i am expecting {local_var} to be in the filename, and i expect the variable' + f' in that file to be {target_var}') + + print(f'(cmor_run_subtool) ..............beginning CMORization for {local_var}/' + f'{target_var}..........') + cmorize_target_var_files( + indir, target_var, local_var, iso_datetime_arr, # OK + name_of_set, json_exp_config, + outdir, + proj_table_vars, json_table_config # a little redundant + ) @click.command() -def _cmor_run_subtool(indir, varlist, table_config, exp_config, outdir): - ''' entry point to fre cmor run for click ''' - return cmor_run_subtool(indir, varlist, table_config, exp_config, outdir) +def _cmor_run_subtool(indir = None, + json_var_list = None, json_table_config = None, json_exp_config = None, + outdir = None): + ''' entry point to fre cmor run for click. see cmor_run_subtool for argument descriptions.''' + return cmor_run_subtool(indir, json_var_list, json_table_config, json_exp_config, outdir) if __name__ == '__main__': diff --git a/fre/cmor/frecmor.py b/fre/cmor/frecmor.py index 4e70e3c7..f764a546 100644 --- a/fre/cmor/frecmor.py +++ b/fre/cmor/frecmor.py @@ -33,7 +33,16 @@ def cmor_cli(): def run(context, indir, varlist, table_config, exp_config, outdir): # pylint: disable=unused-argument """Rewrite climate model output""" - context.forward(_cmor_run_subtool) + context.invoke( + _cmor_run_subtool, + indir = indir, + json_var_list = varlist, + json_table_config = table_config, + json_exp_config = exp_config, + outdir = outdir + ) + # context.forward( + # _cmor_run_subtool() ) if __name__ == "__main__": cmor_cli() diff --git a/fre/cmor/tests/test_cmor_run_subtool.py b/fre/cmor/tests/test_cmor_run_subtool.py index 41c87fb7..043b5705 100644 --- a/fre/cmor/tests/test_cmor_run_subtool.py +++ b/fre/cmor/tests/test_cmor_run_subtool.py @@ -1,5 +1,6 @@ -''' tests for fre.cmor._cmor_run_subtool ''' +''' tests for fre.cmor.cmor_run_subtool ''' import subprocess +import shutil from pathlib import Path from datetime import date @@ -11,12 +12,15 @@ ROOTDIR = 'fre/tests/test_files' # setup- cmip/cmor variable table(s) -CLONE_CMIP_TABLE_URL='https://github.com/PCMDI/cmip6-cmor-tables.git' -CLONE_REPO_PATH=f'{ROOTDIR}/cmip6-cmor-tables' -TABLE_CONFIG = f'{ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_Omon.json' +CLONE_CMIP_TABLE_URL = \ + 'https://github.com/PCMDI/cmip6-cmor-tables.git' +CLONE_REPO_PATH = \ + f'{ROOTDIR}/cmip6-cmor-tables' +TABLE_CONFIG = \ + f'{CLONE_REPO_PATH}/Tables/CMIP6_Omon.json' def test_setup_cmor_cmip_table_repo(): - ''' setup routine, clone the repo holding CMOR/CMIP tables ''' + ''' setup routine, if it doesnt exist, clone the repo holding CMOR/CMIP6 tables ''' if Path(TABLE_CONFIG).exists(): pass else: @@ -30,48 +34,90 @@ def 
test_setup_cmor_cmip_table_repo(): VARLIST = f'{ROOTDIR}/varlist' EXP_CONFIG = f'{ROOTDIR}/CMOR_input_example.json' OUTDIR = f'{ROOTDIR}/outdir' +TMPDIR = f'{OUTDIR}/tmp' # determined by cmor_run_subtool -YYYYMMDD=date.today().strftime('%Y%m%d') -CMOR_CREATES_DIR='CMIP6/CMIP6/ISMIP6/PCMDI/PCMDI-test-1-0/piControl-withism/r3i1p1f1/Omon/sos/gn' -# why does this have "fre" at the end of it? +YYYYMMDD = date.today().strftime('%Y%m%d') +CMOR_CREATES_DIR = \ + 'CMIP6/CMIP6/ISMIP6/PCMDI/PCMDI-test-1-0/piControl-withism/r3i1p1f1/Omon/sos/gn' FULL_OUTPUTDIR = \ - f"{OUTDIR}fre/{CMOR_CREATES_DIR}/v{YYYYMMDD}" + f"{OUTDIR}/{CMOR_CREATES_DIR}/v{YYYYMMDD}" FULL_OUTPUTFILE = \ f"{FULL_OUTPUTDIR}/sos_Omon_PCMDI-test-1-0_piControl-withism_r3i1p1f1_gn_199307-199807.nc" -# FYI +# FYI but helpful for tests FILENAME = 'ocean_monthly_1x1deg.199301-199712.sos.nc' # unneeded, this is mostly for reference FULL_INPUTFILE=f"{INDIR}/{FILENAME}" -def test_fre_cmor_run(capfd): - ''' fre cmor run, test-use case ''' - - # clean up, lest we fool outselves +def test_setup_fre_cmor_run_subtool(capfd): + ''' checks for outputfile from prev pytest runs, removes it if it's present. + this routine also checks to make sure the desired input file is present''' if Path(FULL_OUTPUTFILE).exists(): Path(FULL_OUTPUTFILE).unlink() + if Path(OUTDIR).exists(): + shutil.rmtree(OUTDIR) + assert not any ( [ Path(FULL_OUTPUTFILE).exists(), + Path(OUTDIR).exists() ] ) + assert Path(FULL_INPUTFILE).exists() + _out, _err = capfd.readouterr() + +def test_fre_cmor_run_subtool_case1(capfd): + ''' fre cmor run, test-use case ''' + + #debug + #print( + # f"fre.cmor.cmor_run_subtool(" + # f"\'{INDIR}\'," + # f"\'{VARLIST}\'," + # f"\'{TABLE_CONFIG}\'," + # f"\'{EXP_CONFIG}\'," + # f"\'{OUTDIR}\'" + # ")" + #) # test call, where meat of the workload gets done fre.cmor.cmor_run_subtool( indir = INDIR, - varlist = VARLIST, - table_config = TABLE_CONFIG, - exp_config = EXP_CONFIG, + json_var_list = VARLIST, + json_table_config = TABLE_CONFIG, + json_exp_config = EXP_CONFIG, outdir = OUTDIR ) - # success condition tricky... tool doesnt return anything really... ? 
- # TODO think about returns and success conditions assert all( [ Path(FULL_OUTPUTFILE).exists(), Path(FULL_INPUTFILE).exists() ] ) - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() -def test_fre_cmor_run_output_compare(capfd): - ''' I/O comparison of prev test-use case ''' +def test_fre_cmor_run_subtool_case1_output_compare_data(capfd): + ''' I/O data-only comparison of test case1 ''' print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE={FULL_INPUTFILE}') - nccmp_cmd= [ "nccmp", "-f", "-m", "-g", "-d", + nccmp_cmd= [ "nccmp", "-f", "-d", + f"{FULL_INPUTFILE}", + f"{FULL_OUTPUTFILE}" ] + print(f"via subprocess, running {' '.join(nccmp_cmd)}") + result = subprocess.run( ' '.join(nccmp_cmd), + shell=True, + check=False, + capture_output=True + ) + # err_list has length two if end in newline + err_list = result.stderr.decode().split('\n') + expected_err = \ + "DIFFER : FILE FORMATS : NC_FORMAT_64BIT <> NC_FORMAT_NETCDF4_CLASSIC" + assert all( [result.returncode == 1, + len(err_list)==2, + '' in err_list, + expected_err in err_list ] ) + _out, _err = capfd.readouterr() + +def test_fre_cmor_run_subtool_case1_output_compare_metadata(capfd): + ''' I/O metadata-only comparison of test case1 ''' + print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') + print(f'FULL_INPUTFILE={FULL_INPUTFILE}') + + nccmp_cmd= [ "nccmp", "-f", "-m", "-g", f"{FULL_INPUTFILE}", f"{FULL_OUTPUTFILE}" ] print(f"via subprocess, running {' '.join(nccmp_cmd)}") @@ -80,9 +126,124 @@ def test_fre_cmor_run_output_compare(capfd): check=False ) - # check file difference specifics here ----- + assert result.returncode == 1 + _out, _err = capfd.readouterr() + + +# FYI, but again, helpful for tests +FILENAME_DIFF = \ + 'ocean_monthly_1x1deg.199301-199712.sosV2.nc' +FULL_INPUTFILE_DIFF = \ + f"{INDIR}/{FILENAME_DIFF}" +VARLIST_DIFF = \ + f'{ROOTDIR}/varlist_local_target_vars_differ' +def test_setup_fre_cmor_run_subtool_case2(capfd): + ''' make a copy of the input file to the slightly different name. + checks for outputfile from prev pytest runs, removes it if it's present. + this routine also checks to make sure the desired input file is present''' + if Path(FULL_OUTPUTFILE).exists(): + Path(FULL_OUTPUTFILE).unlink() + assert not Path(FULL_OUTPUTFILE).exists() + + if Path(OUTDIR+'/CMIP6').exists(): + shutil.rmtree(OUTDIR+'/CMIP6') + assert not Path(OUTDIR+'/CMIP6').exists() + + + # VERY ANNOYING !!! FYI WARNING TODO + if Path(TMPDIR).exists(): + try: + shutil.rmtree(TMPDIR) + except OSError as exc: + print(f'WARNING: TMPDIR={TMPDIR} could not be removed.') + print( ' this does not matter that much, but is unfortunate.') + print( ' supicion: something the cmor module is using is not being closed') + + #assert not Path(TMPDIR).exists() # VERY ANNOYING !!! FYI WARNING TODO + + # VERY ANNOYING !!! FYI WARNING TODO + if Path(OUTDIR).exists(): + try: + shutil.rmtree(OUTDIR) + except OSError as exc: + print(f'WARNING: OUTDIR={OUTDIR} could not be removed.') + print( ' this does not matter that much, but is unfortunate.') + print( ' supicion: something the cmor module is using is not being closed') + + #assert not Path(OUTDIR).exists() # VERY ANNOYING !!! FYI WARNING TODO + + # make a copy of the usual test file. 
+ if not Path(FULL_INPUTFILE_DIFF).exists(): + shutil.copy( + Path(FULL_INPUTFILE), + Path(FULL_INPUTFILE_DIFF) ) + assert Path(FULL_INPUTFILE_DIFF).exists() + _out, _err = capfd.readouterr() + +def test_fre_cmor_run_subtool_case2(capfd): + ''' fre cmor run, test-use case2 ''' + + #debug + #print( + # f"fre.cmor.cmor_run_subtool(" + # f"\'{INDIR}\'," + # f"\'{VARLIST_DIFF}\'," + # f"\'{TABLE_CONFIG}\'," + # f"\'{EXP_CONFIG}\'," + # f"\'{OUTDIR}\'" + # ")" + #) + # test call, where meat of the workload gets done + fre.cmor.cmor_run_subtool( + indir = INDIR, + json_var_list = VARLIST_DIFF, + json_table_config = TABLE_CONFIG, + json_exp_config = EXP_CONFIG, + outdir = OUTDIR + ) + + # check we ran on the right input file. + assert all( [ Path(FULL_OUTPUTFILE).exists(), + Path(FULL_INPUTFILE_DIFF).exists() ] ) + _out, _err = capfd.readouterr() + +def test_fre_cmor_run_subtool_case2_output_compare_data(capfd): + ''' I/O data-only comparison of test case2 ''' + print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') + print(f'FULL_INPUTFILE_DIFF={FULL_INPUTFILE_DIFF}') + + nccmp_cmd= [ "nccmp", "-f", "-d", + f"{FULL_INPUTFILE_DIFF}", + f"{FULL_OUTPUTFILE}" ] + print(f"via subprocess, running {' '.join(nccmp_cmd)}") + result = subprocess.run( ' '.join(nccmp_cmd), + shell=True, + check=False, + capture_output=True + ) + + err_list = result.stderr.decode().split('\n')#length two if end in newline + expected_err="DIFFER : FILE FORMATS : NC_FORMAT_64BIT <> NC_FORMAT_NETCDF4_CLASSIC" + assert all( [result.returncode == 1, + len(err_list)==2, + '' in err_list, + expected_err in err_list ] ) + _out, _err = capfd.readouterr() + +def test_fre_cmor_run_subtool_case2_output_compare_metadata(capfd): + ''' I/O metadata-only comparison of test case2 ''' + print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') + print(f'FULL_INPUTFILE_DIFF={FULL_INPUTFILE_DIFF}') + + nccmp_cmd= [ "nccmp", "-f", "-m", "-g", + f"{FULL_INPUTFILE_DIFF}", + f"{FULL_OUTPUTFILE}" ] + print(f"via subprocess, running {' '.join(nccmp_cmd)}") + result = subprocess.run( ' '.join(nccmp_cmd), + shell=True, + check=False + ) - #subprocess.run(["rm", "-rf", f"{OUTDIR}/CMIP6/CMIP6/"]) assert result.returncode == 1 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() diff --git a/fre/make/createCheckout.py b/fre/make/createCheckout.py index c72e91a4..0c07ae00 100644 --- a/fre/make/createCheckout.py +++ b/fre/make/createCheckout.py @@ -76,7 +76,9 @@ def checkout_create(yamlfile,platform,target,no_parallel_checkout,jobs,execute,v freCheckout = checkout.checkout("checkout.sh",srcDir) freCheckout.writeCheckout(modelYaml.compile.getCompileYaml(),jobs,pc) freCheckout.finish(pc) - click.echo("\nCheckout script created in "+ srcDir + "/checkout.sh \n") + # Make checkout script executable + os.chmod(srcDir+"/checkout.sh", 0o744) + print("\nCheckout script created in "+ srcDir + "/checkout.sh \n") # Run the checkout script if run == True: @@ -99,7 +101,6 @@ def checkout_create(yamlfile,platform,target,no_parallel_checkout,jobs,execute,v print("\nCheckout script PREVIOUSLY created in "+ srcDir + "/checkout.sh \n") if run == True: - os.chmod(srcDir+"/checkout.sh", 0o744) try: subprocess.run(args=[srcDir+"/checkout.sh"], check=True) except: diff --git a/fre/tests/test_files/varlist_local_target_vars_differ b/fre/tests/test_files/varlist_local_target_vars_differ new file mode 100644 index 00000000..402be0e6 --- /dev/null +++ b/fre/tests/test_files/varlist_local_target_vars_differ @@ -0,0 +1,3 @@ +{ + "sosV2": "sos" +} diff --git a/fre/tests/test_fre_app_cli.py 
b/fre/tests/test_fre_app_cli.py index dfc0a171..4b61ff75 100644 --- a/fre/tests/test_fre_app_cli.py +++ b/fre/tests/test_fre_app_cli.py @@ -15,57 +15,57 @@ def test_cli_fre_app(capfd): """ fre app """ result = runner.invoke(fre.fre, args=["app"]) assert result.exit_code == 0 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_help(capfd): """ fre app --help """ result = runner.invoke(fre.fre, args=["app", "--help"]) assert result.exit_code == 0 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_opt_dne(capfd): """ fre app optionDNE """ result = runner.invoke(fre.fre, args=["app", "optionDNE"]) assert result.exit_code == 2 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() # fre app gen-time-averages def test_cli_fre_app_gen_time_averages(capfd): """ fre cmor run """ result = runner.invoke(fre.fre, args=["app", "gen-time-averages"]) assert result.exit_code == 2 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_gen_time_averages_help(capfd): """ fre cmor run --help """ result = runner.invoke(fre.fre, args=["app", "gen-time-averages", "--help"]) assert result.exit_code == 0 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_gen_time_averages_opt_dne(capfd): """ fre cmor run optionDNE """ result = runner.invoke(fre.fre, args=["app", "gen-time-averages", "optionDNE"]) assert result.exit_code == 2 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() # fre app regrid def test_cli_fre_app_regrid(capfd): """ fre cmor run """ result = runner.invoke(fre.fre, args=["app", "regrid"]) assert result.exit_code == 2 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_regrid_help(capfd): """ fre cmor run --help """ result = runner.invoke(fre.fre, args=["app", "regrid", "--help"]) assert result.exit_code == 0 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_regrid_opt_dne(capfd): """ fre cmor run optionDNE """ result = runner.invoke(fre.fre, args=["app", "regrid", "optionDNE"]) assert result.exit_code == 2 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() def test_cli_fre_app_regrid_test_case_1(capfd): """ fre cmor run --help """ @@ -113,4 +113,4 @@ def test_cli_fre_app_regrid_test_case_1(capfd): result = runner.invoke(fre.fre, args=args_list ) assert result.exit_code == 0 - out, err = capfd.readouterr() + _out, _err = capfd.readouterr() diff --git a/fre/tests/test_fre_cmor_cli.py b/fre/tests/test_fre_cmor_cli.py index dd45bc44..b2a09b62 100644 --- a/fre/tests/test_fre_cmor_cli.py +++ b/fre/tests/test_fre_cmor_cli.py @@ -3,6 +3,7 @@ from datetime import date from pathlib import Path +import click from click.testing import CliRunner from fre import fre @@ -42,9 +43,9 @@ def test_cli_fre_cmor_run_opt_dne(): assert result.exit_code == 2 -# maybe this is not the right place for this test case? # TODO # these unit tests should be more about the cli, rather than the workload -def test_cli_fre_cmor_run_case1(capfd): +YYYYMMDD=date.today().strftime('%Y%m%d') +def test_cli_fre_cmor_run_case1(): ''' fre cmor run, test-use case ''' # where are we? 
we're running pytest from the base directory of this repo @@ -58,12 +59,10 @@ def test_cli_fre_cmor_run_case1(capfd): outdir = f'{rootdir}/outdir' # determined by cmor_run_subtool - YYYYMMDD=date.today().strftime('%Y%m%d') cmor_creates_dir = \ 'CMIP6/CMIP6/ISMIP6/PCMDI/PCMDI-test-1-0/piControl-withism/r3i1p1f1/Omon/sos/gn' - # why does this have "fre" at the end of it? full_outputdir = \ - f"{outdir}fre/{cmor_creates_dir}/v{YYYYMMDD}" + f"{outdir}/{cmor_creates_dir}/v{YYYYMMDD}" # yay no more 'fre' where it shouldnt be full_outputfile = \ f"{full_outputdir}/sos_Omon_PCMDI-test-1-0_piControl-withism_r3i1p1f1_gn_199307-199807.nc" @@ -82,7 +81,54 @@ def test_cli_fre_cmor_run_case1(capfd): "--table_config", table_config, "--exp_config", exp_config, "--outdir", outdir]) + click.echo(f'stdout = \n {result.stdout}') + #click.echo(f'stderr = \n {result.stderr}') #not captured sep. + assert all ( [ result.exit_code == 0, + Path(full_outputfile).exists(), + Path(full_inputfile).exists() ] ) + + + + + +def test_cli_fre_cmor_run_case2(): + ''' fre cmor run, test-use case ''' + + # where are we? we're running pytest from the base directory of this repo + rootdir = 'fre/tests/test_files' + + # explicit inputs to tool + indir = f'{rootdir}/ocean_sos_var_file' + varlist = f'{rootdir}/varlist_local_target_vars_differ' + table_config = f'{rootdir}/cmip6-cmor-tables/Tables/CMIP6_Omon.json' + exp_config = f'{rootdir}/CMOR_input_example.json' + outdir = f'{rootdir}/outdir' + + # determined by cmor_run_subtool + cmor_creates_dir = \ + 'CMIP6/CMIP6/ISMIP6/PCMDI/PCMDI-test-1-0/piControl-withism/r3i1p1f1/Omon/sos/gn' + full_outputdir = \ + f"{outdir}/{cmor_creates_dir}/v{YYYYMMDD}" # yay no more 'fre' where it shouldnt be + full_outputfile = \ + f"{full_outputdir}/sos_Omon_PCMDI-test-1-0_piControl-withism_r3i1p1f1_gn_199307-199807.nc" + + # FYI + filename = 'ocean_monthly_1x1deg.199301-199712.sosV2.nc' # unneeded, this is mostly for reference + full_inputfile=f"{indir}/{filename}" + + # clean up, lest we fool outselves + if Path(full_outputfile).exists(): + Path(full_outputfile).unlink() + + #click.echo('') + result = runner.invoke(fre.fre, args = ["cmor", "run", + "--indir", indir, + "--varlist", varlist, + "--table_config", table_config, + "--exp_config", exp_config, + "--outdir", outdir]) + click.echo(f'stdout = \n {result.stdout}') + #click.echo(f'stderr = \n {result.stderr}') #not captured sep. 
    assert all ( [ result.exit_code == 0,
                   Path(full_outputfile).exists(),
                   Path(full_inputfile).exists() ] )
-    out, err = capfd.readouterr()
diff --git a/fre/tests/test_fre_make_cli.py b/fre/tests/test_fre_make_cli.py
index 425525eb..279760b3 100644
--- a/fre/tests/test_fre_make_cli.py
+++ b/fre/tests/test_fre_make_cli.py
@@ -1,7 +1,8 @@
 ''' test "fre make" calls '''
 
 from click.testing import CliRunner
-
+from pathlib import Path
+import os
 from fre import fre
 
 runner = CliRunner()
@@ -20,3 +21,42 @@ def test_cli_fre_make_opt_dne():
     ''' fre make optionDNE '''
     result = runner.invoke(fre.fre, args=["make", "optionDNE"])
     assert result.exit_code == 2
+
+def test_cli_fre_make_create_checkout_baremetal():
+    ''' fre make create-checkout -y am5.yaml -p ncrc5.intel23 -t debug'''
+    # Set paths and click options
+    test_dir = Path("fre/tests")
+    yamlfile = Path("fre/make/tests/AM5_example/")
+    platform = "ncrc5.intel23"
+    target = "debug"
+
+    # Create output path to test that files exist
+    out_path=f"{test_dir}/fremake_out"
+    Path(out_path).mkdir(parents=True,exist_ok=True)
+
+    # Set HOME for modelRoot location (output location) in fre make
+    os.environ["HOME"]=str(Path(out_path))
+
+    # run create-checkout
+    result = runner.invoke(fre.fre, args=["make", "create-checkout", "-y", f"{yamlfile}/am5.yaml", "-p", platform, "-t", target])
+
+    # Check for a successful command, creation of the checkout script, and that the script is executable (os.access - checks if a file has a specific access mode, os.X_OK - checks executable permission)
+    assert all ([result.exit_code == 0,
+                 Path(f"{out_path}/fremake_canopy/test/am5/src/checkout.sh").exists(),
+                 os.access(Path(f"{out_path}/fremake_canopy/test/am5/src/checkout.sh"), os.X_OK)])
+
+def test_cli_fre_make_create_checkout_container():
+    ''' fre make create-checkout -y am5.yaml -p hpcme.2023 -t debug'''
+    # Set paths and click options
+    test_dir = Path("fre/tests")
+    yamlfile = Path("fre/make/tests/AM5_example/")
+    platform = "hpcme.2023"
+    target = "debug"
+
+    # run create-checkout
+    result = runner.invoke(fre.fre, args=["make", "create-checkout", "-y", f"{yamlfile}/am5.yaml", "-p", platform, "-t", target])
+
+    # Check for a successful command, creation of the checkout script, and that the script is NOT executable in the container case (os.access - checks if a file has a specific access mode, os.X_OK - checks executable permission)
+    assert all ([result.exit_code == 0,
+                 Path(f"tmp/{platform}/checkout.sh").exists(),
+                 os.access(Path(f"tmp/{platform}/checkout.sh"), os.X_OK) == False ])
diff --git a/meta.yaml b/meta.yaml
index 9625aba2..07f76686 100644
--- a/meta.yaml
+++ b/meta.yaml
@@ -62,7 +62,7 @@ test:
     - fre.cmor
     - fre.catalog
   commands:
-    - pylint -ry --ignored-modules netCDF4 fre/ || echo "pylint returned non-zero exit code and will kill the workflow. guarding against this now."
+    - pylint --max-args 6 -ry --ignored-modules netCDF4,cmor fre/ || echo "pylint returned non-zero exit code and will kill the workflow. guarding against this now."
     - pip install GitPython && pytest --config-file=fre/pytest.ini --cov-config=fre/coveragerc --cov=fre fre/
     - fre --help
     - fre pp --help
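
For reference, the keyword-argument interface exercised by the tests in this diff can also be driven directly from Python, without the click layer. The sketch below is not part of the patch; it reuses the `fre/tests/test_files` inputs shown in the README above and assumes the repository root is the working directory and that the PCMDI `cmip6-cmor-tables` clone described earlier is in place.
```python
# minimal sketch: call the subtool the same way test_cmor_run_subtool.py does,
# using the README's test-file paths (adjust these for a real pp directory).
import fre.cmor

fre.cmor.cmor_run_subtool(
    indir             = 'fre/tests/test_files/ocean_sos_var_file',                        # directory of *.nc input files
    json_var_list     = 'fre/tests/test_files/varlist',                                   # {"local filename var": "target var"}
    json_table_config = 'fre/tests/test_files/cmip6-cmor-tables/Tables/CMIP6_Omon.json',  # MIP table
    json_exp_config   = 'fre/tests/test_files/CMOR_input_example.json',                   # experiment metadata
    outdir            = 'fre/tests/test_files/outdir' )                                   # CMIP6/... tree is created here
```
This is the same call that `fre cmor run` reaches through `context.invoke(_cmor_run_subtool, ...)` in `frecmor.py`, so CLI and Python usage should stay in lockstep.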