From 50c66ec7294f672588234c925254e7fb7350f12d Mon Sep 17 00:00:00 2001 From: elbashandy Date: Tue, 6 Oct 2020 13:51:03 -0700 Subject: [PATCH 1/2] Changing teca_cf_writer_collective tests to read from the Pipeline 1 directly instead of baseline in TECA_DATA_ROOT --- test/CMakeLists.txt | 14 ++++---- test/python/CMakeLists.txt | 6 ++-- test/python/test_cf_writer_collective.py | 39 ++++++--------------- test/test_cf_writer_collective.cpp | 43 +++++------------------- 4 files changed, 27 insertions(+), 75 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e980b57a2..062c5a31d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -649,26 +649,24 @@ teca_add_test(test_cf_writer_collective_serial EXEC_NAME test_cf_writer_collective SOURCES test_cf_writer_collective.cpp LIBS teca_core teca_data teca_alg teca_io ${teca_test_link} - COMMAND test_cf_writer_collective 128 512 128 1 - "${TECA_DATA_ROOT}/test_cf_writer_collective_%t%.bin" 213 + COMMAND test_cf_writer_collective 128 512 128 1 213 FEATURES ${TECA_HAS_NETCDF} REQ_TECA_DATA) teca_add_test(test_cf_writer_collective_threads - COMMAND test_cf_writer_collective 128 512 128 ${TEST_CORES} - "${TECA_DATA_ROOT}/test_cf_writer_collective_%t%.bin" 213 + COMMAND test_cf_writer_collective 128 512 128 ${TEST_CORES} 213 FEATURES ${TECA_HAS_NETCDF} REQ_TECA_DATA) teca_add_test(test_cf_writer_collective_mpi - COMMAND ${MPIEXEC} -n ${TEST_CORES} test_cf_writer_collective 128 512 128 1 - "${TECA_DATA_ROOT}/test_cf_writer_collective_%t%.bin" 213 + COMMAND ${MPIEXEC} -n ${TEST_CORES} + test_cf_writer_collective 128 512 128 1 213 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_MPI} REQ_TECA_DATA) teca_add_test(test_cf_writer_collective_mpi_threads - COMMAND ${MPIEXEC} -n ${HALF_TEST_CORES} test_cf_writer_collective 128 512 128 2 - "${TECA_DATA_ROOT}/test_cf_writer_collective_%t%.bin" 213 + COMMAND ${MPIEXEC} -n ${HALF_TEST_CORES} + test_cf_writer_collective 128 512 128 2 213 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_MPI} REQ_TECA_DATA) diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index 34f8d7085..4f5e7ca9b 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -393,15 +393,13 @@ teca_add_test(py_test_nested_pipeline teca_add_test(py_test_cf_writer_collective_serial COMMAND ${PYTHON_EXECUTABLE} - "${CMAKE_CURRENT_SOURCE_DIR}/test_cf_writer_collective.py" 128 512 128 - "${TECA_DATA_ROOT}/test_cf_writer_collective_%t%.bin" 213 + "${CMAKE_CURRENT_SOURCE_DIR}/test_cf_writer_collective.py" 128 512 128 213 FEATURES ${TECA_HAS_NETCDF} REQ_TECA_DATA) teca_add_test(py_test_cf_writer_collective_mpi COMMAND ${MPIEXEC} -n ${TEST_CORES} ${PYTHON_EXECUTABLE} - "${CMAKE_CURRENT_SOURCE_DIR}/test_cf_writer_collective.py" 128 512 128 - "${TECA_DATA_ROOT}/test_cf_writer_collective_%t%.bin" 213 + "${CMAKE_CURRENT_SOURCE_DIR}/test_cf_writer_collective.py" 128 512 128 213 FEATURES ${TECA_HAS_NETCDF} ${MPI4Py_FOUND} REQ_TECA_DATA) diff --git a/test/python/test_cf_writer_collective.py b/test/python/test_cf_writer_collective.py index f7be2f1c3..1980f5453 100644 --- a/test/python/test_cf_writer_collective.py +++ b/test/python/test_cf_writer_collective.py @@ -7,17 +7,16 @@ set_stack_trace_on_error() -if len(sys.argv) != 6: +if len(sys.argv) != 5: sys.stderr.write('test_information_array_io.py [n points] [n steps] ' \ - '[steps per file] [baseline file] [baseline step]\n') + '[steps per file] [step]\n') sys.exit(-1) n_threads = 1 nx = int(sys.argv[1]) n_steps = int(sys.argv[2]) steps_per_file = int(sys.argv[3]) -baseline = sys.argv[4] -check_step = int(sys.argv[5]) +check_step = int(sys.argv[4]) out_file = 'py_test_cf_writer_collective-%t%.nc' files_regex = 'py_test_cf_writer_collective.*\\.nc$' @@ -240,28 +239,10 @@ def execute(self, port, data_in, req_in): rex.set_start_index(check_step) rex.set_end_index(check_step) - fn = file_util.replace_timestep(baseline, check_step) - do_test = system_util.get_environment_variable_bool('TECA_DO_TEST', True) - if do_test and os.path.exists(fn): - sys.stderr.write('running the test...\n') - - cmr = teca_cartesian_mesh_reader.New() - cmr.set_file_name(fn) - - diff = teca_dataset_diff.New() - diff.set_communicator(MPI.COMM_SELF) - diff.set_input_connection(0, cmr.get_output_port()) - diff.set_input_connection(1, par.get_output_port()) - diff.set_executive(rex) - diff.set_tolerance(1.e-4) - diff.update() - else: - sys.stderr.write('writing the baseline...\n') - - cmw = teca_cartesian_mesh_writer.New() - cmw.set_communicator(MPI.COMM_SELF) - cmw.set_file_name(baseline) - cmw.set_input_connection(par.get_output_port()) - cmw.set_file_name(baseline) - cmw.set_executive(rex) - cmw.update() + diff = teca_dataset_diff.New() + diff.set_communicator(MPI.COMM_SELF) + diff.set_input_connection(0, gd.get_output_port()) + diff.set_input_connection(1, par.get_output_port()) + diff.set_executive(rex) + diff.set_tolerance(1.e-4) + diff.update() diff --git a/test/test_cf_writer_collective.cpp b/test/test_cf_writer_collective.cpp index 426da51d9..1fc6d3035 100644 --- a/test/test_cf_writer_collective.cpp +++ b/test/test_cf_writer_collective.cpp @@ -329,10 +329,10 @@ int main(int argc, char **argv) teca_system_interface::set_stack_trace_on_error(); teca_system_interface::set_stack_trace_on_mpi_error(); - if (argc != 7) + if (argc != 6) { std::cerr << "test_information_array_io.py [n points] [n steps] " - "[steps per file] [n threads] [baseline file] [baseline step]" << std::endl; + "[steps per file] [n threads] [step]" << std::endl; return -1; } @@ -340,8 +340,7 @@ int main(int argc, char **argv) unsigned long n_steps = atoi(argv[2]); int steps_per_file = atoi(argv[3]); int n_threads = atoi(argv[4]); - const char *baseline = argv[5]; - int check_step = atoi(argv[6]); + int check_step = atoi(argv[5]); const char *out_file = "test_cf_writer_collective-%t%.nc"; const char *files_regex = "test_cf_writer_collective.*\\.nc$"; @@ -406,36 +405,12 @@ int main(int argc, char **argv) rex->set_start_index(check_step); rex->set_end_index(check_step); - std::string fn(baseline); - teca_file_util::replace_timestep(fn, check_step); - bool do_test = true; - teca_system_util::get_environment_variable("TECA_DO_TEST", do_test); - if (do_test && teca_file_util::file_exists(fn.c_str())) - { - std::cerr << "running the test..." << std::endl; - - p_teca_cartesian_mesh_reader cmr = teca_cartesian_mesh_reader::New(); - cmr->set_file_name(fn); - - p_teca_dataset_diff diff = teca_dataset_diff::New(); - diff->set_communicator(MPI_COMM_SELF); - diff->set_input_connection(0, cmr->get_output_port()); - diff->set_input_connection(1, par->get_output_port()); - diff->set_executive(rex); - diff->update(); - } - else - { - std::cerr << "writing the baseline..." << std::endl; - - p_teca_cartesian_mesh_writer cmw = teca_cartesian_mesh_writer::New(); - cmw->set_communicator(MPI_COMM_SELF); - cmw->set_file_name(baseline); - cmw->set_input_connection(par->get_output_port()); - cmw->set_file_name(baseline); - cmw->set_executive(rex); - cmw->update(); - } + p_teca_dataset_diff diff = teca_dataset_diff::New(); + diff->set_communicator(MPI_COMM_SELF); + diff->set_input_connection(0, gd->get_output_port()); + diff->set_input_connection(1, par->get_output_port()); + diff->set_executive(rex); + diff->update(); } return 0; From 1a5cb871b95313dfe6b0820dd3760a88afee4e4c Mon Sep 17 00:00:00 2001 From: elbashandy Date: Mon, 5 Oct 2020 13:48:40 -0700 Subject: [PATCH 2/2] Supporting Regex in teca_cartesian_mesh_reader --- io/teca_cartesian_mesh_reader.cxx | 90 ++++++++++++++++++++++++------- io/teca_cartesian_mesh_reader.h | 6 +++ 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/io/teca_cartesian_mesh_reader.cxx b/io/teca_cartesian_mesh_reader.cxx index dfeca9e6e..11c3d0f98 100644 --- a/io/teca_cartesian_mesh_reader.cxx +++ b/io/teca_cartesian_mesh_reader.cxx @@ -28,13 +28,13 @@ struct teca_cartesian_mesh_reader::teca_cartesian_mesh_reader_internals static p_teca_mesh read_cartesian_mesh( const std::string &file_name); - p_teca_mesh mesh; + teca_metadata metadata; }; // -------------------------------------------------------------------------- void teca_cartesian_mesh_reader::teca_cartesian_mesh_reader_internals::clear() { - this->mesh = nullptr; + this->metadata.clear(); } // -------------------------------------------------------------------------- @@ -79,7 +79,10 @@ teca_cartesian_mesh_reader::teca_cartesian_mesh_reader_internals::read_cartesian // -------------------------------------------------------------------------- -teca_cartesian_mesh_reader::teca_cartesian_mesh_reader() : generate_original_ids(0) +teca_cartesian_mesh_reader::teca_cartesian_mesh_reader() : +file_name(""), +files_regex(""), +generate_original_ids(0) { this->internals = new teca_cartesian_mesh_reader_internals; } @@ -101,6 +104,9 @@ void teca_cartesian_mesh_reader::get_properties_description( opts.add_options() TECA_POPTS_GET(std::string, prefix, file_name, "a file name to read") + TECA_POPTS_GET(std::string, prefix, files_regex, + "a regular expression that matches the set of files " + "comprising the dataset") ; global_opts.add(opts); @@ -111,6 +117,7 @@ void teca_cartesian_mesh_reader::set_properties(const std::string &prefix, variables_map &opts) { TECA_POPTS_SET(opts, std::string, prefix, file_name) + TECA_POPTS_SET(opts, std::string, prefix, files_regex) } #endif @@ -144,23 +151,43 @@ teca_metadata teca_cartesian_mesh_reader::get_output_metadata(unsigned int port, // 1 use regex for multi step dataset // 2 read metadata without reading mesh - // read the mesh if we have not already done so - if (!this->internals->mesh) + if (this->internals->metadata) + return this->internals->metadata; + + std::vector files; + std::string path; + + if (!this->file_name.empty()) { - if (!(this->internals->mesh = - teca_cartesian_mesh_reader_internals::read_cartesian_mesh(this->file_name))) + files.push_back(teca_file_util::filename(this->file_name)); + path = teca_file_util::path(this->file_name); + } + else + { + // use regex + std::string regex = teca_file_util::filename(this->files_regex); + path = teca_file_util::path(this->files_regex); + + if (teca_file_util::locate_files(path, regex, files)) { - TECA_ERROR("Failed to read the mesh from \"" << this->file_name << "\"") + TECA_ERROR( + << "Failed to locate any files" << endl + << this->files_regex << endl + << path << endl + << regex) return teca_metadata(); } } - teca_metadata md = this->internals->mesh->get_metadata(); - md.set("index_initializer_key", std::string("number_of_meshes")); - md.set("index_request_key", std::string("mesh_id")); - md.set("number_of_meshes", 1l); + size_t n_files = files.size(); - return md; + this->internals->metadata.set("index_initializer_key", std::string("number_of_time_steps")); + this->internals->metadata.set("number_of_time_steps", n_files); + this->internals->metadata.set("index_request_key", std::string("time_step")); + this->internals->metadata.set("files", files); + this->internals->metadata.set("root", path); + + return this->internals->metadata; } // -------------------------------------------------------------------------- @@ -174,18 +201,45 @@ const_p_teca_dataset teca_cartesian_mesh_reader::execute(unsigned int port, #endif (void) port; (void) input_data; - (void) request; // TODO // 1 handle request for specific index // 2 handle spatial subseting // 3 Pass only requested arrays - p_teca_dataset ds = this->internals->mesh->new_instance(); - ds->shallow_copy(this->internals->mesh); + // get the timestep + unsigned long time_step = 0; + if (request.get("time_step", time_step)) + { + TECA_ERROR("Request is missing time_step") + return nullptr; + } + + std::string path; + std::string file; + if (this->internals->metadata.get("root", path) + || this->internals->metadata.get("files", time_step, file)) + { + TECA_ERROR("time_step=" << time_step + << " Failed to locate file for time step " << time_step) + return nullptr; + } + + std::string file_path = path + PATH_SEP + file; + + p_teca_mesh mesh; + if (!(mesh = + teca_cartesian_mesh_reader_internals::read_cartesian_mesh(file_path))) + { + TECA_ERROR("Failed to read the mesh from \"" << file_path << "\"") + return nullptr; + } + + p_teca_dataset ds = mesh->new_instance(); + ds->shallow_copy(mesh); - ds->get_metadata().set("index_request_key", std::string("mesh_id")); - ds->get_metadata().set("mesh_id", 0l); + ds->get_metadata().set("index_request_key", std::string("time_step")); + ds->get_metadata().set("time_step", time_step); return ds; } diff --git a/io/teca_cartesian_mesh_reader.h b/io/teca_cartesian_mesh_reader.h index 66b6ddcd5..666c4cd97 100644 --- a/io/teca_cartesian_mesh_reader.h +++ b/io/teca_cartesian_mesh_reader.h @@ -42,6 +42,11 @@ class teca_cartesian_mesh_reader : public teca_algorithm // the file from which data will be read. TECA_ALGORITHM_PROPERTY(std::string, file_name) + // describe the set of files comprising the dataset. This + // should contain the full path and regex describing the + // file name pattern + TECA_ALGORITHM_PROPERTY(std::string, files_regex) + // name of the column containing index values. // if this is not empty the reader will operate // in parallel mode serving up requested indices @@ -90,6 +95,7 @@ class teca_cartesian_mesh_reader : public teca_algorithm private: std::string file_name; + std::string files_regex; std::string index_column; int generate_original_ids; std::vector metadata_column_names;