From fc8dbcbd4f650fee553d1065bf5a9afd498c5b98 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Thu, 6 Jul 2023 11:00:03 -0700 Subject: [PATCH 1/6] tc_candiates add doxygen documentation --- alg/teca_tc_candidates.h | 45 ++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/alg/teca_tc_candidates.h b/alg/teca_tc_candidates.h index 070ef5c0d..e3ba93ce0 100644 --- a/alg/teca_tc_candidates.h +++ b/alg/teca_tc_candidates.h @@ -60,19 +60,26 @@ class TECA_EXPORT teca_tc_candidates : public teca_algorithm TECA_GET_ALGORITHM_PROPERTIES_DESCRIPTION() TECA_SET_ALGORITHM_PROPERTIES() - // set/get the name of input variables + /** @name input_variables + * Set the names of the variables that are required for TC detection. + */ + ///@{ TECA_ALGORITHM_PROPERTY(std::string, surface_wind_speed_variable) TECA_ALGORITHM_PROPERTY(std::string, vorticity_850mb_variable) TECA_ALGORITHM_PROPERTY(std::string, sea_level_pressure_variable) TECA_ALGORITHM_PROPERTY(std::string, core_temperature_variable) TECA_ALGORITHM_PROPERTY(std::string, thickness_variable) - - // a candidate is defined as having: - // 1) a local maximum in vorticity above vorticty_850mb_threshold, - // centered on a window of vorticty_850mb_window degrees - // 2) a local minimum in pressure within max_core_radius degrees - // 3) having max pressure delta within max_pressure_radius at - // that location + ///@} + + /** @name detector_controls + * Set the thresholds controlling detector behavior. A TC candidate is + * defined as having: + * 1. a local maximum in vorticity above min_vorticity_850mb, centered + * on a window of vorticity_850mb_window degrees + * 2. a local minimum in pressure within max_core_radius degrees + * 3. 
having max pressure delta within max_pressure_radius at that location + */ + ///@{ TECA_ALGORITHM_PROPERTY(double, max_core_radius) TECA_ALGORITHM_PROPERTY(double, min_vorticity_850mb) TECA_ALGORITHM_PROPERTY(double, vorticity_850mb_window) @@ -86,21 +93,23 @@ class TECA_EXPORT teca_tc_candidates : public teca_algorithm TECA_ALGORITHM_PROPERTY(double, max_thickness_delta) TECA_ALGORITHM_PROPERTY(double, max_thickness_radius) - // set/get the bounding box to search for storms - // in units of degrees lat,lon + // set/get the number of iterations to search for the storm local minimum. + // raising this parameter might increase detections but the detector will + // run slower. default is 50. + TECA_ALGORITHM_PROPERTY(int, minimizer_iterations) + ///@} + + /** @name spatial_subset + * Set/get the bounding box to search for storms in units of degrees lat,lon + */ + ///@{ TECA_ALGORITHM_PROPERTY(double, search_lat_low) TECA_ALGORITHM_PROPERTY(double, search_lat_high) TECA_ALGORITHM_PROPERTY(double, search_lon_low) TECA_ALGORITHM_PROPERTY(double, search_lon_high) + ///@} - // set/get the number of iterations to search for the - // storm local minimum. raising this parameter might increase - // detections but the detector will run slower. default is - // 50. 
- TECA_ALGORITHM_PROPERTY(int, minimizer_iterations) - - // send human readable representation to the - // stream + /// send human readable representation to the stream virtual void to_stream(std::ostream &os) const override; protected: From 0f6118867f78ee2e013aa85873b8e8437e1cad38 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Fri, 23 Jun 2023 15:59:00 -0700 Subject: [PATCH 2/6] tc_candidates add OpenMP parallelism to the GFDL TC detector --- CMakeLists.txt | 18 ++++++++++++++++-- alg/CMakeLists.txt | 7 +++++++ alg/gfdl_tc_candidates.f90.in | 16 ++++++++++++++++ test/test_tc_candidates.cpp | 5 +++-- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e7d877d96..a28cade37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,11 +22,10 @@ else () set(CMAKE_C_VISIBILITY_PRESET hidden) endif() + # this prevents a relink when a shared library's implementation changes set(CMAKE_LINK_DEPENDS_NO_SHARED ON) - - # set build/install sub dirs for various components if (NOT LIB_PREFIX) set(LIB_PREFIX lib) @@ -130,6 +129,7 @@ include(teca_interface_library) # out if these are not found. 
for those times when you don't set the corresponding # REQUIRE variable to FALSE set(REQUIRE_CUDA FALSE CACHE BOOL "Forces build failure when CUDA is missing") +set(REQUIRE_OPENMP FALSE CACHE BOOL "Forces build failure when OpenMP is missing") set(REQUIRE_MPI TRUE CACHE BOOL "Forces build failure when MPI is missing") set(REQUIRE_NETCDF TRUE CACHE BOOL "Forces build failure when NetCDF is missing") set(REQUIRE_NETCDF_MPI TRUE CACHE BOOL "Forces build failure when NetCDF_MPI is missing") @@ -166,6 +166,20 @@ else() set(HAMR_ENABLE_CUDA OFF CACHE BOOL "") endif() +# configure for OpenMP +set(tmp OFF) +find_package(OpenMP COMPONENTS Fortran) +if (OpenMP_Fortran_FOUND AND ((DEFINED TECA_HAS_OPENMP AND TECA_HAS_OPENMP) + OR (NOT DEFINED TECA_HAS_OPENMP))) + message(STATUS "OpenMP features (${OpenMP_Fortran_VERSION}) -- enabled") + set(tmp ON) +elseif (REQUIRE_OPENMP) + message(STATUS "OpenMP features -- required but not found.") +else() + message(STATUS "OpenMP features -- not found.") +endif() +set(TECA_HAS_OPENMP ${tmp} CACHE BOOL "OpenMP features") + # configure for MPI if (ENABLE_CRAY_MPICH) set(ENV{PKG_CONFIG_PATH} "$ENV{CRAY_MPICH_DIR}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") diff --git a/alg/CMakeLists.txt b/alg/CMakeLists.txt index 5048444c6..36d66b7e6 100644 --- a/alg/CMakeLists.txt +++ b/alg/CMakeLists.txt @@ -91,6 +91,9 @@ foreach(generic_src ${teca_alg_f90_generics}) set(iso_c_type_coord "${f_type}(c_${c_type_coord})") configure_file(${generic_src}.f90.in ${generic_src}_${decorator}.f90 @ONLY) list(APPEND teca_alg_f90_srcs ${generic_src}_${decorator}.f90) + if (TECA_HAS_OPENMP) + set_source_files_properties(${generic_src}_${decorator}.f90 PROPERTIES COMPILE_FLAGS ${OpenMP_Fortran_FLAGS}) + endif() endforeach() endforeach() endforeach() @@ -109,6 +112,10 @@ if (TECA_HAS_CUDA) set_source_files_properties(${teca_alg_cxx_srcs} PROPERTIES LANGUAGE CUDA) endif() +if (TECA_HAS_OPENMP) + list(APPEND teca_alg_link OpenMP::OpenMP_Fortran) +endif() + add_library(teca_alg 
${teca_alg_cxx_srcs} ${teca_alg_cuda_srcs} ${teca_alg_f90_srcs}) target_link_libraries(teca_alg teca_data teca_core ${teca_alg_link}) diff --git a/alg/gfdl_tc_candidates.f90.in b/alg/gfdl_tc_candidates.f90.in index 3ce36d9fd..963b1c5af 100644 --- a/alg/gfdl_tc_candidates.f90.in +++ b/alg/gfdl_tc_candidates.f90.in @@ -262,6 +262,16 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & call splie3_@decorator@(rlon, thick, thick_dx) ! loop over grid & look for storms + !$omp parallel do schedule(dynamic) default(none) & + !$omp& shared(tc_table,can_id,storm_id,nx,nx2,ix,jx,ixp6, & + !$omp& jxp6, max_core_radius,min_vort,vort_win_size,max_psl_dy,max_psl_dr, & + !$omp& max_twc_dy,max_twc_dr,max_thick_dy,max_thick_dr,Gwind,Gvort,Gtbar,Gpsl, & + !$omp& Gthick,Grlat,Grlon,Gnlat,Gnlon,frprm_itmax,time_step,rlon,rlat, & + !$omp& vort,wind,psl,psl_dx,psl_dy,tbar,tbar_dx,tbar_dy,thick,thick_dx,thick_dy) & + !$omp& private(im,ip,jm,jp,ierr_pos,ierr_mag,wind_max,psl_min,twc_max,thick_max,lon_vort, & + !$omp& lon_psl,lon_twc,lon_thick,lat_vort,lat_psl,lat_twc,lat_thick,exist_twc, & + !$omp& exist_thick,p,xx,yy,rr,fret,have_thick,have_twc,can_ij,w_msg) & + !$omp& firstprivate(nxp1,jxp3,ixp3) do j = nxp1,jxp3 do i = nxp1,ixp3 @@ -276,6 +286,7 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & .or. (vort(i,j) .lt. min_vort)) & cycle + !$omp atomic can_id = can_id + 1 lon_vort = rlon(i) @@ -337,7 +348,9 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & endif ! --- we have strom a candidate + !$omp atomic storm_id = storm_id + 1 + !$omp end atomic ! --- step 3: check for presence of a warm core exist_twc = .false. 
@@ -422,13 +435,16 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & wind_max = maxval(wind(im:ip,jm:jp)) + !$omp critical call teca_tc_append_candidate_@decorator@( & storm_id, lon_psl, lat_psl, wind_max, vort(i,j), & psl_min, have_twc, have_thick, twc_max, thick_max, & tc_table) + !$omp end critical end do end do + !$omp end parallel do deallocate(rlon) deallocate(rlat) diff --git a/test/test_tc_candidates.cpp b/test/test_tc_candidates.cpp index 0a880e466..f28aab607 100644 --- a/test/test_tc_candidates.cpp +++ b/test/test_tc_candidates.cpp @@ -21,6 +21,7 @@ #include "teca_system_interface.h" #include "teca_system_util.h" #include "teca_mpi.h" +#include "teca_thread_util.h" #include #include @@ -45,7 +46,7 @@ int main(int argc, char **argv) int have_baseline = 0; long start_index = 0; long end_index = -1; - unsigned int n_threads = 1; + int n_threads = -1; string ux_850mb; string uy_850mb; string ux_surf; @@ -153,7 +154,7 @@ int main(int argc, char **argv) map_reduce->set_start_index(start_index); map_reduce->set_end_index(end_index); map_reduce->set_verbose(1); - map_reduce->set_thread_pool_size(n_threads); + map_reduce->set_thread_pool_size(1); //n_threads); // sort results in time p_teca_table_sort sort = teca_table_sort::New(); From bf939b13a3684b7ed9a055679581aaf7775e3744 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Fri, 23 Jun 2023 15:59:46 -0700 Subject: [PATCH 3/6] add core_init a place to set OpenMP control variables this is mostly a noop at the moment since OpenMP variables are different w and wo MPI and we can't access MPI during library loading. 
--- core/CMakeLists.txt | 1 + core/teca_core_init.cxx | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 core/teca_core_init.cxx diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 0e232601c..b07f0d591 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -8,6 +8,7 @@ set(teca_core_srcs teca_bad_cast.cxx teca_binary_stream.cxx teca_common.cxx + teca_core_init.cxx teca_dataset.cxx teca_dataset_capture.cxx teca_index_executive.cxx diff --git a/core/teca_core_init.cxx b/core/teca_core_init.cxx new file mode 100644 index 000000000..dcd104b4b --- /dev/null +++ b/core/teca_core_init.cxx @@ -0,0 +1,38 @@ +#include +#include +#include +#include + +__attribute__((constructor)) void init(void) +{ +#if defined(TECA_DEBUG) + std::cerr << "teca_core initializing ... " << std::endl; +#endif +/* TODO -- problems with MPI + * with multiple MPI ranks per node, MPI OMP_PROC_BIND should be false, otherwise true. + * we have no way here to know if MPI is in use and how many ranks per node. + * these variables need to be initialized here to have any effect. + if (!getenv("OMP_NUM_THREADS")) + { + int n_threads = std::max(1u, std::thread::hardware_concurrency() / 2); + setenv("OMP_NUM_THREADS", std::to_string(n_threads).c_str(), 1); + } + + if (!getenv("OMP_PROC_BIND")) + setenv("OMP_PROC_BIND", "true", 1); + + if (!getenv("OMP_PLACES")) + setenv("OMP_PLACES", "cores", 1); +*/ +#if defined(TECA_DEBUG) + setenv("OMP_DISPLAY_ENV", "true", 1); + setenv("OMP_DISPLAY_AFFINITY", "true", 1); +#endif +} + +#if defined(TECA_DEBUG) +__attribute__((destructor)) void fini(void) +{ + std::cerr << "teca_core finalizing ... 
" << std::endl; +} +#endif From d6d439ba67f6b455399266fcf3967aeccb252216 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Thu, 6 Jul 2023 11:21:04 -0700 Subject: [PATCH 4/6] tc_candiates explicitly set the number of OpenMP threads --- .travis.yml | 2 +- alg/gfdl_tc_candidates.f90.in | 8 ++-- alg/gfdl_tc_candidates.h | 7 +-- alg/teca_tc_candidates.cxx | 8 +++- alg/teca_tc_candidates.h | 17 +++++++ apps/teca_tc_detect.cpp | 8 ++++ test/CMakeLists.txt | 18 ++++++-- test/python/CMakeLists.txt | 18 ++++++-- test/python/test_tc_candidates.py | 60 +++++++++++++++---------- test/test_tc_candidates.cpp | 73 ++++++++++++++++++------------- 10 files changed, 148 insertions(+), 71 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9280c5ddd..47d740ba0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ env: - BUILD_TYPE=Debug - TECA_DIR=/travis_teca_dir - TECA_PYTHON_VERSION=3 - - TECA_DATA_REVISION=157 + - TECA_DATA_REVISION=158 jobs: - DOCKER_IMAGE=ubuntu IMAGE_VERSION=20.04 IMAGE_NAME=ubuntu_20_04 REQUIRE_NETCDF_MPI=TRUE - DOCKER_IMAGE=ubuntu IMAGE_VERSION=20.04 IMAGE_NAME=ubuntu_20_04 REQUIRE_NETCDF_MPI=FALSE diff --git a/alg/gfdl_tc_candidates.f90.in b/alg/gfdl_tc_candidates.f90.in index 963b1c5af..8d4b966b6 100644 --- a/alg/gfdl_tc_candidates.f90.in +++ b/alg/gfdl_tc_candidates.f90.in @@ -139,8 +139,8 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & max_psl_dy, max_psl_dr, max_twc_dy, max_twc_dr, & max_thick_dy, max_thick_dr, & Gwind, Gvort, Gtbar, Gpsl, Gthick, Grlat, & - Grlon, Gnlat, Gnlon, frprm_itmax, time_step, & - tc_table) result(ret_val) bind(C) + Grlon, Gnlat, Gnlon, frprm_itmax, n_threads, & + time_step, tc_table) result(ret_val) bind(C) use spline_@decorator@_module, only : splie2_@decorator@, & splie3_@decorator@, frprm_@decorator@, shape_@decorator@ @@ -157,7 +157,7 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & dimension(Gnlon, Gnlat) :: Gwind, Gvort, Gtbar, Gpsl, Gthick @iso_c_type_coord@, intent(in), 
dimension(Gnlon) :: Grlon @iso_c_type_coord@, intent(in), dimension(Gnlat) :: Grlat - integer(c_int), intent(in) :: frprm_itmax + integer(c_int), intent(in) :: frprm_itmax, n_threads type(c_ptr), intent(inout) :: tc_table @iso_c_type_var@, parameter :: ftol = 0.01 @@ -262,7 +262,7 @@ integer(c_int) function gfdl_tc_candidates_@decorator@( & call splie3_@decorator@(rlon, thick, thick_dx) ! loop over grid & look for storms - !$omp parallel do schedule(dynamic) default(none) & + !$omp parallel do schedule(dynamic) num_threads(n_threads) default(none) & !$omp& shared(tc_table,can_id,storm_id,nx,nx2,ix,jx,ixp6, & !$omp& jxp6, max_core_radius,min_vort,vort_win_size,max_psl_dy,max_psl_dr, & !$omp& max_twc_dy,max_twc_dr,max_thick_dy,max_thick_dr,Gwind,Gvort,Gtbar,Gpsl, & diff --git a/alg/gfdl_tc_candidates.h b/alg/gfdl_tc_candidates.h index 3ce116cb3..d2b771565 100644 --- a/alg/gfdl_tc_candidates.h +++ b/alg/gfdl_tc_candidates.h @@ -15,7 +15,8 @@ int gfdl_tc_candidates_c ## _c_name ## _v ## _v_name ( \ const _v_type *Gvort, const _v_type *Gtbar, \ const _v_type *Gpsl, const _v_type *Gthick, \ const _c_type *Grlat, const _c_type *Grlon, long *Gnlat, \ - long *Gnlon, int *frprm_itmax, long *step, void *atable); \ + long *Gnlon, int *frprm_itmax, int *n_threads, long *step, \ + void *atable); \ \ namespace teca_gfdl { \ int tc_candidates( \ @@ -25,13 +26,13 @@ int tc_candidates( \ const _v_type *Gwind, const _v_type *Gvort, const _v_type *Gtbar, \ const _v_type *Gpsl, const _v_type *Gthick, const _c_type *Grlat, \ const _c_type *Grlon, long Gnlat, long Gnlon, int frprm_itmax, \ - long step, void *atable) \ + int n_threads, long step, void *atable) \ { \ return gfdl_tc_candidates_c ## _c_name ## _v ## _v_name ( \ &core_rad, &min_vort, &vort_win, &max_psl_dy, &max_psl_dr, \ &max_twc_dy, &max_twc_dr, &max_thick_dy, &max_thick_dr, \ Gwind, Gvort, Gtbar, Gpsl, Gthick, Grlat, Grlon, &Gnlat, \ - &Gnlon, &frprm_itmax, &step, atable); \ + &Gnlon, &frprm_itmax, &n_threads, &step, 
atable); \ } \ }; diff --git a/alg/teca_tc_candidates.cxx b/alg/teca_tc_candidates.cxx index 7eecf4b8d..8c4590b7b 100644 --- a/alg/teca_tc_candidates.cxx +++ b/alg/teca_tc_candidates.cxx @@ -44,7 +44,8 @@ teca_tc_candidates::teca_tc_candidates() : search_lat_high(0.0), search_lon_low(1.0), search_lon_high(0.0), - minimizer_iterations(50) + minimizer_iterations(50), + omp_num_threads(1) { this->set_number_of_input_connections(1); this->set_number_of_output_ports(1); @@ -99,6 +100,8 @@ void teca_tc_candidates::get_properties_description( "lowest longitude in degrees to search for stroms") TECA_POPTS_GET(double, prefix, search_lon_high, "highest longitude in degrees to search for storms") + TECA_POPTS_GET(int, prefix, omp_num_threads, + "the number of OpenMP threads to use in the main detector loop") ; this->teca_algorithm::get_properties_description(prefix, opts); @@ -129,6 +132,7 @@ void teca_tc_candidates::set_properties( TECA_POPTS_SET(opts, double, prefix, search_lat_low) TECA_POPTS_SET(opts, double, prefix, search_lon_high) TECA_POPTS_SET(opts, double, prefix, search_lon_low) + TECA_POPTS_SET(opts, int, prefix, omp_num_threads) } #endif @@ -434,7 +438,7 @@ const_p_teca_dataset teca_tc_candidates::execute(unsigned int port, this->max_core_temperature_delta, this->max_core_temperature_radius, this->max_thickness_delta, this->max_thickness_radius, v, w, T, P, th, lat, lon, nlat, nlon, this->minimizer_iterations, - time_step, candidates.get())) + this->omp_num_threads, time_step, candidates.get())) { TECA_FATAL_ERROR("GFDL TC detector encountered an error") return nullptr; diff --git a/alg/teca_tc_candidates.h b/alg/teca_tc_candidates.h index e3ba93ce0..84115e296 100644 --- a/alg/teca_tc_candidates.h +++ b/alg/teca_tc_candidates.h @@ -109,6 +109,21 @@ class TECA_EXPORT teca_tc_candidates : public teca_algorithm TECA_ALGORITHM_PROPERTY(double, search_lon_high) ///@} + /** @name omp_num_threads + * Set the number of OpenMP threads. 
+ * + * @warning This is an experimental setting and it is recommended to use + * this only when running 1 MPI rank per node. + * + * Using multiple OpenMP threads can speed up single step processing times + * but may interfere with TECA's MPI+threads pipeline execution. We + * haven't yet found a way to make OpenMP play nicely with our own internal + * threading. + */ + ///@{ + TECA_ALGORITHM_PROPERTY(int, omp_num_threads) + ///@} + /// send human readable representation to the stream virtual void to_stream(std::ostream &os) const override; @@ -161,6 +176,8 @@ class TECA_EXPORT teca_tc_candidates : public teca_algorithm double search_lon_high; int minimizer_iterations; + + int omp_num_threads; }; #endif diff --git a/apps/teca_tc_detect.cpp b/apps/teca_tc_detect.cpp index 28cf45a8b..a51b67e51 100644 --- a/apps/teca_tc_detect.cpp +++ b/apps/teca_tc_detect.cpp @@ -101,6 +101,9 @@ int main(int argc, char **argv) ("n_threads", value()->default_value(-1), "\nSets the thread pool size on each" " MPI rank. When the default value of -1 is used TECA will coordinate the thread" " pools across ranks such each thread is bound to a unique physical core.\n") + ("n_omp_threads", value()->default_value(1), "\nSets the number of OpenMP threads\n" + " to use in the main detector loop. 
WARNING: This is experimental, see\n" + " teca_tc_candidates Doxygen documentation for details\n") ("help", "\ndisplays documentation for application specific command line options\n") ("advanced_help", "\ndisplays documentation for algorithm specific command line options\n") @@ -177,6 +180,7 @@ int main(int argc, char **argv) candidates->set_max_thickness_radius(4.0); candidates->set_search_lat_low(-80.0); candidates->set_search_lat_high(80.0); + candidates->set_omp_num_threads(1); candidates->get_properties_description("candidates", advanced_opt_defs); p_teca_table_reduce map_reduce = teca_table_reduce::New(); @@ -335,6 +339,10 @@ int main(int argc, char **argv) candidates->set_search_lat_high( opt_vals["highest_lat"].as()); + if (!opt_vals["n_omp_threads"].defaulted()) + candidates->set_omp_num_threads( + opt_vals["n_omp_threads"].as()); + if (!opt_vals["first_step"].defaulted()) map_reduce->set_start_index(opt_vals["first_step"].as()); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bf01bf68c..c8bb51701 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -368,15 +368,25 @@ teca_add_test(test_binary_stream_mpi teca_add_test(test_tc_candidates_serial COMMAND test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_SERIAL_TESTS} REQ_TECA_DATA) +teca_add_test(test_tc_candidates_omp + COMMAND test_tc_candidates + "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 ${TEST_CORES} + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} + REQ_TECA_DATA) +set_property(TEST test_tc_candidates_omp APPEND PROPERTY ENVIRONMENT + "OMP_DISPLAY_ENV=true;OMP_DISPLAY_AFFINITY=true;OMP_PLACES=cores;OMP_PROC_BIND=true") + 
teca_add_test(test_tc_candidates_mpi COMMAND ${MPIEXEC} -n ${TEST_CORES} test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} REQ_TECA_DATA) @@ -387,7 +397,7 @@ teca_add_test(test_tc_candidates_threads LIBS teca_core teca_data teca_io teca_alg ${teca_test_link} COMMAND test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 ${TEST_CORES} + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 ${TEST_CORES} 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} REQ_TECA_DATA) @@ -395,7 +405,7 @@ teca_add_test(test_tc_candidates_threads teca_add_test(test_tc_candidates_mpi_threads COMMAND ${MPIEXEC} -n ${HALF_TEST_CORES} test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 2 + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 2 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} ${TEST_MPI_THREADS} diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt index 5ce7dddb0..49b9d85cc 100644 --- a/test/python/CMakeLists.txt +++ b/test/python/CMakeLists.txt @@ -313,16 +313,26 @@ teca_add_test(py_test_temporal_monthly_percentile_vv_mpi teca_add_test(py_test_tc_candidates_serial COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_tc_candidates.py "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_SERIAL_TESTS} REQ_TECA_DATA) 
+teca_add_test(py_test_tc_candidates_omp + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_tc_candidates.py + "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 ${TEST_CORES} + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} + REQ_TECA_DATA) +set_property(TEST py_test_tc_candidates_omp APPEND PROPERTY ENVIRONMENT + "OMP_DISPLAY_ENV=true;OMP_DISPLAY_AFFINITY=true;OMP_PLACES=cores;OMP_PROC_BIND=true") + teca_add_test(py_test_tc_candidates_mpi COMMAND ${MPIEXEC} -n ${TEST_CORES} ${PYTHON_EXECUTABLE} -m mpi4py ${CMAKE_CURRENT_SOURCE_DIR}/test_tc_candidates.py "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} REQ_TECA_DATA) @@ -330,7 +340,7 @@ teca_add_test(py_test_tc_candidates_mpi teca_add_test(py_test_tc_candidates_threads COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_tc_candidates.py "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 ${TEST_CORES} + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 ${TEST_CORES} 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} REQ_TECA_DATA) @@ -339,7 +349,7 @@ teca_add_test(py_test_tc_candidates_mpi_threads COMMAND ${MPIEXEC} -n ${HALF_TEST_CORES} ${PYTHON_EXECUTABLE} -m mpi4py ${CMAKE_CURRENT_SOURCE_DIR}/test_tc_candidates.py "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" - "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 2 + "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 2 1 U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} ${TEST_MPI_THREADS} 
REQ_TECA_DATA) diff --git a/test/python/test_tc_candidates.py b/test/python/test_tc_candidates.py index d2496b10b..7c8c1c11e 100644 --- a/test/python/test_tc_candidates.py +++ b/test/python/test_tc_candidates.py @@ -59,33 +59,35 @@ def execute(port, data_in, req): return out_mesh return execute -if (len(sys.argv) != 17): +if (len(sys.argv) != 18): sys.stderr.write('\n\nUsage error:\n' \ 'test_tc_candidates [input regex] [output] [first step] [last step] [n threads] ' \ - '[850 mb wind x] [850 mb wind y] [surface wind x] [surface wind y] [surface pressure] ' \ - '[500 mb temp] [200 mb temp] [1000 mb z] [200 mb z] [low lat] [high lat]\n\n') + '[n_omp_threads] [850 mb wind x] [850 mb wind y] [surface wind x] [surface wind y] ' \ + '[surface pressure] [500 mb temp] [200 mb temp] [1000 mb z] [200 mb z] [low lat] ' \ + '[high lat]\n\n') sys.exit(-1) # parse command line -regex = sys.argv[1]; -baseline = sys.argv[2]; -start_index = int(sys.argv[3]); -end_index = int(sys.argv[4]); -n_threads = int(sys.argv[5]); -ux_850mb = sys.argv[6]; -uy_850mb = sys.argv[7]; -ux_surf = sys.argv[8]; -uy_surf = sys.argv[9]; -P_surf = sys.argv[10]; -T_500mb = sys.argv[11]; -T_200mb = sys.argv[12]; -z_1000mb = sys.argv[13]; -z_200mb = sys.argv[14]; -low_lat = float(sys.argv[15]); -high_lat = float(sys.argv[16]); +regex = sys.argv[1] +baseline = sys.argv[2] +start_index = int(sys.argv[3]) +end_index = int(sys.argv[4]) +n_threads = int(sys.argv[5]) +n_omp_threads = int(sys.argv[6]) +ux_850mb = sys.argv[7] +uy_850mb = sys.argv[8] +ux_surf = sys.argv[9] +uy_surf = sys.argv[10] +P_surf = sys.argv[11] +T_500mb = sys.argv[12] +T_200mb = sys.argv[13] +z_1000mb = sys.argv[14] +z_200mb = sys.argv[15] +low_lat = float(sys.argv[16]) +high_lat = float(sys.argv[17]) if (rank == 0): - sys.stderr.write('Testing on %d MPI processes %d threads\n'%(n_ranks, n_threads)) + sys.stderr.write('Testing on %d MPI processes %d threads %d omp threads\n'%(n_ranks, n_threads, n_omp_threads)) # create the pipeline objects 
@@ -146,6 +148,7 @@ def execute(port, data_in, req): cand.set_search_lat_high(high_lat) #cand.set_search_lon_low() #cand.set_search_lon_high() +cand.set_omp_num_threads(n_omp_threads) # map-reduce map_reduce = teca_table_reduce.New() @@ -155,10 +158,11 @@ def execute(port, data_in, req): map_reduce.set_verbose(1) map_reduce.set_thread_pool_size(n_threads) -# sort results in time +# sort results by wind speed, this gives the test output a deterministic +# order independent of how many OpenMP threads are used sort = teca_table_sort.New() sort.set_input_connection(map_reduce.get_output_port()) -sort.set_index_column('storm_id') +sort.set_index_column('surface_wind') # compute dates cal = teca_table_calendar.New() @@ -166,14 +170,26 @@ def execute(port, data_in, req): do_test = system_util.get_environment_variable_bool('TECA_DO_TEST', True) if do_test and os.path.exists(baseline): + # run the test + sys.stderr.write('running the test ... \n') + table_reader = teca_table_reader.New() table_reader.set_file_name(baseline) + diff = teca_dataset_diff.New() diff.set_input_connection(0, table_reader.get_output_port()) diff.set_input_connection(1, cal.get_output_port()) + diff.set_verbose(1) + + # storm_id depends on the number of OpenMP threads + diff.set_skip_array('storm_id') + diff.update() + else: # write the data + sys.stderr.write('generating the baseling "%s"\n'%(baseline)) + table_writer = teca_table_writer.New() table_writer.set_input_connection(cal.get_output_port()) table_writer.set_file_name(baseline) diff --git a/test/test_tc_candidates.cpp b/test/test_tc_candidates.cpp index f28aab607..9be3f7f99 100644 --- a/test/test_tc_candidates.cpp +++ b/test/test_tc_candidates.cpp @@ -27,7 +27,6 @@ #include #include -using namespace std; using namespace teca_derived_quantity_numerics; @@ -41,31 +40,33 @@ int main(int argc, char **argv) teca_system_interface::set_stack_trace_on_mpi_error(); // parse command line - string regex; - string baseline; + std::string regex; + std::string
baseline; int have_baseline = 0; long start_index = 0; long end_index = -1; int n_threads = -1; - string ux_850mb; - string uy_850mb; - string ux_surf; - string uy_surf; - string P_surf; - string T_500mb; - string T_200mb; - string z_1000mb; - string z_200mb; + int n_omp_threads = 1; + std::string ux_850mb; + std::string uy_850mb; + std::string ux_surf; + std::string uy_surf; + std::string P_surf; + std::string T_500mb; + std::string T_200mb; + std::string z_1000mb; + std::string z_200mb; double low_lat = 0; double high_lat = -1; - if (argc != 17) + if (argc != 18) { - cerr << endl << "Usage error:" << endl + std::cerr << std::endl << "Usage error:" << std::endl << "test_tc_candidates [input regex] [output] [first step] [last step] [n threads] " - "[850 mb wind x] [850 mb wind y] [surface wind x] [surface wind y] [surface pressure] " - "[500 mb temp] [200 mb temp] [1000 mb z] [200 mb z] [low lat] [high lat]" - << endl << endl; + "[n_omp_threads] [850 mb wind x] [850 mb wind y] [surface wind x] [surface wind y] " + "[surface pressure] [500 mb temp] [200 mb temp] [1000 mb z] [200 mb z] [low lat] " + "[high lat]" + << std::endl << std::endl; return -1; } @@ -77,17 +78,18 @@ int main(int argc, char **argv) start_index = atoi(argv[3]); end_index = atoi(argv[4]); n_threads = atoi(argv[5]); - ux_850mb = argv[6]; - uy_850mb = argv[7]; - ux_surf = argv[8]; - uy_surf = argv[9]; - P_surf = argv[10]; - T_500mb = argv[11]; - T_200mb = argv[12]; - z_1000mb = argv[13]; - z_200mb = argv[14]; - low_lat = atof(argv[15]); - high_lat = atof(argv[16]); + n_omp_threads = atoi(argv[6]); + ux_850mb = argv[7]; + uy_850mb = argv[8]; + ux_surf = argv[9]; + uy_surf = argv[10]; + P_surf = argv[11]; + T_500mb = argv[12]; + T_200mb = argv[13]; + z_1000mb = argv[14]; + z_200mb = argv[15]; + low_lat = atof(argv[16]); + high_lat = atof(argv[17]); // create the pipeline objects p_teca_cf_reader cf_reader = teca_cf_reader::New(); @@ -147,6 +149,7 @@ int main(int argc, char **argv) 
cand->set_search_lat_high(high_lat); //cand->set_search_lon_low(); //cand->set_search_lon_high(); + cand->set_omp_num_threads(n_omp_threads); // map-reduce p_teca_table_reduce map_reduce = teca_table_reduce::New(); @@ -154,12 +157,12 @@ int main(int argc, char **argv) map_reduce->set_start_index(start_index); map_reduce->set_end_index(end_index); map_reduce->set_verbose(1); - map_reduce->set_thread_pool_size(1); //n_threads); + map_reduce->set_thread_pool_size(n_threads); // sort results in time p_teca_table_sort sort = teca_table_sort::New(); sort->set_input_connection(map_reduce->get_output_port()); - sort->set_index_column("storm_id"); + sort->set_index_column("surface_wind"); // compute dates p_teca_table_calendar cal = teca_table_calendar::New(); @@ -171,12 +174,19 @@ int main(int argc, char **argv) if (do_test && have_baseline) { // run the test + if (rank == 0) + std::cerr << "running the test ... " << std::endl; + p_teca_table_reader table_reader = teca_table_reader::New(); table_reader->set_file_name(baseline); p_teca_dataset_diff diff = teca_dataset_diff::New(); diff->set_input_connection(0, table_reader->get_output_port()); diff->set_input_connection(1, cal->get_output_port()); + diff->set_verbose(1); + + // storm id is non-deterministic when OpenMP threading is used + diff->set_skip_array("storm_id"); diff->update(); } @@ -184,7 +194,8 @@ int main(int argc, char **argv) { // make a baseline if (rank == 0) - cerr << "generating baseline image " << baseline << endl; + std::cerr << "generating baseline image " << baseline << std::endl; + p_teca_table_writer table_writer = teca_table_writer::New(); table_writer->set_input_connection(cal->get_output_port()); table_writer->set_file_name(baseline.c_str()); From 88f06b90bc0e88ef7ee82ecf86379f5c0af94cdc Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Fri, 18 Aug 2023 13:42:05 -0700 Subject: [PATCH 5/6] test fix do_test control in event_filter test --- test/test_event_filter.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/test/test_event_filter.cpp b/test/test_event_filter.cpp index 5b89f2166..8702a2e6f 100644 --- a/test/test_event_filter.cpp +++ b/test/test_event_filter.cpp @@ -57,7 +57,7 @@ int main(int argc, char **argv) bool do_test = true; teca_system_util::get_environment_variable("TECA_DO_TEST", do_test); - if (teca_file_util::file_exists(baseline.c_str())) + if (do_test && teca_file_util::file_exists(baseline.c_str())) { // run the test p_teca_table_reader baseline_table_reader = teca_table_reader::New(); From a01ecccc035bb5d7afeb2921b52d94b3217ed9da Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Tue, 22 Aug 2023 12:44:32 -0700 Subject: [PATCH 6/6] test_tc_candidates add control over the max iterations --- test/CMakeLists.txt | 10 +++++----- test/test_tc_candidates.cpp | 7 +++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8bb51701..a392130da 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -369,7 +369,7 @@ teca_add_test(test_tc_candidates_serial COMMAND test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 1 - U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 50 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_SERIAL_TESTS} REQ_TECA_DATA) @@ -377,7 +377,7 @@ teca_add_test(test_tc_candidates_omp COMMAND test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 ${TEST_CORES} - U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 50 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} REQ_TECA_DATA) set_property(TEST test_tc_candidates_omp APPEND PROPERTY ENVIRONMENT @@ -387,7 +387,7 @@ teca_add_test(test_tc_candidates_mpi COMMAND ${MPIEXEC} -n ${TEST_CORES} test_tc_candidates 
"${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 1 1 - U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 50 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} REQ_TECA_DATA) @@ -398,7 +398,7 @@ teca_add_test(test_tc_candidates_threads COMMAND test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 ${TEST_CORES} 1 - U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 50 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} REQ_TECA_DATA) @@ -406,7 +406,7 @@ teca_add_test(test_tc_candidates_mpi_threads COMMAND ${MPIEXEC} -n ${HALF_TEST_CORES} test_tc_candidates "${TECA_DATA_ROOT}/test_tc_candidates_1990_07_0[12]\\.nc" "${TECA_DATA_ROOT}/test_tc_candidates_20.bin" 0 3 2 1 - U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 + U850 V850 UBOT VBOT PSL T500 T200 Z1000 Z200 -20 20 50 FEATURES ${TECA_HAS_NETCDF} ${TECA_HAS_UDUNITS} ${TECA_HAS_MPI} ${TEST_MPI_THREADS} REQ_TECA_DATA) diff --git a/test/test_tc_candidates.cpp b/test/test_tc_candidates.cpp index 9be3f7f99..5623da0cb 100644 --- a/test/test_tc_candidates.cpp +++ b/test/test_tc_candidates.cpp @@ -58,14 +58,15 @@ int main(int argc, char **argv) std::string z_200mb; double low_lat = 0; double high_lat = -1; + int max_it = 50; - if (argc != 18) + if (argc != 19) { std::cerr << std::endl << "Usage error:" << std::endl << "test_tc_candidates [input regex] [output] [first step] [last step] [n threads] " "[n_omp_threads] [850 mb wind x] [850 mb wind y] [surface wind x] [surface wind y] " "[surface pressure] [500 mb temp] [200 mb temp] [1000 mb z] [200 mb z] [low lat] " - "[high lat]" + "[high lat] [max it]" << std::endl << std::endl; return -1; } @@ -90,6 +91,7 @@ int main(int argc, char **argv) z_200mb = argv[15]; low_lat = atof(argv[16]); high_lat = atof(argv[17]); + max_it = 
atoi(argv[18]); // create the pipeline objects p_teca_cf_reader cf_reader = teca_cf_reader::New(); @@ -150,6 +152,7 @@ int main(int argc, char **argv) //cand->set_search_lon_low(); //cand->set_search_lon_high(); cand->set_omp_num_threads(n_omp_threads); + cand->set_minimizer_iterations(max_it); // map-reduce p_teca_table_reduce map_reduce = teca_table_reduce::New();