From 144928ff4118c6b7576f0522d76453836cf31143 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Wed, 20 Sep 2023 09:45:58 -0700 Subject: [PATCH 1/2] buffer cmake option for page locked memory in host transfers --- CMakeLists.txt | 8 ++++++++ hamr_buffer_impl.h | 11 +++++++---- hamr_config.cmake.in | 1 + hamr_config.h.in | 1 + 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7b014c..b81ec9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,14 @@ else() message(STATUS "HAMR: User defined objects -- disabled") endif() +# page locked memory for host transfers +set(HAMR_ENABLE_PAGE_LOCKED_MEMORY OFF CACHE BOOL + "Enables the use of page locked memory for host transfers.") +if (HAMR_ENABLE_PAGE_LOCKED_MEMORY) + message(STATUS "HAMR: Page locked memory for host transfers -- enabled") +else() + message(STATUS "HAMR: Page locked memory for host transfers -- disabled") +endif() # add the requisite flags. CMake enthusiasts will tell you that this is "not # the CMake way". However, CMake has spotty coverage, is inconsistent in diff --git a/hamr_buffer_impl.h b/hamr_buffer_impl.h index d3cda4e..6e9c65c 100644 --- a/hamr_buffer_impl.h +++ b/hamr_buffer_impl.h @@ -1768,8 +1768,9 @@ std::shared_ptr buffer::get_host_accessible() const else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_async)) { // make a copy on the host. - std::shared_ptr tmp = malloc_allocator::allocate(m_size); - /*TODO:Using cudaMallocHost caused performance issues on Perlmutter +#if defined(HAMR_ENABLE_PAGE_LOCKED_MEMORY) + // Using cudaMallocHost caused performance issues on Perlmutter w. CUDA 11.7 + // however, page locked memory is required for asynchronous transfers. std::shared_ptr tmp = cuda_malloc_host_allocator::allocate(m_size); if (!tmp) { @@ -1777,8 +1778,10 @@ std::shared_ptr buffer::get_host_accessible() const " CUDA failed to allocate host pinned memory, falling back" " to the default system allocator." 
<< std::endl; tmp = malloc_allocator::allocate(m_size); - }*/ - + } +#else + std::shared_ptr tmp = malloc_allocator::allocate(m_size); +#endif activate_cuda_device dev(m_owner); if (copy_to_host_from_cuda(m_stream, tmp.get(), m_data.get(), m_size)) diff --git a/hamr_config.cmake.in b/hamr_config.cmake.in index 12e0c10..70f4452 100644 --- a/hamr_config.cmake.in +++ b/hamr_config.cmake.in @@ -16,6 +16,7 @@ set(HAMR_NVHPC_CUDA @HAMR_NVHPC_CUDA@) set(HAMR_ENABLE_HIP @HAMR_ENABLE_HIP@) set(HAMR_ENABLE_OPENMP @HAMR_ENABLE_HIP@) set(HAMR_ENABLE_OBJECTS @HAMR_ENABLE_OBJECTS@) +set(HAMR_ENABLE_PAGE_LOCKED_MEMORY @HAMR_ENABLE_PAGE_LOCKED_MEMORY@) set(HAMR_ENABLE_PYTHON @HAMR_ENABLE_PYTHON@) set(HAMR_VERBOSE @HAMR_VERBOSE@) diff --git a/hamr_config.h.in b/hamr_config.h.in index f5a22bd..74fe8d8 100644 --- a/hamr_config.h.in +++ b/hamr_config.h.in @@ -13,6 +13,7 @@ #cmakedefine HAMR_ENABLE_OPENMP #define HAMR_OPENMP_LOOP @HAMR_OPENMP_LOOP@ #cmakedefine HAMR_ENABLE_OBJECTS +#cmakedefine HAMR_ENABLE_PAGE_LOCKED_MEMORY #cmakedefine HAMR_ENABLE_PYTHON #cmakedefine HAMR_VERBOSE From e6f1cf81e5d37275bb5b6484c06b975d20a91d67 Mon Sep 17 00:00:00 2001 From: Burlen Loring Date: Fri, 22 Sep 2023 14:52:58 -0700 Subject: [PATCH 2/2] copiers use PM memcpy equivalent when possible When possible, use the programming model equivalent of memcpy. This had inadvertently been disabled. This avoids a kernel launch. 
--- hamr_copier_traits.h | 39 ++++++++++++++++ hamr_cuda_copy_async.cxx | 29 ++++-------- hamr_cuda_copy_async.h | 89 ++++++++++++++++--------------------- hamr_cuda_copy_async_impl.h | 88 +++++++++++++++--------------------- hamr_hip_copy.cxx | 29 ++++-------- hamr_hip_copy.h | 78 ++++++++++++++------------------ hamr_hip_copy_impl.h | 81 +++++++++++++++------------------ hamr_openmp_copy.cxx | 30 ++++--------- hamr_openmp_copy.h | 79 ++++++++++++++------------------ hamr_openmp_copy_impl.h | 77 ++++++++++++++------------------ hamr_openmp_print.h | 1 + 11 files changed, 274 insertions(+), 346 deletions(-) create mode 100644 hamr_copier_traits.h diff --git a/hamr_copier_traits.h b/hamr_copier_traits.h new file mode 100644 index 0000000..65c4f76 --- /dev/null +++ b/hamr_copier_traits.h @@ -0,0 +1,39 @@ +#ifndef hamr_copier_traits_h +#define hamr_copier_traits_h + +#include "hamr_config.h" +#include + +namespace hamr +{ +/// @name type trait that enables object copy +///@{ +template ::value || !std::is_arithmetic::value)> struct use_object_copier : std::false_type {}; +template struct use_object_copier : std::true_type {}; +template using use_object_copier_t = typename std::enable_if::value>::type; +///@} + + +/// @name type trait that enables POD copy from different types +///@{ +#if defined(HAMR_ENABLE_OBJECTS) +template ::value)> struct use_cons_copier : std::false_type {}; +template struct use_cons_copier : std::true_type {}; +template using use_cons_copier_t = typename std::enable_if::value>::type; +#else +template ::value && std::is_arithmetic::value)> struct use_cons_copier : std::false_type {}; +template struct use_cons_copier : std::true_type {}; +template using use_cons_copier_t = typename std::enable_if::value>::type; +#endif +///@} + +/// @name type trait that enables POD copy from the same types +///@{ +template ::value && std::is_arithmetic::value)> struct use_bytes_copier : std::false_type {}; +template struct use_bytes_copier : std::true_type {}; 
+template using use_bytes_copier_t = typename std::enable_if::value>::type; +///@} + +} + +#endif diff --git a/hamr_cuda_copy_async.cxx b/hamr_cuda_copy_async.cxx index 925541c..f76bb06 100644 --- a/hamr_cuda_copy_async.cxx +++ b/hamr_cuda_copy_async.cxx @@ -3,29 +3,18 @@ #include "hamr_cuda_copy_async.h" #include "hamr_cuda_copy_async_impl.h" -#if !defined(HAMR_ENABLE_OBJECTS) - -#define hamr_cuda_copy_async_instantiate_(T, U) \ -template int hamr::copy_to_cuda_from_host(cudaStream_t strm, T *dest, const U *src, size_t n_elem, void *); \ -template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const U *src, size_t n_elem, void *); \ -template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const U *src, int src_device, size_t n_elem, void *); \ -template int hamr::copy_to_host_from_cuda(cudaStream_t strm, T *dest, const U *src, size_t n_elem, void *); - -#else - #define hamr_cuda_copy_async_instantiate_(T, U) \ -template int hamr::copy_to_cuda_from_host(cudaStream_t strm, T *dest, const U *src, size_t n_elem); \ -template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const U *src, size_t n_elem); \ -template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const U *src, int src_device, size_t n_elem); \ -template int hamr::copy_to_host_from_cuda(cudaStream_t strm, T *dest, const U *src, size_t n_elem); - -#endif +template int hamr::copy_to_cuda_from_host(cudaStream_t strm, T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const U *src, int src_device, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_host_from_cuda(cudaStream_t strm, T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); #define hamr_cuda_copy_async_instantiate__(T) \ -template int 
hamr::copy_to_cuda_from_host(cudaStream_t strm, T *dest, const T *src, size_t n_elem, void *); \ -template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const T *src, size_t n_elem, void *); \ -template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const T *src, int src_device, size_t n_elem, void *); \ -template int hamr::copy_to_host_from_cuda(cudaStream_t strm, T *dest, const T *src, size_t n_elem, void *); +template int hamr::copy_to_cuda_from_host(cudaStream_t strm, T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_cuda_from_cuda(cudaStream_t strm, T *dest, const T *src, int src_device, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_host_from_cuda(cudaStream_t strm, T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ + hamr_cuda_copy_async_instantiate__(float) hamr_cuda_copy_async_instantiate__(double) diff --git a/hamr_cuda_copy_async.h b/hamr_cuda_copy_async.h index 1cc0ca2..922c3ce 100644 --- a/hamr_cuda_copy_async.h +++ b/hamr_cuda_copy_async.h @@ -2,6 +2,8 @@ #define hamr_cuda_copy_async_h #include "hamr_config.h" +#include "hamr_copier_traits.h" + #include #include @@ -20,8 +22,9 @@ namespace hamr */ template int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array to the active CUDA device (fast path for arrays of * arithmetic types of the same type). 
* @@ -32,10 +35,9 @@ int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_ele * * @returns 0 if there were no errors */ -template -int copy_to_cuda_from_host(cudaStream_t str, T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array to the active CUDA device. * @@ -47,11 +49,9 @@ int copy_to_cuda_from_host(cudaStream_t str, T *dest, const T *src, size_t n_ele * @returns 0 if there were no errors */ template -int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); + #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array on the active CUDA device. @@ -65,8 +65,9 @@ int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_ele */ template int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Ccopies an array on the active CUAD device (fast path for arrays of * arithmetic types of the same type). * @@ -77,10 +78,10 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_ele * * @returns 0 if there were no errors */ -template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); + /** Copies an array on the active CUDA device. 
* @@ -92,11 +93,8 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const T *src, size_t n_ele * @returns 0 if there were no errors */ template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array to the active CUDA device from the named CUDA device, @@ -110,10 +108,10 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_ele * @returns 0 if there were no errors */ template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, - const U *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, + int src_device, size_t n_elem, hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array to the active CUDA device from the named CUDA device, (fast * path for arrays of arithmetic types of the same type). * @@ -125,11 +123,9 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, * * @returns 0 if there were no errors */ -template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, - const T *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, + int src_device, size_t n_elem, hamr::use_bytes_copier_t * = nullptr); /** Copies an array on the active CUDA device. 
* @@ -142,12 +138,8 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, * @returns 0 if there were no errors */ template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, - const U *src, int src_device, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, + int src_device, size_t n_elem, hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array from the active CUDA device. @@ -160,10 +152,10 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, * @returns 0 if there were no errors */ template -int copy_to_host_from_cuda(cudaStream_t str, T *dest, - const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else +int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array from the active CUDA device (fast path for arrays of * arithmetic types of the same type). * @@ -174,11 +166,9 @@ int copy_to_host_from_cuda(cudaStream_t str, T *dest, * * @returns 0 if there were no errors */ -template -int copy_to_host_from_cuda(cudaStream_t str, T *dest, - const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array from the active CUDA device. 
* @@ -188,11 +178,8 @@ int copy_to_host_from_cuda(cudaStream_t str, T *dest, * @returns 0 if there were no errors */ template -int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); } diff --git a/hamr_cuda_copy_async_impl.h b/hamr_cuda_copy_async_impl.h index d8de720..b5f0845 100644 --- a/hamr_cuda_copy_async_impl.h +++ b/hamr_cuda_copy_async_impl.h @@ -14,6 +14,7 @@ using cudaStream_t = void; #endif #include "hamr_malloc_allocator.h" +#include "hamr_copier_traits.h" #include #include @@ -27,7 +28,7 @@ namespace hamr // --------------------------------------------------------------------------- template int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type *) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -48,11 +49,12 @@ int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_ele return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_cuda_from_host(cudaStream_t str, T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type *) +template +int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -87,15 +89,12 @@ int copy_to_cuda_from_host(cudaStream_t str, T *dest, const T *src, size_t n_ele return 0; #endif } -#endif + // --------------------------------------------------------------------------- template -int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int 
copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -163,7 +162,7 @@ int copy_to_cuda_from_host(cudaStream_t str, T *dest, const U *src, size_t n_ele // --------------------------------------------------------------------------- template int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type *) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -183,11 +182,12 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_ele return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type *) +template +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -221,15 +221,11 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const T *src, size_t n_ele return 0; #endif } -#endif // --------------------------------------------------------------------------- template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -281,9 +277,8 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_ele #if !defined(HAMR_ENABLE_OBJECTS) // --------------------------------------------------------------------------- template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, - const U *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type *) +int 
copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, + int src_device, size_t n_elem, hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -305,12 +300,12 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, - const T *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type *) +template +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, + int src_device, size_t n_elem, hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -362,12 +357,8 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, // --------------------------------------------------------------------------- template -int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, - const U *src, int src_device, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, const U *src, + int src_device, size_t n_elem ,hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -466,9 +457,8 @@ int copy_to_cuda_from_cuda(cudaStream_t str, T *dest, #if !defined(HAMR_ENABLE_OBJECTS) // --------------------------------------------------------------------------- template -int copy_to_host_from_cuda(cudaStream_t str, T *dest, - const U *src, size_t n_elem, - typename std::enable_if::value>::type *) +int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -489,12 +479,12 @@ int copy_to_host_from_cuda(cudaStream_t str, T *dest, return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_host_from_cuda(cudaStream_t str, T *dest, - const T *src, size_t 
n_elem, - typename std::enable_if::value>::type *) +template +int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -528,15 +518,11 @@ int copy_to_host_from_cuda(cudaStream_t str, T *dest, return 0; #endif } -#endif // --------------------------------------------------------------------------- template -int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_CUDA) (void) str; @@ -602,5 +588,3 @@ int copy_to_host_from_cuda(cudaStream_t str, T *dest, const U *src, size_t n_ele } } - -#endif diff --git a/hamr_hip_copy.cxx b/hamr_hip_copy.cxx index ee3d894..06d7da9 100644 --- a/hamr_hip_copy.cxx +++ b/hamr_hip_copy.cxx @@ -1,31 +1,18 @@ #include "hamr_config.h" - #include "hamr_hip_copy.h" #include "hamr_hip_copy_impl.h" -#if !defined(HAMR_ENABLE_OBJECTS) - -#define hamr_hip_copy_instantiate_(T, U) \ -template int hamr::copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, void *); \ -template int hamr::copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, void *); \ -template int hamr::copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, void *); \ -template int hamr::copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, void *); - -#else - #define hamr_hip_copy_instantiate_(T, U) \ -template int hamr::copy_to_hip_from_host(T *dest, const U *src, size_t n_elem); \ -template int hamr::copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem); \ -template int hamr::copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem); \ -template int hamr::copy_to_host_from_hip(T *dest, const U *src, size_t n_elem); - -#endif +template int hamr::copy_to_hip_from_host(T 
*dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); #define hamr_hip_copy_instantiate__(T) \ -template int hamr::copy_to_hip_from_host(T *dest, const T *src, size_t n_elem, void *); \ -template int hamr::copy_to_hip_from_hip(T *dest, const T *src, size_t n_elem, void *); \ -template int hamr::copy_to_hip_from_hip(T *dest, const T *src, int src_device, size_t n_elem, void *); \ -template int hamr::copy_to_host_from_hip(T *dest, const T *src, size_t n_elem, void *); +template int hamr::copy_to_hip_from_host(T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_hip_from_hip(T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_hip_from_hip(T *dest, const T *src, int src_device, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_host_from_hip(T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ hamr_hip_copy_instantiate__(float) hamr_hip_copy_instantiate__(double) diff --git a/hamr_hip_copy.h b/hamr_hip_copy.h index 2089c00..61bf7c6 100644 --- a/hamr_hip_copy.h +++ b/hamr_hip_copy.h @@ -3,7 +3,7 @@ #include "hamr_config.h" #include "hamr_env.h" -#include +#include "hamr_copier_traits.h" /// heterogeneous accelerator memory resource namespace hamr @@ -18,8 +18,9 @@ namespace hamr */ template int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array to the active HIP device (fast path for arrays of * arithmetic types of the same type). 
* @@ -28,10 +29,9 @@ int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_hip_from_host(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array to the active HIP device. * @@ -41,11 +41,8 @@ int copy_to_hip_from_host(T *dest, const T *src, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array on the active HIP device. @@ -57,8 +54,9 @@ int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem */ template int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Ccopies an array on the active HIP device (fast path for arrays of * arithmetic types of the same type). * @@ -67,10 +65,9 @@ int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_hip_from_hip(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array on the active HIP device. 
* @@ -80,11 +77,8 @@ int copy_to_hip_from_hip(T *dest, const T *src, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array to the active HIP device from the named HIP device, @@ -97,8 +91,9 @@ int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem */ template int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array to the active HIP device from the named HIP device, (fast * path for arrays of arithmetic types of the same type). * @@ -108,10 +103,9 @@ int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_hip_from_hip(T *dest, const T *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array on the active HIP device. * @@ -122,11 +116,8 @@ int copy_to_hip_from_hip(T *dest, const T *src, int src_device, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array from the active HIP device. 
@@ -138,8 +129,9 @@ int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem */ template int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array from the active HIP device (fast path for arrays of * arithmetic types of the same type). * @@ -148,10 +140,9 @@ int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_host_from_hip(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array from the active HIP device. * @@ -161,11 +152,8 @@ int copy_to_host_from_hip(T *dest, const T *src, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); } diff --git a/hamr_hip_copy_impl.h b/hamr_hip_copy_impl.h index ca440d7..cfdc7b4 100644 --- a/hamr_hip_copy_impl.h +++ b/hamr_hip_copy_impl.h @@ -9,6 +9,7 @@ #include "hamr_hip_malloc_allocator.h" #endif #include "hamr_malloc_allocator.h" +#include "hamr_copier_traits.h" #include #include @@ -21,7 +22,7 @@ namespace hamr #if !defined(HAMR_ENABLE_OBJECTS) template int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type *) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -40,10 +41,11 @@ int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, return -1; #endif } -#else -template -int copy_to_hip_from_host(T *dest, const 
T *src, size_t n_elem, - typename std::enable_if::value>::type *) +#endif + +template +int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -75,14 +77,10 @@ int copy_to_hip_from_host(T *dest, const T *src, size_t n_elem, return 0; #endif } -#endif template -int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -148,7 +146,7 @@ int copy_to_hip_from_host(T *dest, const U *src, size_t n_elem #if !defined(HAMR_ENABLE_OBJECTS) template int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type *) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -166,10 +164,11 @@ int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, return -1; #endif } -#else -template -int copy_to_hip_from_hip(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type *) +#endif + +template +int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -201,14 +200,10 @@ int copy_to_hip_from_hip(T *dest, const T *src, size_t n_elem, return 0; #endif } -#endif template -int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -259,7 +254,7 @@ int copy_to_hip_from_hip(T *dest, const U *src, size_t n_elem #if !defined(HAMR_ENABLE_OBJECTS) template int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type *) + 
hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -279,10 +274,11 @@ int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, return -1; #endif } -#else -template -int copy_to_hip_from_hip(T *dest, const T *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type *) +#endif + +template +int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -327,14 +323,10 @@ int copy_to_hip_from_hip(T *dest, const T *src, int src_device, size_t n_elem, return 0; #endif } -#endif template -int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -433,10 +425,12 @@ int copy_to_hip_from_hip(T *dest, const U *src, int src_device, size_t n_elem #endif } + + #if !defined(HAMR_ENABLE_OBJECTS) template int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type *) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -455,10 +449,11 @@ int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, return -1; #endif } -#else -template -int copy_to_host_from_hip(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type *) +#endif + +template +int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -493,11 +488,8 @@ int copy_to_host_from_hip(T *dest, const T *src, size_t n_elem, #endif template -int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_host_from_hip(T *dest, const U 
*src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_HIP) (void) dest; @@ -563,6 +555,3 @@ int copy_to_host_from_hip(T *dest, const U *src, size_t n_elem } } - -#endif - diff --git a/hamr_openmp_copy.cxx b/hamr_openmp_copy.cxx index f862238..777052e 100644 --- a/hamr_openmp_copy.cxx +++ b/hamr_openmp_copy.cxx @@ -1,32 +1,18 @@ #include "hamr_config.h" - #include "hamr_openmp_copy.h" #include "hamr_openmp_copy_impl.h" -#if !defined(HAMR_ENABLE_OBJECTS) - #define hamr_openmp_copy_instantiate_(T, U) \ -template int hamr::copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, void *); \ -template int hamr::copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, void *); \ -template int hamr::copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, void *); \ -template int hamr::copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, void *); - -#else - -#define hamr_openmp_copy_instantiate_(T, U) \ -template int hamr::copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem); \ -template int hamr::copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem); \ -template int hamr::copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem); \ -template int hamr::copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem); - -#endif +template int hamr::copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, hamr::use_cons_copier_t *); \ +template int hamr::copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t *); #define hamr_openmp_copy_instantiate__(T) \ -template int hamr::copy_to_openmp_from_host(T *dest, const T *src, size_t n_elem, void *); \ -template int 
hamr::copy_to_openmp_from_openmp(T *dest, const T *src, size_t n_elem, void *); \ -template int hamr::copy_to_openmp_from_openmp(T *dest, const T *src, int src_device, size_t n_elem, void *); \ -template int hamr::copy_to_host_from_openmp(T *dest, const T *src, size_t n_elem, void *); - +template int hamr::copy_to_openmp_from_host(T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_openmp_from_openmp(T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_openmp_from_openmp(T *dest, const T *src, int src_device, size_t n_elem, hamr::use_bytes_copier_t *); \ +template int hamr::copy_to_host_from_openmp(T *dest, const T *src, size_t n_elem, hamr::use_bytes_copier_t *); \ hamr_openmp_copy_instantiate__(float) hamr_openmp_copy_instantiate__(double) diff --git a/hamr_openmp_copy.h b/hamr_openmp_copy.h index b6713d9..d6df731 100644 --- a/hamr_openmp_copy.h +++ b/hamr_openmp_copy.h @@ -2,7 +2,8 @@ #define hamr_openmp_copy_h #include "hamr_config.h" -#include +#include "hamr_copier_traits.h" +#include /// heterogeneous accelerator memory resource namespace hamr @@ -17,8 +18,9 @@ namespace hamr */ template int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array to the active OpenMP device (fast path for arrays of * arithmetic types of the same type). * @@ -27,10 +29,9 @@ int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_openmp_from_host(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array to the active OpenMP device. 
* @@ -40,11 +41,8 @@ int copy_to_openmp_from_host(T *dest, const T *src, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array on the active OpenMP device. @@ -56,8 +54,9 @@ int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem */ template int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array on the active OpenMP device (fast path for arrays of * arithmetic types of the same type). * @@ -66,10 +65,9 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_openmp_from_openmp(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array on the active OpenMP device. 
* @@ -79,11 +77,8 @@ int copy_to_openmp_from_openmp(T *dest, const T *src, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array to the active OpenMP device from the named OpenMP device, @@ -96,8 +91,9 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem */ template int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array to the active OpenMP device from the named OpenMP device, * (fast path for arrays of arithmetic types of the same type). * @@ -107,10 +103,9 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_e * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_openmp_from_openmp(T *dest, const T *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array on the active OpenMP device. 
* @@ -121,11 +116,8 @@ int copy_to_openmp_from_openmp(T *dest, const T *src, int src_device, size_t n_e * @returns 0 if there were no errors */ template -int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); #if !defined(HAMR_ENABLE_OBJECTS) /** Copies an array from the active OpenMP device. @@ -137,8 +129,9 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_e */ template int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#else + hamr::use_object_copier_t * = nullptr); +#endif + /** Copies an array from the active OpenMP device (fast path for arrays of * arithmetic types of the same type). * @@ -147,10 +140,9 @@ int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, * @param[in] n_elem the number of elements in the array * @returns 0 if there were no errors */ -template -int copy_to_host_from_openmp(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * = nullptr); -#endif +template +int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t * = nullptr); /** Copies an array from the active OpenMP device. 
* @@ -160,11 +152,8 @@ int copy_to_host_from_openmp(T *dest, const T *src, size_t n_elem, * @returns 0 if there were no errors */ template -int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * = nullptr -#endif - ); +int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t * = nullptr); } diff --git a/hamr_openmp_copy_impl.h b/hamr_openmp_copy_impl.h index 1d949fd..b626427 100644 --- a/hamr_openmp_copy_impl.h +++ b/hamr_openmp_copy_impl.h @@ -8,6 +8,7 @@ #include #endif #include "hamr_malloc_allocator.h" +#include "hamr_copier_traits.h" #include #include @@ -21,7 +22,7 @@ namespace hamr // --------------------------------------------------------------------------- template int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * ) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -40,11 +41,12 @@ int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_openmp_from_host(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * ) +template +int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -80,15 +82,11 @@ int copy_to_openmp_from_host(T *dest, const T *src, size_t n_elem, return 0; #endif } -#endif // --------------------------------------------------------------------------- template -int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -144,7 
+142,7 @@ int copy_to_openmp_from_host(T *dest, const U *src, size_t n_elem // --------------------------------------------------------------------------- template int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * ) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -162,11 +160,12 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_openmp_from_openmp(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * ) +template +int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -201,15 +200,11 @@ int copy_to_openmp_from_openmp(T *dest, const T *src, size_t n_elem, return 0; #endif } -#endif // --------------------------------------------------------------------------- template -int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -245,7 +240,7 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, size_t n_elem // --------------------------------------------------------------------------- template int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * ) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -265,11 +260,12 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_e return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int 
copy_to_openmp_from_openmp(T *dest, const T *src, int src_device, size_t n_elem, - typename std::enable_if::value>::type * ) +template +int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -303,15 +299,11 @@ int copy_to_openmp_from_openmp(T *dest, const T *src, int src_device, size_t n_e return 0; #endif } -#endif // --------------------------------------------------------------------------- template -int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem -#if !defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -363,7 +355,7 @@ int copy_to_openmp_from_openmp(T *dest, const U *src, int src_device, size_t n_e // --------------------------------------------------------------------------- template int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, - typename std::enable_if::value>::type * ) + hamr::use_object_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -382,11 +374,12 @@ int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, return -1; #endif } -#else +#endif + // --------------------------------------------------------------------------- -template -int copy_to_host_from_openmp(T *dest, const T *src, size_t n_elem, - typename std::enable_if::value>::type * ) +template +int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_bytes_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; @@ -422,15 +415,11 @@ int copy_to_host_from_openmp(T *dest, const T *src, size_t n_elem, return 0; #endif } -#endif // --------------------------------------------------------------------------- template -int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem -#if 
!defined(HAMR_ENABLE_OBJECTS) - ,typename std::enable_if::value>::type * -#endif - ) +int copy_to_host_from_openmp(T *dest, const U *src, size_t n_elem, + hamr::use_cons_copier_t *) { #if !defined(HAMR_ENABLE_OPENMP) (void) dest; diff --git a/hamr_openmp_print.h b/hamr_openmp_print.h index f11606d..56d3bf6 100644 --- a/hamr_openmp_print.h +++ b/hamr_openmp_print.h @@ -2,6 +2,7 @@ #define hamr_openmp_print_impl_h #include "hamr_config.h" +#include /// heterogeneous accelerator memory resource namespace hamr