diff --git a/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h b/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h deleted file mode 100644 index 36c17721249aa..0000000000000 --- a/HeterogeneousCore/CUDACore/interface/CUDAESProduct.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef HeterogeneousCore_CUDACore_CUDAESProduct_h -#define HeterogeneousCore_CUDACore_CUDAESProduct_h - -#include -#include - -#include - -#include "FWCore/Concurrency/interface/hardware_pause.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "FWCore/Utilities/interface/thread_safety_macros.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" -#include "HeterogeneousCore/CUDAServices/interface/numberOfCUDADevices.h" - -template -class CUDAESProduct { -public: - CUDAESProduct(): gpuDataPerDevice_(numberOfCUDADevices()) {} - ~CUDAESProduct() = default; - - // transferAsync should be a function of (T&, cuda::stream_t<>&) - // which enqueues asynchronous transfers (possibly kernels as well) - // to the CUDA stream - template - const T& dataForCurrentDeviceAsync(cuda::stream_t<>& cudaStream, F transferAsync) const { - edm::Service cs; - auto device = cs->getCurrentDevice(); - - auto& data = gpuDataPerDevice_[device]; - if(data.m_filled.load()) { - // GPU data has already been filled, so can return it immediately - return data.m_data; - } - - - bool expected = false; - if(data.m_filling.compare_exchange_strong(expected, true)) { - // so nobody else was filling - // then check if it got filled in the mean time - if(data.m_filled.load()) { - // someone else finished the filling in the meantime - data.m_filling.store(false); - return data.m_data; - } - - // now we can be sure that the data is not yet on the GPU, and - // this thread is the first one to try that - try { - transferAsync(data.m_data, cudaStream); - - cudaStream.enqueue.callback([&filling = data.m_filling, - &filled = data.m_filled] - (cuda::stream::id_t streamId, cuda::status_t status) mutable { - // TODO: check status and throw if fails - auto should_be_false = filled.exchange(true); - assert(!should_be_false); - auto should_be_true = filling.exchange(false); - assert(should_be_true); - }); - } catch(...) { - // release the filling state and propagate exception - auto should_be_true = data.m_filling.exchange(false); - assert(should_be_true); - throw std::current_exception(); - } - - // Now the filling has been enqueued to the cudaStream, so we - // can return the GPU data immediately, since all subsequent - // work must be either enqueued to the cudaStream, or the cudaStream - // must be synchronized by the caller - return data.m_data; - } - - // can we do better than just spin on the atomic while waiting another thread to finish the filling? - while(data.m_filling.load()) { - hardware_pause(); - } - assert(data.m_filled.load()); - - return data.m_data; - } - -private: - struct Item { - mutable std::atomic m_filling = false; // true if some thread is already filling - mutable std::atomic m_filled = false; // easy check if data has been filled already or not - CMS_THREAD_GUARD(m_filling) mutable T m_data; - }; - - std::vector gpuDataPerDevice_; -}; - -#endif