From c09dbef939017639d055e12e472535e5ccc60f8f Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Tue, 5 Mar 2024 18:02:16 +0000 Subject: [PATCH 1/4] [HIP] Enable seq_cst memory ordering for atomic fences --- source/adapters/hip/device.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index eac42d3039..43d753413f 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -806,6 +806,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; +#if __HIP_PLATFORM_NVIDIA__ + // Nvidia introduced fence.sc for seq_cst only since SM 7.0. + int Major = 0; + UR_CHECK_ERROR(hipDeviceGetAttribute( + &Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get())); + if (Major >= 7) + Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; +#else + Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; +#endif return ReturnValue(Capabilities); } case UR_DEVICE_INFO_DEVICE_ID: { From 2c9c4073b1dd293d21b2601ea052ff312fe4e76c Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Thu, 7 Mar 2024 11:49:31 +0000 Subject: [PATCH 2/4] [HIP] Enable device and system memory scopes for atomic fences --- source/adapters/hip/device.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 43d753413f..f5d3c3ea43 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -785,8 +785,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; return ReturnValue(Capabilities); } - case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: - case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { 
// SYCL2020 4.6.4.2 minimum mandated capabilities for // atomic_fence/memory_scope_capabilities. // Because scopes are hierarchical, wider scopes support all narrower @@ -798,6 +797,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; return ReturnValue(Capabilities); } + case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + return ReturnValue(Capabilities); + } case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { // SYCL2020 4.6.4.2 minimum mandated capabilities for // atomic_fence_order_capabilities. From 9f6856a7b8070abf10157c771066b48c096a5e49 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Tue, 30 Apr 2024 11:11:05 +0100 Subject: [PATCH 3/4] Change platform macro check from #if to #ifdef Co-authored-by: Hugh Delaney --- source/adapters/hip/device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index f5d3c3ea43..0a69231f8b 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -813,7 +813,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; -#if __HIP_PLATFORM_NVIDIA__ +#ifdef __HIP_PLATFORM_NVIDIA__ // Nvidia introduced fence.sc for seq_cst only since SM 7.0. 
int Major = 0; UR_CHECK_ERROR(hipDeviceGetAttribute( From 3e011c7000b1c833a6843fa08f9257fc82045d7c Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Fri, 7 Jun 2024 13:05:37 +0100 Subject: [PATCH 4/4] Fix query return type --- source/adapters/hip/device.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 0a69231f8b..b3ef15c570 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -798,11 +798,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(Capabilities); } case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + constexpr ur_memory_scope_capability_flags_t Capabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; return ReturnValue(Capabilities); } case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {