From b11b3464adce4d160c247aa27eae8dc8f63387df Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Thu, 7 Mar 2024 11:37:28 +0000 Subject: [PATCH 1/2] [HIP] Enable acq_rel and seq_cst memory order capabilities for atomics on HIP --- source/adapters/hip/device.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 0e9b50f94e..de83c9421a 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -779,7 +779,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, ur_memory_order_capability_flags_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; +#if __HIP_PLATFORM_NVIDIA__ + // Nvidia introduced fence.sc for seq_cst only since SM 7.0. + int Major = 0; + UR_CHECK_ERROR(hipDeviceGetAttribute( + &Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get())); + if (Major >= 7) + Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; +#else + Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; +#endif return ReturnValue(Capabilities); } case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { From 08b19b224c0be9cdd2073de080a3e654600007f1 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Thu, 7 Mar 2024 11:41:05 +0000 Subject: [PATCH 2/2] [HIP] Enable device and system memory scope for atomics on HIP --- source/adapters/hip/device.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index de83c9421a..813fd08ac4 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -794,15 +794,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(Capabilities); } case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - // SYCL2020 4.6.4.2 minimum mandated capabilities for - // atomic_fence/memory_scope_capabilities. - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) ur_memory_scope_capability_flags_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; +#if __HIP_PLATFORM_NVIDIA__ + // Nvidia introduced system scope atomics only since SM 6.0. + int Major = 0; + UR_CHECK_ERROR(hipDeviceGetAttribute( + &Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get())); + if (Major >= 6) + Capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; +#else + Capabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; +#endif return ReturnValue(Capabilities); } case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {