Skip to content
This repository has been archived by the owner on Jul 1, 2023. It is now read-only.

[ROCm] Changes to enable build for ROCm platform #401

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
[submodule "third_party/libnop"]
path = third_party/libnop
url = https://github.com/google/libnop.git
[submodule "third_party/hipify"]
path = third_party/hipify
url = https://github.com/ROCmSoftwarePlatform/hipify-torch.git
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ project(tensorpipe LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 14)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
# ROCm related
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${PROJECT_SOURCE_DIR}/third_party/hipify/cmake")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@beauby Added a comment. Should we see if we can guard it with TP_USE_ROCM as well?


# Expose build options.
include(Options)
Expand All @@ -21,6 +22,17 @@ include(Sanitize)
# Misc checks to cope with various compiler modes.
include(MiscCheck)

# ROCm related
if (TP_USE_ROCM)
include(Hip)
if(TP_HAVE_HIP)
include(Hipify)
hipify(CUDA_SOURCE_DIR ${PROJECT_SOURCE_DIR})
else()
message(FATAL_ERROR "Not able to find HIP installation, so cant compile with ROCm support.")
endif()
endif()

add_subdirectory(tensorpipe)

install(EXPORT TensorpipeTargets
Expand Down
162 changes: 162 additions & 0 deletions cmake/Hip.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
set(TP_HAVE_HIP FALSE)

IF(NOT DEFINED ENV{ROCM_PATH})
SET(ROCM_PATH /opt/rocm)
ELSE()
SET(ROCM_PATH $ENV{ROCM_PATH})
ENDIF()

# HIP_PATH
IF(NOT DEFINED ENV{HIP_PATH})
SET(HIP_PATH ${ROCM_PATH}/hip)
ELSE()
SET(HIP_PATH $ENV{HIP_PATH})
ENDIF()

IF(NOT EXISTS ${HIP_PATH})
return()
ENDIF()

# HCC_PATH
IF(NOT DEFINED ENV{HCC_PATH})
SET(HCC_PATH ${ROCM_PATH}/hcc)
ELSE()
SET(HCC_PATH $ENV{HCC_PATH})
ENDIF()

# HSA_PATH
IF(NOT DEFINED ENV{HSA_PATH})
SET(HSA_PATH ${ROCM_PATH}/hsa)
ELSE()
SET(HSA_PATH $ENV{HSA_PATH})
ENDIF()

# ROCBLAS_PATH
IF(NOT DEFINED ENV{ROCBLAS_PATH})
SET(ROCBLAS_PATH ${ROCM_PATH}/rocblas)
ELSE()
SET(ROCBLAS_PATH $ENV{ROCBLAS_PATH})
ENDIF()

# ROCSPARSE_PATH
IF(NOT DEFINED ENV{ROCSPARSE_PATH})
SET(ROCSPARSE_PATH ${ROCM_PATH}/rocsparse)
ELSE()
SET(ROCSPARSE_PATH $ENV{ROCSPARSE_PATH})
ENDIF()

# ROCFFT_PATH
IF(NOT DEFINED ENV{ROCFFT_PATH})
SET(ROCFFT_PATH ${ROCM_PATH}/rocfft)
ELSE()
SET(ROCFFT_PATH $ENV{ROCFFT_PATH})
ENDIF()

# HIPSPARSE_PATH
IF(NOT DEFINED ENV{HIPSPARSE_PATH})
SET(HIPSPARSE_PATH ${ROCM_PATH}/hipsparse)
ELSE()
SET(HIPSPARSE_PATH $ENV{HIPSPARSE_PATH})
ENDIF()

# THRUST_PATH
IF(DEFINED ENV{THRUST_PATH})
SET(THRUST_PATH $ENV{THRUST_PATH})
ELSE()
SET(THRUST_PATH ${ROCM_PATH}/include)
ENDIF()

# HIPRAND_PATH
IF(NOT DEFINED ENV{HIPRAND_PATH})
SET(HIPRAND_PATH ${ROCM_PATH}/hiprand)
ELSE()
SET(HIPRAND_PATH $ENV{HIPRAND_PATH})
ENDIF()

# ROCRAND_PATH
IF(NOT DEFINED ENV{ROCRAND_PATH})
SET(ROCRAND_PATH ${ROCM_PATH}/rocrand)
ELSE()
SET(ROCRAND_PATH $ENV{ROCRAND_PATH})
ENDIF()

# MIOPEN_PATH
IF(NOT DEFINED ENV{MIOPEN_PATH})
SET(MIOPEN_PATH ${ROCM_PATH}/miopen)
ELSE()
SET(MIOPEN_PATH $ENV{MIOPEN_PATH})
ENDIF()

IF(NOT DEFINED ENV{TP_ROCM_ARCH})
SET(TP_ROCM_ARCH gfx900;gfx906;gfx908)
ELSE()
SET(TP_ROCM_ARCH $ENV{TP_ROCM_ARCH})
ENDIF()

# Add HIP to the CMAKE Module Path
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})

# Disable Asserts In Code (Can't use asserts on HIP stack.)
ADD_DEFINITIONS(-DNDEBUG)

# Find the HIP Package
find_package(HIP 1.0)

IF(HIP_FOUND)
set(TP_HAVE_HIP TRUE)

if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()
set(hip_library_name hip_hcc)
endif()
message("HIP library name: ${hip_library_name}")

set(CMAKE_HCC_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
set(CMAKE_HCC_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
FIND_LIBRARY(TP_HIP_HCC_LIBRARIES ${hip_library_name} HINTS ${HIP_PATH}/lib)

list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_OPERATORS__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -DHIP_VERSION=${HIP_VERSION_MAJOR})
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow)
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)

set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# Ask hcc to generate device code during compilation so we can use
# host linker to link.
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS -Wno-defaulted-function-deleted)
foreach(tp_rocm_arch ${TP_ROCM_ARCH})
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=${tp_rocm_arch})
endforeach()

set(hip_DIR ${HIP_PATH}/lib/cmake/hip)
set(hsa-runtime64_DIR ${ROCM_PATH}/lib/cmake/hsa-runtime64)
set(AMDDeviceLibs_DIR ${ROCM_PATH}/lib/cmake/AMDDeviceLibs)
set(amd_comgr_DIR ${ROCM_PATH}/lib/cmake/amd_comgr)
set(rocrand_DIR ${ROCRAND_PATH}/lib/cmake/rocrand)
set(hiprand_DIR ${HIPRAND_PATH}/lib/cmake/hiprand)
set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas)
set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
set(hipfft_DIR ${HIPFFT_PATH}/lib/cmake/hipfft)
set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
set(rccl_DIR ${RCCL_PATH}/lib/cmake/rccl)
set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim)
set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub)
set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust)
set(ROCclr_DIR ${ROCM_PATH}/rocclr/lib/cmake/rocclr)

find_package(hip REQUIRED)

set(TP_HIP_INCLUDE ${ROCM_PATH}/include ${TP_HIP_INCLUDE})
set(TP_HIP_INCLUDE ${hip_INCLUDE_DIRS} $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}> $<INSTALL_INTERFACE:include> ${TP_HIP_INCLUDE})

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, the only reason for this entire file to exist is to be able to provide ${hip_INCLUDE_DIRS}? If so, why don't we just use ${HIP_PATH}/include? It'd reduce a lot of the seemingly-unrelated code here. @jeffdaily for comment

ENDIF()
6 changes: 6 additions & 0 deletions cmake/Options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ endmacro()

# TODO: Default to ON if CUDA available.
option(TP_USE_CUDA "Enable support for CUDA tensors" OFF)
option(TP_USE_ROCM "Enable support for ROCM tensors" OFF)

# if both TP_USE_CUDA and TP_USE_ROCM is set then break
if(TP_USE_CUDA AND TP_USE_ROCM)
message(FATAL_ERROR "TensorPipe does not support building for CUDA and ROCM at the same time. Please unset either TP_USE_CUDA or TP_USE_ROCM.")
endif()

# Optional features
option(TP_BUILD_BENCHMARK "Build benchmarks" OFF)
Expand Down
78 changes: 54 additions & 24 deletions tensorpipe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ list(APPEND TP_PUBLIC_HDRS

### cma

tp_conditional_backend(
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So far we've been using this macro lower case, let's keep it consistent (i.e. tp_conditional_backend()).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@beauby Sorry, that was I who suggested changing it to uppercase, to keep it consistent with the case in Options.cmake: https://github.com/pytorch/tensorpipe/blob/master/cmake/Options.cmake#L13
Would you still like us to change it back to lowercase?

TP_ENABLE_CMA "Enable cross-memory attach channel" "LINUX")
if(TP_ENABLE_CMA)
list(APPEND TP_SRCS
Expand All @@ -100,6 +100,7 @@ list(APPEND TP_SRCS
list(APPEND TP_PUBLIC_HDRS
channel/mpt/factory.h)


## Transports

### uv
Expand All @@ -124,7 +125,7 @@ list(APPEND TP_LINK_LIBRARIES uv::uv)

### shm

tp_conditional_backend(
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

TP_ENABLE_SHM "Enable shared-memory transport" "LINUX")
if(TP_ENABLE_SHM)
list(APPEND TP_SRCS
Expand All @@ -143,7 +144,7 @@ endif()

### ibv

tp_conditional_backend(
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

TP_ENABLE_IBV "Enable InfiniBand transport" "LINUX")
if(TP_ENABLE_IBV)
list(APPEND TP_SRCS
Expand Down Expand Up @@ -219,9 +220,9 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h
DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe)


## CUDA
## CUDA AND ROCM

if(TP_USE_CUDA)
if(TP_USE_CUDA OR TP_USE_ROCM)
# TP_SRCS is the list of source files that we need to build libtensorpipe.
set(TP_CUDA_SRCS)

Expand All @@ -234,9 +235,23 @@ if(TP_USE_CUDA)
# TP_INCLUDE_DIRS is list of include path to be used
set(TP_CUDA_INCLUDE_DIRS)

find_package(CUDA REQUIRED)
list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
# TP_CUDA_COMPILE_DEFS is list of compiler definitions to be used
set(TP_CUDA_COMPILE_DEFS)

if (TP_USE_CUDA)
set(TP_GPU_LIB_NAME "tensorpipe_cuda")
find_package(CUDA REQUIRED)
list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
elseif (TP_USE_ROCM)
set(TP_GPU_LIB_NAME "tensorpipe_hip")
# Finding of HIP package is already before hipifying the files
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious, any reason not looking for packages here as existing code did for CUDA?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list(APPEND TP_CUDA_LINK_LIBRARIES ${TP_HIP_HCC_LIBRARIES})
list(APPEND TP_CUDA_INCLUDE_DIRS ${TP_HIP_INCLUDE})
Comment on lines +249 to +250
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the naming, any reason they don't follow the CUDA ones, i.e., HIP_LIBRARIES and HIP_INCLUDE_DIRS?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check with the HIP team, if there is any particular reason for keeping this name, i.e., hip_INCLUDE_DIRS or different with CUDA, and get back if any reason.

list(APPEND TP_CUDA_COMPILE_DEFS TP_USE_ROCM)
option(TP_ROCM_TMP_COMPILE "This variable enables temp code for compilation on ROCm platform" OFF)
list(APPEND TP_CUDA_COMPILE_DEFS TP_ROCM_TMP_COMPILE)
endif()

list(APPEND TP_CUDA_SRCS
common/cuda_buffer.cc)
Expand All @@ -246,12 +261,16 @@ if(TP_USE_CUDA)

### cuda_xth

list(APPEND TP_CUDA_SRCS
channel/cuda_xth/channel_impl.cc
channel/cuda_xth/context_impl.cc
channel/cuda_xth/factory.cc)
list(APPEND TP_CUDA_PUBLIC_HDRS
channel/cuda_xth/factory.h)
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same remark regarding case.

TP_ENABLE_HIP_XTH "Enable HIP XTH communication channel" "TP_USE_ROCM")
if(TP_ENABLE_HIP_XTH OR TP_USE_CUDA)
list(APPEND TP_CUDA_SRCS
channel/cuda_xth/channel_impl.cc
channel/cuda_xth/context_impl.cc
channel/cuda_xth/factory.cc)
list(APPEND TP_CUDA_PUBLIC_HDRS
channel/cuda_xth/factory.h)
endif()

### cuda_basic

Expand All @@ -265,9 +284,11 @@ if(TP_USE_CUDA)

### cuda_ipc

tp_conditional_backend(
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Case.

TP_ENABLE_CUDA_IPC "Enable CUDA inter-process communication channel" "TP_USE_CUDA")
if(TP_ENABLE_CUDA_IPC)
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Case.

TP_ENABLE_HIP_IPC "Enable HIP inter-process communication channel" "TP_USE_ROCM")
if(TP_ENABLE_CUDA_IPC OR TP_ENABLE_HIP_IPC)
list(APPEND TP_CUDA_SRCS
channel/cuda_ipc/channel_impl.cc
channel/cuda_ipc/context_impl.cc
Expand All @@ -279,9 +300,11 @@ if(TP_USE_CUDA)

### cuda_gdr

tp_conditional_backend(
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Case.

TP_ENABLE_CUDA_GDR "Enable CUDA GpuDirect (InfiniBand) channel" "LINUX")
if(TP_ENABLE_CUDA_GDR)
TP_CONDITIONAL_BACKEND(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Case.

TP_ENABLE_HIP_GDR "Enable HIP GpuDirect (InfiniBand) channel" "LINUX")
if((TP_ENABLE_CUDA_GDR AND TP_USE_CUDA) OR (TP_ENABLE_HIP_GDR AND TP_USE_ROCM))
list(APPEND TP_CUDA_SRCS
common/ibv.cc
channel/cuda_gdr/channel_impl.cc
Expand All @@ -293,19 +316,26 @@ if(TP_USE_CUDA)
set(TENSORPIPE_HAS_CUDA_GDR_CHANNEL 1)
endif()

if(TP_USE_ROCM)
# Replace the cuda file names in TP_CUDA_SRCS & TP_CUDA_PUBLIC_HDRS file lists with hipified file names
get_hipified_list("${TP_CUDA_SRCS}" TP_CUDA_SRCS)
get_hipified_list("${TP_CUDA_PUBLIC_HDRS}" TP_CUDA_PUBLIC_HDRS)
endif()

configure_file(config_cuda.h.in config_cuda.h)

add_library(tensorpipe_cuda ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS})
add_library(${TP_GPU_LIB_NAME} ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS})

if(BUILD_SHARED_LIBS)
set_target_properties(tensorpipe_cuda PROPERTIES POSITION_INDEPENDENT_CODE 1)
set_target_properties(${TP_GPU_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
endif()

target_link_libraries(tensorpipe_cuda PUBLIC tensorpipe)
target_link_libraries(tensorpipe_cuda PRIVATE ${TP_CUDA_LINK_LIBRARIES})
target_include_directories(tensorpipe_cuda PUBLIC ${TP_CUDA_INCLUDE_DIRS})
target_link_libraries(${TP_GPU_LIB_NAME} PUBLIC tensorpipe)
target_link_libraries(${TP_GPU_LIB_NAME} PRIVATE ${TP_CUDA_LINK_LIBRARIES})
target_include_directories(${TP_GPU_LIB_NAME} PUBLIC ${TP_CUDA_INCLUDE_DIRS})
target_compile_definitions(${TP_GPU_LIB_NAME} PUBLIC ${TP_CUDA_COMPILE_DEFS})

install(TARGETS tensorpipe_cuda
install(TARGETS ${TP_GPU_LIB_NAME}
EXPORT TensorpipeTargets
LIBRARY DESTINATION ${TP_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})
Expand Down
Loading