zgjja committed Oct 23, 2024
1 parent cee3996 commit 3fcf9a4
Showing 24 changed files with 817 additions and 130 deletions.
33 changes: 2 additions & 31 deletions CMakeLists.txt
@@ -5,42 +5,13 @@ project(
VERSION 0.1
LANGUAGES C CXX CUDA)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(CMAKE_BUILD_TYPE
"Debug"
CACHE STRING "build type" FORCE)

option(CUDA_USE_STATIC_CUDA_RUNTIME "use static cuda runtime lib" OFF)

if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES
60
70
72
75
80
86
89)
endif()

find_package(Threads REQUIRED)
find_package(CUDAToolkit REQUIRED)

include(cmake/FindTensorRT.cmake)

set(TensorRT_7_TARGETS mlp lenet)
set(TensorRT_7_8_10_TARGETS mlp lenet)

set(TensorRT_8_TARGETS)

set(TensorRT_10_TARGETS)

set(ALL_TARGETS ${TensorRT_7_TARGETS} ${TensorRT_8_TARGETS}
set(ALL_TARGETS ${TensorRT_7_8_10_TARGETS} ${TensorRT_8_TARGETS}
${TensorRT_10_TARGETS})

foreach(sub_dir ${ALL_TARGETS})
61 changes: 44 additions & 17 deletions README.md
@@ -58,42 +58,69 @@ The basic workflow of TensorRTx is:

## How to run

**Note**: this project supports building each network via the `CMakeLists.txt` in its subfolder, or building them all together via the top-level `CMakeLists.txt` of this project.

* General procedures before building and running:

```bash
# 1. generate xxx.wts from https://github.com/wang-xinyu/pytorchx/tree/master/lenet
# ...

# 2. put xxx.wts on top of this folder
# ...
```
* (*Option 1*) To build a single subproject, do:

```bash
## enter the subfolder
cd tensorrtx/xxx

## configure & build
cmake -S . -B build
make -C build
```

* (*Option 2*) To build multiple subprojects together, first **comment out**, in the top-level `CMakeLists.txt`, the projects you don't want to build or that are not supported by your TensorRT version; e.g., you cannot build subprojects in `${TensorRT_8_TARGETS}` if your TensorRT is `7.x` (a sketch of such an edit follows the commands below). Then:

```bash
## enter the top of this project
cd tensorrtx

## configure & build
# you may use "Ninja" rather than "make" to significantly boost the build speed
cmake -G Ninja -S . -B build
ninja -C build
```
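For illustration, here is a minimal sketch of such an edit in the top-level `CMakeLists.txt`, based on the target lists shown above (`some_trt8_only_model` is a hypothetical placeholder, not a real subproject):

```cmake
# Keep only the targets your TensorRT version can build.
set(TensorRT_7_8_10_TARGETS mlp lenet)

# On TensorRT 7.x, comment out any 8.x-only targets:
# set(TensorRT_8_TARGETS some_trt8_only_model)
set(TensorRT_8_TARGETS)
set(TensorRT_10_TARGETS)

set(ALL_TARGETS ${TensorRT_7_8_10_TARGETS} ${TensorRT_8_TARGETS}
                ${TensorRT_10_TARGETS})
```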

**WARNING**: This part is still under development; most subprojects are not adapted yet.

* Run the generated executable, e.g.:

```bash
# serialize the model to a plan file, i.e., 'xxx.engine'
build/xxx -s

# deserialize the plan file and run inference
build/xxx -d

# (Optional) check if the output is the same as pytorchx/lenet
# ...

# (Optional) customize the project
# ...
```

For more details, each subfolder may contain a `README.md` inside, which explains more.

## Models

The following models are implemented.

| Name | Description | Supported TensorRT Version |
|---------------|---------------|---------------|
|[mlp](./mlp) | the very basic model for starters, properly documented | 7.x/8.x/10.x |
|[lenet](./lenet) | the simplest, as a "hello world" of this project | 7.x/8.x/10.x |
|[alexnet](./alexnet)| easy to implement, all layers are supported in tensorrt |
|[googlenet](./googlenet)| GoogLeNet (Inception v1) |
|[inception](./inception)| Inception v3, v4 |
4 changes: 2 additions & 2 deletions docker/README.md
@@ -49,11 +49,11 @@ Change the `TAG` on top of the `.dockerfile`. Note: all images are officially ow

For more detail of the support matrix, please check [HERE](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)

### How to customize OpenCV in the image?

If the prebuilt package from apt cannot meet your requirements, please refer to the demo code in the `.dockerfile` to build OpenCV from source.

### How to solve failures when building the image?

For *443 timeout* or similar network issues, a proxy may be required. To make your host proxy available to Docker's build environment, change the `build` node inside the docker-compose file like this (a sketch assuming the standard `http_proxy`/`https_proxy` build args; adjust the values for your proxy):
```YAML
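services:
  tensorrt:
    build:
      context: .
      args:
        # hypothetical values; point these at your host proxy
        http_proxy: "http://<host-ip>:<port>"
        https_proxy: "http://<host-ip>:<port>"
```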
2 changes: 1 addition & 1 deletion docker/tensorrtx-docker-compose.yml
@@ -1,6 +1,6 @@
services:
tensorrt:
image: tensortx:1.0.0
image: tensortx:1.0.1
container_name: tensortx
environment:
- NVIDIA_VISIBLE_DEVICES=all
7 changes: 5 additions & 2 deletions docker/x86_64.dockerfile
@@ -7,13 +7,16 @@ ENV DEBIAN_FRONTEND noninteractive
# basic tools
RUN apt update && apt-get install -y --fix-missing --no-install-recommends \
sudo wget curl git ca-certificates ninja-build tzdata pkg-config \
gdb libglib2.0-dev libmount-dev \
gdb libglib2.0-dev libmount-dev locales \
&& rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir yapf isort cmake-format pre-commit

## fix a potential pre-commit error
RUN locale-gen "en_US.UTF-8"

## override older cmake
RUN find /usr/local/share -type d -name "cmake-*" -exec rm -rf {} + \
&& curl -fsSL "https://github.com/Kitware/CMake/releases/download/v3.29.0/cmake-3.29.0-linux-x86_64.sh" \
&& curl -fsSL "https://github.com/Kitware/CMake/releases/download/v3.30.0/cmake-3.30.0-linux-x86_64.sh" \
-o cmake.sh && bash cmake.sh --skip-license --exclude-subdir --prefix=/usr/local && rm cmake.sh

RUN apt update && apt-get install -y \
36 changes: 33 additions & 3 deletions lenet/CMakeLists.txt
@@ -5,9 +5,39 @@ project(
VERSION 0.1
LANGUAGES C CXX CUDA)

add_executable(${PROJECT_NAME} lenet.cpp)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES
60
70
72
75
80
86
89)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
set(CMAKE_BUILD_TYPE
"Debug"
CACHE STRING "Build type for this project" FORCE)

option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static CUDA runtime library" OFF)

target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR}/include)
find_package(Threads REQUIRED)
find_package(CUDAToolkit REQUIRED)

if(NOT TARGET TensorRT::TensorRT)
include(FindTensorRT.cmake)
else()
message("TensorRT has been found, skipping for ${PROJECT_NAME}")
endif()

add_executable(${PROJECT_NAME} lenet.cpp)

target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads CUDA::cudart
TensorRT::TensorRT )
TensorRT::TensorRT)
79 changes: 79 additions & 0 deletions lenet/FindTensorRT.cmake
@@ -0,0 +1,79 @@
cmake_minimum_required(VERSION 3.17.0)

set(TRT_VERSION
$ENV{TRT_VERSION}
CACHE STRING
"TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\"")

# find TensorRT include folder
if(NOT TensorRT_INCLUDE_DIR)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(TensorRT_INCLUDE_DIR
"/usr/local/cuda/targets/aarch64-linux/include"
CACHE PATH "TensorRT_INCLUDE_DIR")
else()
set(TensorRT_INCLUDE_DIR
"/usr/include/x86_64-linux-gnu"
CACHE PATH "TensorRT_INCLUDE_DIR")
endif()
message(STATUS "TensorRT: ${TensorRT_INCLUDE_DIR}")
endif()

# find TensorRT library folder
if(NOT TensorRT_LIBRARY_DIR)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(TensorRT_LIBRARY_DIR
"/usr/lib/aarch64-linux-gnu/tegra"
CACHE PATH "TensorRT_LIBRARY_DIR")
else()
set(TensorRT_LIBRARY_DIR
"/usr/lib/x86_64-linux-gnu"
CACHE PATH "TensorRT_LIBRARY_DIR")
endif()
message(STATUS "TensorRT: ${TensorRT_LIBRARY_DIR}")
endif()

set(TensorRT_LIBRARIES)

# process for different TensorRT version
if(DEFINED TRT_VERSION AND NOT TRT_VERSION STREQUAL "")
string(REGEX MATCH "([0-9]+)" _match ${TRT_VERSION})
set(TRT_MAJOR_VERSION "${_match}")
set(_modules nvinfer nvinfer_plugin)
unset(_match)

if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
endif()
else()
message(FATAL_ERROR "Please set an environment variable \"TRT_VERSION\"")
endif()

# find and add all modules of TensorRT into list
foreach(lib IN LISTS _modules)
find_library(
TensorRT_${lib}_LIBRARY
NAMES ${lib}
HINTS ${TensorRT_LIBRARY_DIR})
list(APPEND TensorRT_LIBRARIES ${TensorRT_${lib}_LIBRARY})
endforeach()

# report the found libraries only after the list has been populated
message(STATUS "Found TensorRT lib: ${TensorRT_LIBRARIES}")

# make the "TensorRT target"
add_library(TensorRT IMPORTED INTERFACE)
add_library(TensorRT::TensorRT ALIAS TensorRT)
target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})

set_target_properties(
TensorRT
PROPERTIES C_STANDARD 17
CXX_STANDARD 17
POSITION_INDEPENDENT_CODE ON
SKIP_BUILD_RPATH TRUE
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH "$\{ORIGIN\}"
INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")

unset(TRT_MAJOR_VERSION)
unset(_modules)
2 changes: 1 addition & 1 deletion lenet/lenet.cpp
@@ -211,7 +211,7 @@ void doInference(IExecutionContext& ctx, float* input, float* output, int batchS
auto const name = engine.getIOTensorName(i);
auto dims = ctx.getTensorShape(name);
auto total = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies<int>());
std::cout << name << " with total element size: " << total << std::endl;
std::cout << name << " element size: " << total << std::endl;
ctx.setTensorAddress(name, buffers[i]);
}
assert(ctx.enqueueV3(stream));
File renamed without changes.
File renamed without changes.
File renamed without changes.
38 changes: 35 additions & 3 deletions mlp/CMakeLists.txt
@@ -5,9 +5,41 @@ project(
VERSION 0.1
LANGUAGES C CXX CUDA)

if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES
60
70
72
75
80
86
89)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
set(CMAKE_BUILD_TYPE
"Debug"
CACHE STRING "Build type for this project" FORCE)

option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static CUDA runtime library" OFF)

find_package(Threads REQUIRED)
find_package(CUDAToolkit REQUIRED)

if(NOT TARGET TensorRT::TensorRT)
include(FindTensorRT.cmake)
else()
message("TensorRT has been found, skipping for ${PROJECT_NAME}")
endif()

add_executable(${PROJECT_NAME} mlp.cpp)

target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR}/include)
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_LIST_DIR})

target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads
TensorRT::TensorRT CUDA::cudart)
target_link_libraries(${PROJECT_NAME} PUBLIC Threads::Threads CUDA::cudart
TensorRT::TensorRT)