Fix docker image build issue (#305)

1. Fixed the apt-get install issue.
2. Fixed the tag issue for nightly builds.

Co-authored-by: dhuangnm <[email protected]>
neuralmagic · Jun 13, 2024 · 2a7d49e · 2a7d49e · github-actions · Jun 14, 2024
1 parent 5aaec10
commit 2a7d49e
Show file tree

Hide file tree

Showing 4 changed files with 22 additions and 16 deletions.
diff --git a/.github/actions/nm-build-docker/action.yml b/.github/actions/nm-build-docker/action.yml
@@ -5,7 +5,7 @@ inputs:
     description: "tag to be used for the docker image"
     type: string
     required: true
-  additional_tag:
+  extra_tag:
     description: "additional tag for the docker image"
     type: string
     required: true
@@ -33,8 +33,8 @@ runs:
              --build-arg build_version=${{ inputs.build_version }} \
              --target vllm-openai . || status=$?
       if [ ${status} -eq 0 ]; then
-          echo "Add tag ${additional_tag} for "${build_type}" build too"
-          docker image tag ghcr.io/neuralmagic/nm-vllm-openai:${{ inputs.docker_tag }} ghcr.io/neuralmagic/nm-vllm-openai:${additional_tag} || ((status+=$?))
+          echo "Add tag ${{ inputs.extra_tag }} for "${{ inputs.build_type }}" build too"
+          docker image tag ghcr.io/neuralmagic/nm-vllm-openai:${{ inputs.docker_tag }} ghcr.io/neuralmagic/nm-vllm-openai:${{ inputs.extra_tag }} || ((status+=$?))
       fi
       docker image ls -a
       echo "status=${status}" >> $GITHUB_OUTPUT

diff --git a/.github/actions/nm-get-tag/action.yml b/.github/actions/nm-get-docker-tag/action.yml
@@ -5,12 +5,17 @@ inputs:
     description: "type of nm-vllm to install for the docker image: NIGHTLY (default) or RELEASE"
     type: string
     default: 'NIGHTLY'
+outputs:
+  tag:
+    description: "extra tag for the docker image based on build type"
+    value: ${{ steps.extratag.outputs.tag }}
 runs:
   using: composite
   steps:
-  - run: |
+  - id: extratag
+    run: |
       tag=nightly
-      if [[ "${build_type}" = "RELEASE" ]]; then
+      if [[ "${{ inputs.build_type }}" = "RELEASE" ]]; then
           tag=latest
       fi
       echo "tag=${tag}" >> $GITHUB_OUTPUT

diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml
@@ -40,18 +40,19 @@ jobs:
               password: ${{ secrets.GITHUB_TOKEN }}
 
           - name: Checkout code
-            uses: actions/checkout@v3
+            uses: actions/checkout@v4
             with:
-              fetch-depth: 1
+              fetch-depth: 0
+              ref: ${{ inputs.gitref }}
               submodules: recursive
 
           - name: Set up nvidia-container-toolkit
             id: setup
             uses: ./.github/actions/nm-setup-nvidia-container-toolkit/
 
-          - name: Get image additional tag
+          - name: Get docker image extra tag
             id: tag
-            uses: ./.github/actions/nm-get-tag/
+            uses: ./.github/actions/nm-get-docker-tag/
             with:
               build_type: ${{ inputs.build_type }}
 
@@ -60,7 +61,7 @@ jobs:
             uses: ./.github/actions/nm-build-docker/
             with:
               docker_tag: ${{ inputs.docker_tag }}
-              additional_tag: ${{ steps.tag.outputs.tag }}
+              extra_tag: ${{ steps.tag.outputs.tag }}
               build_type: ${{ inputs.build_type }}
               build_version: ${{ inputs.build_version }}
 

diff --git a/Dockerfile b/Dockerfile
@@ -9,8 +9,8 @@
 # prepare basic build environment
 FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev
 
-RUN apt-get update -y \
-    && apt-get install -y python3-pip git
+RUN apt-get update -y && \
+    apt-get install -y python3-pip git
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -60,8 +60,8 @@ RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
 FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base
 WORKDIR /vllm-workspace
 
-RUN apt-get update -y \
-    && apt-get install -y python3-pip git vim
+RUN apt-get update -y && \
+    apt-get install -y python3-pip git vim
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -70,15 +70,15 @@ RUN apt-get update -y \
 RUN ldconfig /usr/local/cuda-12.4/compat/
 
 # install nm-vllm wheel first, so that torch etc will be installed
-ARG build_type="nightly"
+ARG build_type="NIGHTLY"
 ARG build_version="latest"
 ENV INSTALL_TYPE=${build_type}
 ENV INSTALL_VERSION=${build_version}
 # UPSTREAM SYNC: Install nm-vllm with sparsity extras
 # use nm pypi for now for testing
 RUN --mount=type=bind,from=build \
     --mount=type=cache,target=/root/.cache/pip \
-    if [ "${INSTALL_TYPE}" = "nightly" ]; then \
+    if [ "${INSTALL_TYPE}" = "NIGHTLY" ]; then \
         if [ "${INSTALL_VERSION}" = "latest" ]; then \
             pip install nm-vllm-nightly[sparse] --extra-index-url https://pypi.neuralmagic.com/simple; \
         else \
Benchmark suite	Current: `2a7d49e`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4356221441354475` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`935.2789033480118` tokens/s
Benchmark suite	Current: `2a7d49e`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.468659245295787` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`947.9651501935822` tokens/s
Benchmark suite	Current: `2a7d49e`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.456621130673434` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`943.3425141785985` tokens/s
Benchmark suite	Current: `2a7d49e`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.448240731114912` prompts/s	`2.442178754247764` prompts/s	`1.00`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`940.1244407481262` tokens/s	`937.7966416311415` tokens/s	`1.00`