adjust order

neuralmagic · Jun 13, 2024 · 22d8f0b · github-actions · Jun 13, 2024
1 parent 4fcdcb0
commit 22d8f0b
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 6 deletions.
diff --git a/.github/actions/nm-build-docker/action.yml b/.github/actions/nm-build-docker/action.yml
@@ -1,14 +1,14 @@
 name: Build docker image
 description: 'build docker image for nm-vllm'
 inputs:
-  docker_tag:
-    description: "tag to be used for the docker image"
-    type: string
-    required: true
   additional_tag:
     description: "additional tag for the docker image"
     type: string
     required: true
+  docker_tag:
+    description: "tag to be used for the docker image"
+    type: string
+    required: true
   build_type:
     description: "type of nm-vllm to install for the docker image: NIGHTLY (default) or RELEASE"
     type: string

diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml
@@ -42,8 +42,8 @@ jobs:
           - name: Checkout code
             uses: actions/checkout@v4
             with:
-              ref: ${{ inputs.gitref }}
               fetch-depth: 0
+              ref: ${{ inputs.gitref }}
               submodules: recursive
 
           - name: Set up nvidia-container-toolkit
@@ -60,8 +60,8 @@ jobs:
             id: build
             uses: ./.github/actions/nm-build-docker/
             with:
-              docker_tag: ${{ inputs.docker_tag }}
               additional_tag: ${{ steps.extratag.outputs.tag }}
+              docker_tag: ${{ inputs.docker_tag }}
               build_type: ${{ inputs.build_type }}
               build_version: ${{ inputs.build_version }}
Benchmark suite	Current: `22d8f0b`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4394447363093943` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`936.7467787428075` tokens/s
Benchmark suite	Current: `22d8f0b`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.46735888370802` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`947.4658113438797` tokens/s
Benchmark suite	Current: `22d8f0b`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.463452212626964` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`945.9656496487542` tokens/s
Benchmark suite	Current: `22d8f0b`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4288331862951504` prompts/s	`2.442178754247764` prompts/s	`1.01`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`932.6719435373378` tokens/s	`937.7966416311415` tokens/s	`1.01`