vllm hpu fix version for bug fix #903

Merged · 9 commits · Nov 15, 2024
2 changes: 1 addition & 1 deletion .github/workflows/_comps-workflow.yml
@@ -65,7 +65,7 @@ jobs:
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_yml}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
- cd vllm-fork && git rev-parse HEAD && cd ../
+ cd vllm-fork && git checkout 3c39626 && cd ../
fi
- name: Get build list
id: get-build-list
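
The point of this change is that the workflow now builds vllm-fork from a fixed commit (3c39626) instead of whatever HEAD resolves to at build time, which keeps the CI image build reproducible. A quick way to confirm the pin after the clone step (a hypothetical verification, not part of the PR):

```
# Confirm the checked-out commit matches the pin; not part of the workflow itself.
git -C vllm-fork rev-parse --short HEAD   # expected to begin with 3c39626
```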
6 changes: 0 additions & 6 deletions comps/llms/text-generation/vllm/langchain/README.md
@@ -56,12 +56,6 @@ bash ./build_docker_vllm.sh hpu

Set `hw_mode` to `hpu`.

- Note: If you want to enable tensor parallel, please set `setuptools==69.5.1` in Dockerfile.hpu before build docker with following command.
-
- ```
- sed -i "s/RUN pip install setuptools/RUN pip install setuptools==69.5.1/g" docker/Dockerfile.hpu
- ```

#### Launch vLLM service on single node

For small model, we can just use single node.
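
To make the single-node case concrete, a launch of the image built above looks roughly like the following. This is a sketch, not the repo's launch script: the container name, published port, and server flags are assumptions, the entrypoint is assumed to accept vLLM OpenAI-server arguments, and `--runtime=habana` mirrors the test script later in this PR.

```
# Illustrative single-node launch of the image built above; flags are assumptions.
docker run -d --rm --name vllm-hpu-server \
  --runtime=habana \
  --cap-add=sys_nice --ipc=host \
  -p 8008:80 \
  -e HABANA_VISIBLE_DEVICES=all \
  -e HF_TOKEN=${HF_TOKEN} \
  opea/vllm-hpu:latest \
  --model Intel/neural-chat-7b-v3-3 --host 0.0.0.0 --port 80
```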
@@ -37,6 +37,7 @@ fi
if [ "$hw_mode" = "hpu" ]; then
git clone https://github.com/HabanaAI/vllm-fork.git
cd ./vllm-fork/
+ git checkout 3c39626
docker build -f Dockerfile.hpu -t opea/vllm-hpu:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
cd ..
rm -rf vllm-fork
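
With the pin in place, the hpu branch of this build script (apparently the `build_docker_vllm.sh` referenced in the README above) clones the fork, checks out 3c39626, builds `opea/vllm-hpu:latest`, and removes the clone. Typical usage, plus a quick check that the image landed (the grep is just an illustration):

```
bash ./build_docker_vllm.sh hpu
docker images | grep vllm-hpu   # expect opea/vllm-hpu:latest in the listing
```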
6 changes: 0 additions & 6 deletions comps/llms/text-generation/vllm/llama_index/README.md
@@ -56,12 +56,6 @@ bash ./build_docker_vllm.sh hpu

Set `hw_mode` to `hpu`.

- Note: If you want to enable tensor parallel, please set `setuptools==69.5.1` in Dockerfile.hpu before build docker with following command.
-
- ```
- sed -i "s/RUN pip install setuptools/RUN pip install setuptools==69.5.1/g" docker/Dockerfile.hpu
- ```

#### Launch vLLM service on single node

For small model, we can just use single node.
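
Once the service is up, a small completion request is enough to confirm it is serving; the host and port below are placeholders for wherever the container was published, and the request mirrors the validation call in the test scripts later in this PR:

```
# Smoke test against a running vLLM service; adjust the host and port to your deployment.
curl http://localhost:8008/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
```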
@@ -32,6 +32,7 @@ fi
if [ "$hw_mode" = "hpu" ]; then
git clone https://github.com/HabanaAI/vllm-fork.git
cd ./vllm-fork/
+ git checkout 3c39626
docker build -f Dockerfile.hpu -t opea/vllm-hpu:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
cd ..
rm -rf vllm-fork
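
This is the same pin applied to the llama_index variant of the build script. If the pin later needs to move to a newer known-good commit of the fork, it is a one-line edit; a hypothetical way to do it in the same sed style the repo already uses (`<new-sha>` is a placeholder):

```
# Hypothetical pin bump; <new-sha> is a placeholder for the next known-good commit.
sed -i "s/git checkout 3c39626/git checkout <new-sha>/g" build_docker_vllm.sh
```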
@@ -12,6 +12,7 @@ function build_docker_images() {
cd $WORKPATH
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork/
+ git checkout 3c39626
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-hpu:comps --shm-size=128g .
if [ $? -ne 0 ]; then
echo "opea/vllm-hpu built fail"
@@ -34,7 +35,7 @@ function build_docker_images() {
}

function start_service() {
export LLM_MODEL="facebook/opt-125m"
export LLM_MODEL="Intel/neural-chat-7b-v3-3"
port_number=5025
docker run -d --rm \
--runtime=habana \
@@ -76,7 +77,7 @@ function validate_microservice() {
result=$(http_proxy="" curl http://${ip_address}:5025/v1/completions \
-H "Content-Type: application/json" \
-d '{
"model": "facebook/opt-125m",
"model": "Intel/neural-chat-7b-v3-3",
"prompt": "What is Deep Learning?",
"max_tokens": 32,
"temperature": 0
@@ -12,6 +12,7 @@ function build_docker_images() {
cd $WORKPATH
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork/
+ git checkout 3c39626
docker build --no-cache -f Dockerfile.hpu -t opea/vllm-hpu:comps --shm-size=128g .
if [ $? -ne 0 ]; then
echo "opea/vllm-hpu built fail"
@@ -34,7 +35,7 @@ function build_docker_images() {
}

function start_service() {
export LLM_MODEL="facebook/opt-125m"
export LLM_MODEL="Intel/neural-chat-7b-v3-3"
port_number=5025
docker run -d --rm \
--runtime=habana \
@@ -76,7 +77,7 @@ function validate_microservice() {
result=$(http_proxy="" curl http://${ip_address}:5025/v1/completions \
-H "Content-Type: application/json" \
-d '{
"model": "facebook/opt-125m",
"model": "Intel/neural-chat-7b-v3-3",
"prompt": "What is Deep Learning?",
"max_tokens": 32,
"temperature": 0