♻️ Removed global Model ID var | resolved readme conflicts
Signed-off-by: Krishna Murti <[email protected]>
krish918 committed Sep 6, 2024
1 parent 140d1b5 commit 815c51b
Showing 10 changed files with 55 additions and 17 deletions.
37 changes: 33 additions & 4 deletions helm-charts/chatqna/README.md
@@ -24,7 +24,15 @@ Apart from above mentioned services, there are following conditional dependencies

## Installing the Chart

To install the chart, run the following:
Follow these steps to install the ChatQnA chart:

1. Clone the GenAIInfra repository:

```bash
git clone https://github.com/opea-project/GenAIInfra.git
```

2. Set up the dependencies and required environment variables:

```bash
cd GenAIInfra/helm-charts/
@@ -33,20 +33,41 @@ helm dependency update chatqna
export HFTOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt/opea-models"
export MODELNAME="Intel/neural-chat-7b-v3-3"
```
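
Optionally, you can confirm that the subchart dependencies were resolved before moving on; `helm dependency list` is a quick, read-only check (not required by the chart itself):

```bash
# Optional sanity check: list the subchart dependencies resolved for chatqna.
# Run from GenAIInfra/helm-charts/ after `helm dependency update chatqna`.
helm dependency list chatqna
```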

3. Depending on the device targeted for running ChatQnA, use one of the following installation commands:

```bash
# Install the chart on a Xeon machine
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
```

```bash
# To use Gaudi device
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml
```

```bash
# To use Nvidia GPU
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml
```

```bash
# To include guardrail component in chatqna on Xeon
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml
```

# To use OpenVINO vLLM inference engine on Xeon device

helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set global.LLM_MODEL_ID=${MODELNAME} --set tags.tgi=false --set vllm-openvino.enabled=true
```bash
# To include guardrail component in chatqna on Gaudi
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml
```

>**_NOTE:_** The default installation uses [TGI (Text Generation Inference)](https://github.com/huggingface/text-generation-inference) as the inference engine. To use vLLM as the inference engine instead, see below.
```bash
# To use OpenVINO vLLM inference engine on Xeon device
helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-vllm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm-openvino.LLM_MODEL_ID=${MODELNAME} --set tags.tgi=false --set vllm-openvino.enabled=true
```
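
Whichever variant is installed, a quick way to verify the deployment is to wait for the pods to become ready and send a test request to the ChatQnA gateway. The sketch below assumes the chart's default service name `chatqna`, port `8888`, and the `/v1/chatqna` route; adjust them to match your release if they differ:

```bash
# Check that all pods of the release are running (assumes the default namespace).
kubectl get pods

# Forward the ChatQnA gateway locally. Service name `chatqna` and port 8888 are
# assumed chart defaults -- adjust them if your release uses different values.
kubectl port-forward svc/chatqna 8888:8888 &

# Send a simple test query to the gateway.
curl http://localhost:8888/v1/chatqna \
  -H "Content-Type: application/json" \
  -d '{"messages": "What is OPEA?"}'
```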

### IMPORTANT NOTE

7 changes: 5 additions & 2 deletions helm-charts/chatqna/values.yaml
@@ -43,13 +43,17 @@ tolerations: []

affinity: {}

# To override values in subchart tgi and vllm-ov
# To override values in subchart tgi, vllm-openvino and llm-vllm-uservice
tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

vllm-openvino:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  enabled: false

llm-vllm-uservice:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

tags:
  tgi: true
  vllm: false
@@ -61,7 +65,6 @@ global:
  no_proxy: ""
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"

  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # set modelUseHostPath or modelUsePVC to use model cache.
  modelUseHostPath: ""
  # modelUsePVC: model-volume
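
The overrides that the README passes via `--set` can also be collected in a custom values file and supplied with `-f`; a minimal sketch, where `my-values.yaml` is a hypothetical file name and the values simply mirror the defaults shown above:

```bash
# Hypothetical my-values.yaml gathering the overrides used in the README,
# so the install command does not need a long chain of --set flags.
cat > my-values.yaml <<'EOF'
tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
global:
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  modelUseHostPath: /mnt/opea-models
EOF

helm install chatqna chatqna -f my-values.yaml
```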
5 changes: 3 additions & 2 deletions helm-charts/common/llm-vllm-uservice/README.md
@@ -31,22 +31,23 @@ export http_proxy=<your_http_proxy>
export https_proxy=<your_https_proxy>

helm dependency update
helm install llm-vllm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set vLLM_ENDPOINT=${vLLM_ENDPOINT} --set global.LLM_MODEL_ID=${MODELNAME} --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait
helm install llm-vllm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set vLLM_ENDPOINT=${vLLM_ENDPOINT} --set LLM_MODEL_ID=${MODELNAME} --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait
```

## (Option 2): Installing the chart with automatic installation of dependencies:

```bash
cd GenAIInfra/helm-charts/common/llm-vllm-uservice
export HFTOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt/opea-models"
export MODELNAME="bigscience/bloom-560m"

# If proxy is required, please export the appropriate proxy values.
export http_proxy=<your_http_proxy>
export https_proxy=<your_https_proxy>

helm dependency update
helm install llm-vllm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.LLM_MODEL_ID=${MODELNAME} --set autodependency.enabled=true --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait
helm install llm-vllm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set vllm-openvino.LLM_MODEL_ID=${MODELNAME} --set autodependency.enabled=true --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait
```

The `--wait` flag in the above installation command ensures that all dependencies are resolved and all services are deployed.
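
Once the release is up, a minimal smoke test is to call the microservice directly. The port `9000` and `/v1/chat/completions` route used below are assumed defaults for the llm-vllm microservice; check the service definition in your release before relying on them:

```bash
# Assumed defaults: service name llm-vllm-uservice, port 9000, and an
# OpenAI-style /v1/chat/completions route exposed by the microservice.
kubectl port-forward svc/llm-vllm-uservice 9000:9000 &

curl http://localhost:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"query": "What is Deep Learning?", "max_new_tokens": 17}'
```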
@@ -13,7 +13,7 @@ data:
{{- else }}
vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm-openvino"
{{- end }}
LLM_MODEL: {{ .Values.global.LLM_MODEL_ID | quote }}
LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
HF_HOME: "/tmp/.cache/huggingface"
{{- if .Values.global.HF_ENDPOINT }}
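
To see the effect of this ConfigMap change without deploying anything, the chart can be rendered locally; the sketch below assumes it is run from the llm-vllm-uservice chart directory after `helm dependency update`:

```bash
# Render the templates locally and confirm that LLM_MODEL in the ConfigMap
# is now taken from the chart-level LLM_MODEL_ID rather than a global value.
helm template llm-vllm-uservice . \
  --set LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 | grep "LLM_MODEL"
```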
8 changes: 7 additions & 1 deletion helm-charts/common/llm-vllm-uservice/values.yaml
@@ -87,6 +87,13 @@ tolerations: []

affinity: {}

# Model ID to be used by llm-vllm microservice
LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"

# Override the Model ID used by the vllm-openvino subchart. (The llm-vllm microservice depends on vllm-openvino, so these two values should be the same.)
vllm-openvino:
  LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"

global:
  http_proxy: ""
  https_proxy: ""
@@ -98,4 +105,3 @@ global:
  # comment out modelUseHostPath if you want to download the model from huggingface
  # modelUseHostPath: ""
  modelUseHostPath: ""
  LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
2 changes: 1 addition & 1 deletion helm-charts/common/vllm-openvino/README.md
@@ -16,7 +16,7 @@ export HFTOKEN="insert-your-huggingface-token-here"
export http_proxy=<your_http_proxy>
export https_proxy=<your_https_proxy>

helm install vllm-openvino vllm-openvino --set global.modelUseHostPath=${MODELDIR} --set global.LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait
helm install vllm-openvino vllm-openvino --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.http_proxy=${http_proxy} --set global.https_proxy=${https_proxy} --wait
```

The `--wait` flag in the above helm installation command makes the shell wait until `vllm-openvino` is completely up and ready.
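
Once it is ready, the OpenAI-compatible completions endpoint can be exercised directly, mirroring the chart's own test shown further below. The service name and port `80` are assumed defaults for this chart; adjust them to your release if needed:

```bash
# Forward the vllm-openvino service locally (assumed service name and port 80).
kubectl port-forward svc/vllm-openvino 8080:80 &

# Query the OpenAI-compatible completions endpoint with the configured model.
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "What is Deep Learning?", "model": "'"${MODELNAME}"'", "max_tokens": 17, "temperature": 0.5}'
```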
3 changes: 1 addition & 2 deletions helm-charts/common/vllm-openvino/templates/configmap.yaml
@@ -8,7 +8,7 @@ metadata:
labels:
{{- include "vllm-openvino.labels" . | nindent 4 }}
data:
MODEL_ID: {{ .Values.global.LLM_MODEL_ID | quote }}
MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }}
PORT: {{ .Values.service.port | quote }}
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote }}
@@ -22,7 +22,6 @@ data:
no_proxy: {{ .Values.global.no_proxy | quote }}
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
{{- if .Values.CUDA_GRAPHS }}
CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }}
2 changes: 1 addition & 1 deletion helm-charts/common/vllm-openvino/templates/deployment.yaml
@@ -78,7 +78,7 @@ spec:
- |
cd / && \
python3 -m vllm.entrypoints.openai.api_server \
--model "{{ .Values.global.LLM_MODEL_ID }}" \
--model "{{ .Values.LLM_MODEL_ID }}" \
--host 0.0.0.0 \
--port 80
volumes:
@@ -20,7 +20,7 @@ spec:
for ((i=1; i<=max_retry; i++)); do \
curl http://{{ include "vllm-openvino.fullname" . }}/v1/completions -sS --fail-with-body \
-X POST \
-d '{"prompt":"What is Deep Learning?", "model": {{ .Values.global.LLM_MODEL_ID | quote }}, "max_tokens":17, "temperature": 0.5}' \
-d '{"prompt":"What is Deep Learning?", "model": {{ .Values.LLM_MODEL_ID | quote }}, "max_tokens":17, "temperature": 0.5}' \
-H 'Content-Type: application/json' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
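
The retry-and-curl loop above appears to be wired as the chart's Helm test hook; assuming the release is named `vllm-openvino` and the pod is indeed a test hook, it can be triggered on demand:

```bash
# Run the chart's test hook (the curl loop shown above) against the release.
# Assumes the release name is vllm-openvino and the pod carries the helm test annotation.
helm test vllm-openvino
```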
4 changes: 2 additions & 2 deletions helm-charts/common/vllm-openvino/values.yaml
@@ -96,6 +96,8 @@ tolerations: []

affinity: {}

LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

CUDA_GRAPHS: "0"
VLLM_CPU_KVCACHE_SPACE: 50
HABANA_VISIBLE_DEVICES: all
@@ -107,8 +109,6 @@ global:
  no_proxy: ""
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"

  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

  # Choose where to save your downloaded models
  # Set modelUseHostPath for local directory, this is good for one node test. Example:
  # modelUseHostPath: /mnt/opea-models
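
With the model ID now defined at the chart level rather than under `global`, install-time overrides target the top-level key; a small illustrative example (the values shown are only examples):

```bash
# Override the chart-level model ID and the CPU KV-cache size at install time.
# Both keys sit at the top level of values.yaml after this change.
helm install vllm-openvino vllm-openvino \
  --set LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 \
  --set VLLM_CPU_KVCACHE_SPACE=50
```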