Merge branch 'main' into zepan/new_check
ZePan110 authored Nov 22, 2024
2 parents 67d89da + ac47042 commit 21fdc38
Showing 18 changed files with 48 additions and 109 deletions.
12 changes: 2 additions & 10 deletions ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md
@@ -69,10 +69,6 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c
- Persistent Volume Claim (PVC): This is the recommended approach for production setups. For more details on using PVC, refer to [PVC](https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/README.md#using-persistent-volume).
- Local Host Path: For simpler testing, ensure that each node involved in the deployment follows the steps above to locally prepare the models. After preparing the models, use `--set global.modelUseHostPath=${MODELDIR}` in the deployment command.
- Add OPEA Helm Repository:
```bash
python deploy.py --add-repo
```
- Label Nodes
```bash
python deploy.py --add-label --num-nodes 2
@@ -192,13 +188,9 @@ All the test results will come to the folder `GenAIEval/evals/benchmark/benchmar
## Teardown
After completing the benchmark, use the following commands to clean up the environment:
After completing the benchmark, use the following command to clean up the environment:
Remove Node Labels:
```bash
python deploy.py --delete-label
```
Delete the OPEA Helm Repository:
```bash
python deploy.py --delete-repo
python deploy.py --delete-label
```
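For orientation, the remaining steps reduce to labeling nodes, deploying, and removing the labels. The sketch below is illustrative only: the flag names are taken from `deploy.py` in this commit, and it assumes the script's default action (no management flag) installs the Helm release.

```bash
# Hedged sketch of the simplified workflow; <your-hf-token> is a placeholder.
python deploy.py --add-label --num-nodes 2
python deploy.py --chart-name chatqna --hf-token <your-hf-token> --model-dir ${MODELDIR} --device-type gaudi
python deploy.py --delete-label
```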
52 changes: 8 additions & 44 deletions ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py
@@ -83,26 +83,6 @@ def clear_labels_from_nodes(label, node_names=None):
print(f"Label {label_key} not found on node {node_name}, skipping.")


def add_helm_repo(repo_name, repo_url):
# Add the repo if it does not exist
add_command = ["helm", "repo", "add", repo_name, repo_url]
try:
subprocess.run(add_command, check=True)
print(f"Added Helm repo {repo_name} from {repo_url}.")
except subprocess.CalledProcessError as e:
print(f"Failed to add Helm repo {repo_name}: {e}")


def delete_helm_repo(repo_name):
"""Delete Helm repo if it exists."""
command = ["helm", "repo", "remove", repo_name]
try:
subprocess.run(command, check=True)
print(f"Deleted Helm repo {repo_name}.")
except subprocess.CalledProcessError:
print(f"Failed to delete Helm repo {repo_name}. It may not exist.")


def install_helm_release(release_name, chart_name, namespace, values_file, device_type):
"""Deploy a Helm release with a specified name and chart.
@@ -132,14 +112,14 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
if device_type == "gaudi":
print("Device type is gaudi. Pulling Helm chart to get gaudi-values.yaml...")

# Pull and untar the chart
subprocess.run(["helm", "pull", chart_name, "--untar"], check=True)
# Combine chart_name with fixed prefix
chart_pull_url = f"oci://ghcr.io/opea-project/charts/{chart_name}"

# Determine the directory name (get the actual chart_name if chart_name is in the format 'repo_name/chart_name', else use chart_name directly)
chart_dir_name = chart_name.split("/")[-1] if "/" in chart_name else chart_name
# Pull and untar the chart
subprocess.run(["helm", "pull", chart_pull_url, "--untar"], check=True)

# Find the untarred directory (assumes only one directory matches chart_dir_name)
untar_dirs = glob.glob(f"{chart_dir_name}*")
# Find the untarred directory
untar_dirs = glob.glob(f"{chart_name}*")
if untar_dirs:
untar_dir = untar_dirs[0]
hw_values_file = os.path.join(untar_dir, "gaudi-values.yaml")
@@ -210,20 +190,14 @@ def main():
parser.add_argument(
"--chart-name",
type=str,
default="opea/chatqna",
help="The chart name to deploy, composed of repo name and chart name (default: opea/chatqna).",
default="chatqna",
help="The chart name to deploy, composed of repo name and chart name (default: chatqna).",
)
parser.add_argument("--namespace", default="default", help="Kubernetes namespace (default: default).")
parser.add_argument("--hf-token", help="Hugging Face API token.")
parser.add_argument(
"--model-dir", help="Model directory, mounted as volumes for service access to pre-downloaded models"
)
parser.add_argument("--repo-name", default="opea", help="Helm repo name to add/delete (default: opea).")
parser.add_argument(
"--repo-url",
default="https://opea-project.github.io/GenAIInfra",
help="Helm repository URL (default: https://opea-project.github.io/GenAIInfra).",
)
parser.add_argument("--user-values", help="Path to a user-specified values.yaml file.")
parser.add_argument(
"--create-values-only", action="store_true", help="Only create the values.yaml file without deploying."
@@ -244,8 +218,6 @@ def main():
action="store_true",
help="Modify resources for services and change extraCmdArgs when creating values.yaml.",
)
parser.add_argument("--add-repo", action="store_true", help="Add the Helm repo specified by --repo-url.")
parser.add_argument("--delete-repo", action="store_true", help="Delete the Helm repo specified by --repo-name.")
parser.add_argument(
"--device-type",
type=str,
@@ -264,14 +236,6 @@ def main():
else:
args.num_nodes = num_node_names

# Helm repository management
if args.add_repo:
add_helm_repo(args.repo_name, args.repo_url)
return
elif args.delete_repo:
delete_helm_repo(args.repo_name)
return

# Node labeling management
if args.add_label:
add_labels_to_nodes(args.num_nodes, args.label, args.node_names)
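The removed `add_helm_repo`/`delete_helm_repo` helpers are no longer needed because the chart is now pulled directly from the OPEA OCI registry. A hedged manual equivalent of what the script does internally:

```bash
# Pull and untar the chatqna chart from the OCI registry (no `helm repo add` required);
# deploy.py then looks for gaudi-values.yaml inside the untarred directory.
helm pull oci://ghcr.io/opea-project/charts/chatqna --untar
ls chatqna/gaudi-values.yaml
```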
2 changes: 1 addition & 1 deletion ChatQnA/docker_compose/amd/gpu/rocm/README.md
@@ -290,7 +290,7 @@ docker compose up -d
Try the command below to check whether the TGI service is ready.
```bash
docker logs ${CONTAINER_ID} | grep Connected
docker logs chatqna-tgi-server | grep Connected
```
If the service is ready, you will get the response like below.
6 changes: 3 additions & 3 deletions ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -314,7 +314,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
Try the command below to check whether the LLM serving is ready.
```bash
docker logs tgi-service | grep Connected
docker logs tgi-gaudi-server | grep Connected
```
If the service is ready, you will get the response like below.
@@ -327,15 +327,15 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
```bash
# TGI service
curl http://${host_ip}:9009/v1/chat/completions \
curl http://${host_ip}:8005/v1/chat/completions \
-X POST \
-d '{"model": ${LLM_MODEL_ID}, "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
-H 'Content-Type: application/json'
```
```bash
# vLLM Service
curl http://${host_ip}:9009/v1/chat/completions \
curl http://${host_ip}:8007/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": ${LLM_MODEL_ID}, "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
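If either request fails, a quick generic check is to confirm which host ports the serving containers actually publish; the container name matches the log command above, and the exact mapping depends on your compose file.

```bash
docker port tgi-gaudi-server                     # expect a mapping to host port 8005
docker ps --format '{{.Names}}\t{{.Ports}}'      # or list all published ports at once
```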
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/nvidia/gpu/README.md
@@ -273,7 +273,7 @@ docker compose up -d
Try the command below to check whether the TGI service is ready.
```bash
docker logs ${CONTAINER_ID} | grep Connected
docker logs tgi-server | grep Connected
```
If the service is ready, you will get the response like below.
@@ -285,7 +285,7 @@ docker compose up -d
Then try the `cURL` command below to validate TGI.
```bash
curl http://${host_ip}:9009/v1/chat/completions \
curl http://${host_ip}:8008/v1/chat/completions \
-X POST \
-d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
-H 'Content-Type: application/json'
5 changes: 5 additions & 0 deletions EdgeCraftRAG/Dockerfile.server
@@ -23,6 +23,11 @@ RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

RUN mkdir /templates && \
chown -R user /templates
COPY ./edgecraftrag/prompt_template/default_prompt.txt /templates/
RUN chown -R user /templates/default_prompt.txt

COPY ./edgecraftrag /home/user/edgecraftrag

RUN mkdir -p /home/user/gradio_cache
44 changes: 10 additions & 34 deletions EdgeCraftRAG/README.md
@@ -32,14 +32,14 @@ Please follow this link [vLLM with OpenVINO](https://github.com/opea-project/Gen

### Start Edge Craft RAG Services with Docker Compose

If you want to enable vLLM with OpenVINO service, please finish the steps in [Launch vLLM with OpenVINO service](#optional-launch-vllm-with-openvino-service) first.

```bash
cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc

export MODEL_PATH="your model path for all your models"
export DOC_PATH="your doc path for uploading a dir of files"
export GRADIO_PATH="your gradio cache path for transferring files"
# If you have a specific prompt template, please uncomment the following line
# export PROMPT_PATH="your prompt path for prompt templates"

# Make sure all 3 folders have 1000:1000 permission, otherwise
# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${GRADIO_PATH}
@@ -70,49 +70,25 @@ optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-sma
optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task sentence-similarity
optimum-cli export openvino -m Qwen/Qwen2-7B-Instruct ${MODEL_PATH}/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights --weight-format int4

docker compose up -d
```

#### Launch services with local inference

```bash
docker compose -f compose.yaml up -d
```
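As a generic sanity check (not specific to this project), confirm the containers are up before sending requests:

```bash
docker compose -f compose.yaml ps
```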

#### (Optional) Launch vLLM with OpenVINO service
#### Launch services with vLLM + OpenVINO inference service

1. Set up Environment Variables
Set up Additional Environment Variables and start with compose_vllm.yaml

```bash
export LLM_MODEL=#your model id
export VLLM_SERVICE_PORT=8008
export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
export HUGGINGFACEHUB_API_TOKEN=#your HF token
```

2. Uncomment below code in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml'

```bash
# vllm-openvino-server:
# container_name: vllm-openvino-server
# image: opea/vllm-arc:latest
# ports:
# - ${VLLM_SERVICE_PORT:-8008}:80
# environment:
# HTTPS_PROXY: ${https_proxy}
# HTTP_PROXY: ${https_proxy}
# VLLM_OPENVINO_DEVICE: GPU
# HF_ENDPOINT: ${HF_ENDPOINT}
# HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# volumes:
# - /dev/dri/by-path:/dev/dri/by-path
# - $HOME/.cache/huggingface:/root/.cache/huggingface
# devices:
# - /dev/dri
# entrypoint: /bin/bash -c "\
# cd / && \
# export VLLM_CPU_KVCACHE_SPACE=50 && \
# export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
# python3 -m vllm.entrypoints.openai.api_server \
# --model '${LLM_MODEL}' \
# --max_model_len=1024 \
# --host 0.0.0.0 \
# --port 80"
docker compose -f compose_vllm.yaml up -d
```
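Once the stack is up, the vLLM endpoint can be probed through the standard OpenAI-compatible `/v1/models` route it serves, using the variables exported above:

```bash
curl ${vLLM_ENDPOINT}/v1/models
```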

### ChatQnA with LLM Example (Command Line)
1 change: 1 addition & 0 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml
@@ -16,6 +16,7 @@ services:
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
ports:
- ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
devices:
@@ -16,6 +16,7 @@ services:
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
ports:
- ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
devices:
13 changes: 7 additions & 6 deletions EdgeCraftRAG/edgecraftrag/components/generator.py
@@ -26,12 +26,13 @@ def __init__(self, llm_model, prompt_template, inference_type, **kwargs):
("\n\n", "\n"),
("\t\n", "\n"),
)
template = prompt_template
self.prompt = (
DocumentedContextRagPromptTemplate.from_file(template)
if os.path.isfile(template)
else DocumentedContextRagPromptTemplate.from_template(template)
)
safe_root = "/templates"
template = os.path.normpath(os.path.join(safe_root, prompt_template))
if not template.startswith(safe_root):
raise ValueError("Invalid template path")
if not os.path.exists(template):
raise ValueError("Template file not exists")
self.prompt = DocumentedContextRagPromptTemplate.from_file(template)
self.llm = llm_model
if isinstance(llm_model, str):
self.model_id = llm_model
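The effect of the new `/templates` sandboxing can be illustrated with a small standalone check; `custom/my_prompt.txt` is a hypothetical user template (e.g. mounted via `${PROMPT_PATH}`), not part of this change.

```bash
python3 - <<'EOF'
import os

# Mirror the normalization and prefix check added in generator.py above.
safe_root = "/templates"
for p in ("./default_prompt.txt", "custom/my_prompt.txt", "../etc/passwd"):
    resolved = os.path.normpath(os.path.join(safe_root, p))
    verdict = "allowed" if resolved.startswith(safe_root) else "rejected"
    print(f"{p:<24} -> {resolved:<36} {verdict}")
EOF
```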
2 changes: 1 addition & 1 deletion EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json
@@ -37,7 +37,7 @@
"device": "auto",
"weight": "INT4"
},
"prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
"prompt_path": "./default_prompt.txt",
"inference_type": "local"
},
"active": "True"
2 changes: 1 addition & 1 deletion EdgeCraftRAG/tests/configs/test_pipeline_vllm.json
@@ -37,7 +37,7 @@
"device": "auto",
"weight": "INT4"
},
"prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
"prompt_path": "./default_prompt.txt",
"inference_type": "vllm"
},
"active": "True"
3 changes: 1 addition & 2 deletions EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh
@@ -31,8 +31,7 @@ vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
function build_docker_images() {
cd $WORKPATH/docker_image_build
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="server ui ecrag"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

echo "Build vllm_openvino image from GenAIComps..."
cd $WORKPATH && git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}"
2 changes: 1 addition & 1 deletion EdgeCraftRAG/tests/test_pipeline_local_llm.json
@@ -37,7 +37,7 @@
"device": "auto",
"weight": "INT4"
},
"prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt",
"prompt_path": "./default_prompt.txt",
"inference_type": "local"
},
"active": "True"
2 changes: 1 addition & 1 deletion EdgeCraftRAG/ui/gradio/default.yaml
@@ -29,7 +29,7 @@ postprocessor: "reranker"

# Generator
generator: "chatqna"
prompt_path: "./edgecraftrag/prompt_template/default_prompt.txt"
prompt_path: "./default_prompt.txt"

# Models
embedding_model_id: "BAAI/bge-small-en-v1.5"
2 changes: 1 addition & 1 deletion EdgeCraftRAG/ui/gradio/ecrag_client.py
@@ -78,7 +78,7 @@ def create_update_pipeline(
],
generator=api_schema.GeneratorIn(
# TODO: remove hardcoding
prompt_path="./edgecraftrag/prompt_template/default_prompt.txt",
prompt_path="./default_prompt.txt",
model=api_schema.ModelIn(model_id=llm_id, model_path=llm_path, device=llm_device, weight=llm_weights),
inference_type=llm_infertype,
),
@@ -10,7 +10,7 @@ metadata:
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
data:
LLM_MODEL_ID: "haoranxu/ALMA-13B"
MODEL_ID: "haoranxu/ALMA-13B"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
@@ -10,7 +10,7 @@ metadata:
app.kubernetes.io/instance: translation
app.kubernetes.io/version: "2.1.0"
data:
LLM_MODEL_ID: "haoranxu/ALMA-13B"
MODEL_ID: "haoranxu/ALMA-13B"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
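After applying the updated Translation manifests, the rename from `LLM_MODEL_ID` to `MODEL_ID` can be verified with the label shown in the metadata above (add `-n <namespace>` if needed):

```bash
kubectl get configmaps -l app.kubernetes.io/instance=translation -o yaml | grep MODEL_ID
```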
