From 8b8fe4bb55fe5b49b5ca4aaa355cc4837433ded0 Mon Sep 17 00:00:00 2001
From: Nika Smilga <42929200+smilni@users.noreply.github.com>
Date: Tue, 15 Aug 2023 15:31:20 +0400
Subject: [PATCH 1/2] Feat/add external integration skill (#504)

* dff_external_integration_skill first commit
* added yml files; added formatter
* draft of external integration skill
* fake external server
* server filez
* wip
* working version of fake server/skill; tests
* text fix
* update ymls and fix some bugs
* change port
* style
* revert unnecessary changes
* update acc to review
* ports and comments n2
* fixes
* style
* style
* formatter
* external service timeout
* revert accidential change
* fixes for Dilya
* flake8 fix!
* fix old port in readme
---
 components.tsv                                |   5 +
 components/knoIA98f3bijjao9d9pqkne.yml        |  24 ++++
 services/external_fake_server/Dockerfile      |  15 +++
 .../external_fake_server/requirements.txt     |   8 ++
 services/external_fake_server/server.py       |  36 ++++++
 .../external-fake-service/environment.yml     |   2 +
 .../external-fake-service/service.yml         |  24 ++++
 services/external_fake_server/test.py         |  24 ++++
 services/external_fake_server/test.sh         |   3 +
 skills/external_integration_skill/Dockerfile  |  26 +++++
 skills/external_integration_skill/README.md   | 104 ++++++++++++++++++
 .../requirements.txt                          |   9 ++
 skills/external_integration_skill/server.py   |  65 +++++++++++
 .../environment.yml                           |   7 ++
 .../external-integration-skill/service.yml    |  29 +++++
 skills/external_integration_skill/test.py     |  17 +++
 skills/external_integration_skill/test.sh     |   3 +
 state_formatters/dp_formatters.py             |   7 ++
 18 files changed, 408 insertions(+)
 create mode 100644 components/knoIA98f3bijjao9d9pqkne.yml
 create mode 100644 services/external_fake_server/Dockerfile
 create mode 100644 services/external_fake_server/requirements.txt
 create mode 100644 services/external_fake_server/server.py
 create mode 100644 services/external_fake_server/service_configs/external-fake-service/environment.yml
 create mode 100644 services/external_fake_server/service_configs/external-fake-service/service.yml
 create mode 100644 services/external_fake_server/test.py
 create mode 100755 services/external_fake_server/test.sh
 create mode 100644 skills/external_integration_skill/Dockerfile
 create mode 100644 skills/external_integration_skill/README.md
 create mode 100644 skills/external_integration_skill/requirements.txt
 create mode 100644 skills/external_integration_skill/server.py
 create mode 100644 skills/external_integration_skill/service_configs/external-integration-skill/environment.yml
 create mode 100644 skills/external_integration_skill/service_configs/external-integration-skill/service.yml
 create mode 100644 skills/external_integration_skill/test.py
 create mode 100755 skills/external_integration_skill/test.sh

diff --git a/components.tsv b/components.tsv
index d07fba0b0d..a6511e6182 100644
--- a/components.tsv
+++ b/components.tsv
@@ -181,3 +181,8 @@
 8177 dff-journalist-helper-ru-prompted-skill
 8178 transformers-lm-rugpt35
 8179 dff-robot-prompted-skill
+8180
+8181
+8182
+8183 external-integration-skill
+8184 external-fake-server
diff --git a/components/knoIA98f3bijjao9d9pqkne.yml b/components/knoIA98f3bijjao9d9pqkne.yml
new file mode 100644
index 0000000000..8353bf21cc
--- /dev/null
+++ b/components/knoIA98f3bijjao9d9pqkne.yml
@@ -0,0 +1,24 @@
+name: external_integration_skill
+display_name: External Integration Skill
+component_type: Script-based w/o NNs
+model_type: Dictionary/Pattern-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: Generic skill to provide responses from external skills and services.
+ram_usage: 128M
+gpu_usage: null
+group: skills
+connector:
+  protocol: http
+  timeout: 2.0
+  url: http://external-integration-skill:8183/respond
+dialog_formatter: state_formatters.dp_formatters:external_integration_skill_formatter
+response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service
+previous_services:
+- skill_selectors
+required_previous_services: null
+state_manager_method: add_hypothesis
+tags: null
+endpoint: respond
+service: skills/external_integration_skill/service_configs/external-integration-skill
+date_created: '2023-06-28T09:45:32'
\ No newline at end of file
diff --git a/services/external_fake_server/Dockerfile b/services/external_fake_server/Dockerfile
new file mode 100644
index 0000000000..3b68d0de23
--- /dev/null
+++ b/services/external_fake_server/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.9
+
+ARG SERVICE_PORT
+ENV SERVICE_PORT ${SERVICE_PORT}
+
+RUN mkdir /src
+
+COPY ./services/external_fake_server/requirements.txt /src/requirements.txt
+RUN pip install -r /src/requirements.txt
+
+COPY ./services/external_fake_server /src/
+COPY ./common/ /src/common/
+WORKDIR /src
+
+CMD gunicorn --workers=2 server:app -b 0.0.0.0:${SERVICE_PORT}
\ No newline at end of file
diff --git a/services/external_fake_server/requirements.txt b/services/external_fake_server/requirements.txt
new file mode 100644
index 0000000000..9106b1faa3
--- /dev/null
+++ b/services/external_fake_server/requirements.txt
@@ -0,0 +1,8 @@
+flask==1.1.1
+itsdangerous==2.0.1
+gunicorn==19.9.0
+requests==2.22.0
+sentry-sdk==0.12.3
+click==7.1.2
+jinja2<=3.0.3
+Werkzeug<=2.0.3
\ No newline at end of file
diff --git a/services/external_fake_server/server.py b/services/external_fake_server/server.py
new file mode 100644
index 0000000000..df9e7178ab
--- /dev/null
+++ b/services/external_fake_server/server.py
@@ -0,0 +1,36 @@
+import logging
+import time
+from os import getenv
+
+import sentry_sdk
+
+from flask import Flask, request, jsonify
+
+
+sentry_sdk.init(getenv("SENTRY_DSN"))
+
+logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = Flask(__name__)
+
+
+@app.route("/ping", methods=["POST"])
+def ping():
+    return "pong"
+
+
+@app.route("/return_response", methods=["POST"])
+def return_response():
+    st_time = time.time()
+    message = request.json.get("payload", None)
+    dialog_id = request.json.get("dialog_id", None)
+    logger.info(f"fake-external-server got message: {message}, dialog_id: {dialog_id}")
+    if message and dialog_id:
+        results = {"response": "Success!", "confidence": 0.9}
+    else:
+        results = {"response": "", "confidence": 0.0}
+    logger.info(f"fake-external-server `return_response` results: {results}")
+    total_time = time.time() - st_time
+    logger.info(f"fake-external-server `return_response` exec time: {total_time:.3f}s")
+    return jsonify(results)
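Editor's note, not part of the patch: the fake server above exposes two endpoints, and the minimal sketch below shows how they can be exercised once the container is running. It assumes the 8184:8184 port mapping used in the configs later in this patch; the URL and values are illustrative only.

```python
import requests

BASE_URL = "http://0.0.0.0:8184"  # assumed port mapping from the configs below

# Health check: the /ping route simply returns the string "pong".
print(requests.post(f"{BASE_URL}/ping").text)

# /return_response answers "Success!" only when both payload and dialog_id are truthy.
resp = requests.post(
    f"{BASE_URL}/return_response",
    json={"payload": "How are you doing?", "dialog_id": "test-dialog-1"},
).json()
print(resp)  # expected: {"response": "Success!", "confidence": 0.9}
```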
diff --git a/services/external_fake_server/service_configs/external-fake-service/environment.yml b/services/external_fake_server/service_configs/external-fake-service/environment.yml
new file mode 100644
index 0000000000..39e0545e74
--- /dev/null
+++ b/services/external_fake_server/service_configs/external-fake-service/environment.yml
@@ -0,0 +1,2 @@
+SERVICE_PORT: 8184
+SERVICE_NAME: external_fake_server
diff --git a/services/external_fake_server/service_configs/external-fake-service/service.yml b/services/external_fake_server/service_configs/external-fake-service/service.yml
new file mode 100644
index 0000000000..10e656e39c
--- /dev/null
+++ b/services/external_fake_server/service_configs/external-fake-service/service.yml
@@ -0,0 +1,24 @@
+name: external-fake-server
+endpoints:
+- return_response
+compose:
+  env_file:
+  - .env
+  build:
+    args:
+      SERVICE_PORT: 8184
+      SERVICE_NAME: external_fake_server
+    context: .
+    dockerfile: ./services/external_fake_server/Dockerfile
+  command: gunicorn --workers=2 server:app -b 0.0.0.0:8184
+  deploy:
+    resources:
+      limits:
+        memory: 100M
+      reservations:
+        memory: 100M
+  volumes:
+  - ./services/external_fake_server:/src
+  - ./common:/src/common
+  ports:
+  - 8184:8184
diff --git a/services/external_fake_server/test.py b/services/external_fake_server/test.py
new file mode 100644
index 0000000000..6c93a6c8a1
--- /dev/null
+++ b/services/external_fake_server/test.py
@@ -0,0 +1,24 @@
+import requests
+
+
+def main():
+    url = "http://0.0.0.0:8184/return_response"
+
+    request_datas = [
+        {"dialog_id": "jknvawoioqb783HGGIUUGI", "payload": "How are you doing?"},
+        {"dialog_id": None, "payload": ""},
+    ]
+    gold_results = [
+        {"response": "Success!", "confidence": 0.9},
+        {"response": "", "confidence": 0.0},
+    ]
+    i = 0
+    for request_data in request_datas:
+        result = requests.post(url, json=request_data).json()
+        assert result == gold_results[i], print(f"Got result: {result}, something is wrong.")
+        i += 1
+    print("Success!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/services/external_fake_server/test.sh b/services/external_fake_server/test.sh
new file mode 100755
index 0000000000..61672db785
--- /dev/null
+++ b/services/external_fake_server/test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+python test.py
diff --git a/skills/external_integration_skill/Dockerfile b/skills/external_integration_skill/Dockerfile
new file mode 100644
index 0000000000..f7766fc0eb
--- /dev/null
+++ b/skills/external_integration_skill/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3.9
+
+WORKDIR /src
+
+COPY ./skills/external_integration_skill/requirements.txt /src/requirements.txt
+RUN pip install -r /src/requirements.txt
+
+ARG SERVICE_NAME
+ENV SERVICE_NAME ${SERVICE_NAME}
+ARG SERVICE_PORT
+ENV SERVICE_PORT ${SERVICE_PORT}
+ARG EXTERNAL_SKILL_URL
+ENV EXTERNAL_SKILL_URL ${EXTERNAL_SKILL_URL}
+ARG ARGUMENTS_TO_SEND
+ENV ARGUMENTS_TO_SEND ${ARGUMENTS_TO_SEND}
+ARG RESPONSE_KEY
+ENV RESPONSE_KEY ${RESPONSE_KEY}
+ARG PAYLOAD_ARGUMENT_NAME
+ENV PAYLOAD_ARGUMENT_NAME ${PAYLOAD_ARGUMENT_NAME}
+ARG EXTERNAL_TIMEOUT
+ENV EXTERNAL_TIMEOUT ${EXTERNAL_TIMEOUT}
+
+COPY skills/external_integration_skill /src
+COPY common /src/common
+
+CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300
diff --git a/skills/external_integration_skill/README.md b/skills/external_integration_skill/README.md
new file mode 100644
index 0000000000..0adc1f1db1
--- /dev/null
+++ b/skills/external_integration_skill/README.md
@@ -0,0 +1,104 @@
+# Lightweight skill for external service integration
+
+This skill can be used to integrate external services and skills into the DeepPavlov Dream pipeline.
+
+## Testing the skill
+
+You may test the skill using the external_fake_server component, which imitates the work of an external service.
+To do so, add the following files to the distribution you want to use for testing:
+
+__docker-compose.override.yml (add to WAIT_HOSTS)__
+```
+external-integration-skill:8183, external-fake-server:8184
+```
+
+__docker-compose.override.yml__
+```
+  external-integration-skill:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_NAME: external_integration_skill
+        EXTERNAL_SKILL_URL: http://external-fake-server:8184/return_response
+        ARGUMENTS_TO_SEND: dialog_id
+        PAYLOAD_ARGUMENT_NAME: payload
+        RESPONSE_KEY: response
+        EXTERNAL_TIMEOUT: 10
+      context: .
+      dockerfile: ./skills/external_integration_skill/Dockerfile
+    command: gunicorn --workers=1 server:app -b 0.0.0.0:8183 --reload
+    deploy:
+      resources:
+        limits:
+          memory: 128M
+        reservations:
+          memory: 128M
+
+  external-fake-server:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_PORT: 8184
+        SERVICE_NAME: external_fake_server
+      context: .
+      dockerfile: ./services/external_fake_server/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8184
+    environment:
+      - FLASK_APP=server
+    deploy:
+      resources:
+        limits:
+          memory: 100M
+        reservations:
+          memory: 100M
+```
+
+__dev.yml__
+```
+  external-integration-skill:
+    volumes:
+      - "./skills/external_integration_skill:/src"
+      - "./common:/src/common"
+    ports:
+      - 8183:8183
+
+  external-fake-server:
+    volumes:
+      - "./services/external_fake_server:/src"
+      - "./common:/src/common"
+    ports:
+      - 8184:8184
+```
+
+__pipeline_conf.json (add to skills)__
+```
+"external_integration_skill": {
+    "connector": {
+        "protocol": "http",
+        "timeout": 2,
+        "url": "http://external-integration-skill:8183/respond"
+    },
+    "dialog_formatter": "state_formatters.dp_formatters:external_integration_skill_formatter",
+    "response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service",
+    "previous_services": [
+        "skill_selectors"
+    ],
+    "state_manager_method": "add_hypothesis",
+    "is_enabled": true,
+    "source": {
+        "component": "components/knoIA98f3bijjao9d9pqkne.yml",
+        "service": "skills/external_integration_skill/service_configs/external-integration-skill"
+    }
+}
+```
+
+To leave only your skill in the pipeline, you can either remove the other skills from docker-compose.yml and dev.yml or do the following:
+
+__skill_selectors/rule_based_selector/connector.py__
+```
+asyncio.create_task(callback(task_id=payload["task_id"], response=list(set(skills_for_uttr)))) -> asyncio.create_task(callback(task_id=payload["task_id"], response=['external_integration_skill']))
+```
+
+## Integrating real external services
+
+Do the same, but leave out the external-fake-server component. Also, pay attention to ```EXTERNAL_SKILL_URL```, ```PAYLOAD_ARGUMENT_NAME```, ```RESPONSE_KEY``` and ```ARGUMENTS_TO_SEND```, which depend on the input and output format of the external service. ```EXTERNAL_SKILL_URL``` is the URL of the external service. ```PAYLOAD_ARGUMENT_NAME``` is the key of the input JSON under which the external service expects to receive the text of the message to reply to ("payload" by default); ```RESPONSE_KEY``` is the key of the output JSON under which the external service returns the text of the reply we want to get ("response" by default); ```ARGUMENTS_TO_SEND``` lists the arguments that the external service needs to receive along with the message text, e.g. dialog_id or user_id.
\ No newline at end of file
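Editor's illustrative sketch, not part of the patch: a minimal external service that satisfies the contract described in the README above, assuming the default settings `PAYLOAD_ARGUMENT_NAME=payload`, `RESPONSE_KEY=response`, `ARGUMENTS_TO_SEND=dialog_id`. The endpoint name and port are hypothetical.

```python
from flask import Flask, request, jsonify

app = Flask(__name__)


@app.route("/respond_to_dream", methods=["POST"])  # hypothetical endpoint name
def respond_to_dream():
    data = request.json
    message = data.get("payload", "")  # user message text, key = PAYLOAD_ARGUMENT_NAME
    dialog_id = data.get("dialog_id")  # extra argument listed in ARGUMENTS_TO_SEND
    reply = f"Echo for dialog {dialog_id}: {message}"
    # "response" must match RESPONSE_KEY; "confidence" is optional.
    return jsonify({"response": reply, "confidence": 0.8})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8199)  # arbitrary port for this sketch
```

If the external service omits `confidence`, the integration skill falls back to 0.9, as its `server.py` below shows.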
diff --git a/skills/external_integration_skill/requirements.txt b/skills/external_integration_skill/requirements.txt
new file mode 100644
index 0000000000..928f3e3d58
--- /dev/null
+++ b/skills/external_integration_skill/requirements.txt
@@ -0,0 +1,9 @@
+flask==1.1.1
+itsdangerous==2.0.1
+gunicorn==19.9.0
+requests==2.22.0
+sentry-sdk[flask]==0.14.1
+healthcheck==1.3.3
+jinja2<=3.0.3
+Werkzeug<=2.0.3
+openai==0.27.6
\ No newline at end of file
diff --git a/skills/external_integration_skill/server.py b/skills/external_integration_skill/server.py
new file mode 100644
index 0000000000..baec427f6f
--- /dev/null
+++ b/skills/external_integration_skill/server.py
@@ -0,0 +1,65 @@
+import logging
+from os import getenv
+import sentry_sdk
+from flask import Flask, request, jsonify
+import requests
+
+# import common.dff.integration.context as int_ctx
+
+sentry_sdk.init(getenv("SENTRY_DSN"))
+logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+gunicorn_logger = logging.getLogger("gunicorn.error")
+logger.handlers = gunicorn_logger.handlers
+logger.setLevel(gunicorn_logger.level)
+
+app = Flask(__name__)
+
+EXTERNAL_SKILL_URL = getenv("EXTERNAL_SKILL_URL", None)
+PAYLOAD_ARGUMENT_NAME = getenv("PAYLOAD_ARGUMENT_NAME", "payload")
+EXTERNAL_TIMEOUT = int(getenv("EXTERNAL_TIMEOUT", 2))
+ARGUMENTS_TO_SEND = getenv("ARGUMENTS_TO_SEND", ["user_id"])
+if isinstance(ARGUMENTS_TO_SEND, str):
+    ARGUMENTS_TO_SEND = ARGUMENTS_TO_SEND.split(",")
+RESPONSE_KEY = getenv("RESPONSE_KEY", "response")
+
+assert EXTERNAL_SKILL_URL, "You need to provide the external skill url to get its responses."
+
+
+@app.route("/respond", methods=["POST"])
+def respond():
+    responses = []
+    confidences = []
+    sentences = request.json.get("sentences", [])
+    user_ids = request.json.get("user_ids", [])
+    dialog_ids = request.json.get("dialog_ids", [])
+    logger.info(f"Got sentences: {sentences}, user_ids: {user_ids}, dialog_ids: {dialog_ids}")
+    for n_dialog, message_text in enumerate(sentences):
+        try:
+            payload = {
+                PAYLOAD_ARGUMENT_NAME: message_text,
+            }
+            if "user_id" in ARGUMENTS_TO_SEND:
+                user_id = user_ids[n_dialog]
+                payload["user_id"] = user_id
+            if "dialog_id" in ARGUMENTS_TO_SEND:
+                dialog_id = dialog_ids[n_dialog]
+                payload["dialog_id"] = dialog_id
+            result = requests.post(EXTERNAL_SKILL_URL, json=payload, timeout=EXTERNAL_TIMEOUT).json()
+            response_key = RESPONSE_KEY if RESPONSE_KEY else "response"
+            response = result.get(response_key, "")
+            confidence = result.get("confidence", 0.9)
+        except Exception as e:
+            sentry_sdk.capture_exception(e)
+            logger.exception(e)
+            response = ""
+            confidence = 0.0
+        responses.append(response)
+        confidences.append(confidence)
+    logger.info(f"Responses: {str(responses)}, confidences: {str(confidences)}")
+    return jsonify(list(zip(responses, confidences)))
+
+
+if __name__ == "__main__":
+    app.run(debug=False, host="0.0.0.0", port=3000)
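Editor's sketch, not part of the patch: to make the environment variables concrete, here is how the skill parses `ARGUMENTS_TO_SEND` and what payload it forwards for one incoming sentence, assuming the defaults used elsewhere in this patch (`ARGUMENTS_TO_SEND=dialog_id`, `PAYLOAD_ARGUMENT_NAME=payload`).

```python
# With ARGUMENTS_TO_SEND="dialog_id" and PAYLOAD_ARGUMENT_NAME="payload",
# one item of the /respond request becomes this outgoing payload:
arguments_to_send = "dialog_id".split(",")  # -> ["dialog_id"]

incoming = {"sentences": ["How are you doing?"], "dialog_ids": ["7379921"], "user_ids": ["some-user"]}
payload = {"payload": incoming["sentences"][0]}
if "user_id" in arguments_to_send:
    payload["user_id"] = incoming["user_ids"][0]
if "dialog_id" in arguments_to_send:
    payload["dialog_id"] = incoming["dialog_ids"][0]

print(payload)  # {"payload": "How are you doing?", "dialog_id": "7379921"}
# This JSON is POSTed to EXTERNAL_SKILL_URL with timeout=EXTERNAL_TIMEOUT.
```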
diff --git a/skills/external_integration_skill/service_configs/external-integration-skill/environment.yml b/skills/external_integration_skill/service_configs/external-integration-skill/environment.yml
new file mode 100644
index 0000000000..0b5a7a7573
--- /dev/null
+++ b/skills/external_integration_skill/service_configs/external-integration-skill/environment.yml
@@ -0,0 +1,7 @@
+SERVICE_PORT: 8183
+SERVICE_NAME: external_integration_skill
+EXTERNAL_SKILL_URL: http://external-fake-server:8184/return_response
+ARGUMENTS_TO_SEND: dialog_id
+PAYLOAD_ARGUMENT_NAME: payload
+RESPONSE_KEY: response
+EXTERNAL_TIMEOUT: 2
\ No newline at end of file
diff --git a/skills/external_integration_skill/service_configs/external-integration-skill/service.yml b/skills/external_integration_skill/service_configs/external-integration-skill/service.yml
new file mode 100644
index 0000000000..4cd606aba0
--- /dev/null
+++ b/skills/external_integration_skill/service_configs/external-integration-skill/service.yml
@@ -0,0 +1,29 @@
+name: external-integration-skill
+endpoints:
+- respond
+compose:
+  env_file:
+  - .env
+  build:
+    args:
+      SERVICE_PORT: 8183
+      SERVICE_NAME: external_integration_skill
+      EXTERNAL_SKILL_URL: http://external-fake-server:8184/return_response
+      ARGUMENTS_TO_SEND: dialog_id
+      PAYLOAD_ARGUMENT_NAME: payload
+      RESPONSE_KEY: response
+      EXTERNAL_TIMEOUT: 2
+    context: .
+    dockerfile: ./skills/external_integration_skill/Dockerfile
+  command: gunicorn --workers=1 server:app -b 0.0.0.0:8183 --timeout=300
+  deploy:
+    resources:
+      limits:
+        memory: 128M
+      reservations:
+        memory: 128M
+  volumes:
+  - ./skills/external_integration_skill:/src
+  - ./common:/src/common
+  ports:
+  - 8183:8183
diff --git a/skills/external_integration_skill/test.py b/skills/external_integration_skill/test.py
new file mode 100644
index 0000000000..59168f396d
--- /dev/null
+++ b/skills/external_integration_skill/test.py
@@ -0,0 +1,17 @@
+import requests
+
+SERVICE_PORT = 8183
+
+
+def test_respond():
+    url = f"http://0.0.0.0:{SERVICE_PORT}/respond"
+    result = requests.post(
+        url,
+        json={"sentences": ["hi", ""], "dialog_ids": ["7379921", None]},
+    ).json()
+    assert result == [["Success!", 0.9], ["", 0.0]], print(f"Got result: {result}, something is wrong.")
+    print("Success!")
+
+
+if __name__ == "__main__":
+    test_respond()
diff --git a/skills/external_integration_skill/test.sh b/skills/external_integration_skill/test.sh
new file mode 100755
index 0000000000..468a5a38fc
--- /dev/null
+++ b/skills/external_integration_skill/test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+python test.py
\ No newline at end of file
diff --git a/state_formatters/dp_formatters.py b/state_formatters/dp_formatters.py
index 1707c1690a..57afff7859 100755
--- a/state_formatters/dp_formatters.py
+++ b/state_formatters/dp_formatters.py
@@ -1226,6 +1226,13 @@ def image_captioning_formatter(dialog: Dict) -> List[Dict]:
     return [{"image_paths": [dialog["human_utterances"][-1].get("attributes", {}).get("image")]}]
 
 
+def external_integration_skill_formatter(dialog: Dict) -> List[Dict]:
+    last_sentences = [dialog["human_utterances"][-1]["text"]]
+    dialog_ids = [dialog.get("dialog_id", "unknown")]
+    user_ids = [dialog["human_utterances"][-1]["user"]["id"]]
+    return [{"sentences": last_sentences, "dialog_ids": dialog_ids, "user_ids": user_ids}]
+
+
 def robot_formatter(dialog: Dict) -> Dict:
     """This formatter currently provides the JSON as is, without modifying it.
     Either edit it later or choose one of the existing formatters"""
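Editor's sketch, not part of the patch: what the new dialog formatter produces for a toy dialog state. The dialog dict is simplified to the fields the formatter actually reads; real Dream dialog states carry many more keys.

```python
from typing import Dict, List


def external_integration_skill_formatter(dialog: Dict) -> List[Dict]:
    # copied from the patch above
    last_sentences = [dialog["human_utterances"][-1]["text"]]
    dialog_ids = [dialog.get("dialog_id", "unknown")]
    user_ids = [dialog["human_utterances"][-1]["user"]["id"]]
    return [{"sentences": last_sentences, "dialog_ids": dialog_ids, "user_ids": user_ids}]


toy_dialog = {
    "dialog_id": "7379921",
    "human_utterances": [{"text": "How are you doing?", "user": {"id": "user-42"}}],
}
print(external_integration_skill_formatter(toy_dialog))
# [{"sentences": ["How are you doing?"], "dialog_ids": ["7379921"], "user_ids": ["user-42"]}]
```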
From e0152414de2515ecccdf0edc741cf22102294974 Mon Sep 17 00:00:00 2001
From: "Dilyara Zharikova (Baymurzina)"
Date: Wed, 16 Aug 2023 10:30:51 +0300
Subject: [PATCH 2/2] Fix/llms output postprocessing (#556)

* fix: llms output postprocessing
* fix: llms output postprocessing
* fix: tests for llms
---
 services/transformers_lm/server.py |  7 +++++--
 services/transformers_lm/test.py   | 11 +----------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/services/transformers_lm/server.py b/services/transformers_lm/server.py
index bec455694e..def2297c10 100644
--- a/services/transformers_lm/server.py
+++ b/services/transformers_lm/server.py
@@ -52,13 +52,13 @@
 
 def add_replacement_tokens(text, replacement):
     for pair in replacement:
-        text = text.replace(pair[0], f"{pair[1]} ")
+        text = re.sub(pair[0], f"{pair[1]} ", text)
     return text
 
 
 def remove_replacement_tokens(text, replacement):
     for pair in replacement:
-        text = text.replace(pair[1], pair[0])
+        text = re.sub(pair[1], pair[0], text)
         text = text.replace("\n ", "\n")
     return text
 
@@ -133,6 +133,9 @@ def generate_responses(context, model, tokenizer, prompt, generation_params, con
     # preprocess dialog context to correctly remove it from output
     dialog_context = re.sub(r" +", " ", dialog_context)
     dialog_context = dialog_context.replace("\n ", "\n")
+    output = re.sub(r" +", " ", output)
+    output = output.replace("\n ", "\n")
+
     result_cut = output.replace(dialog_context + " ", "")
     result_cut = cut_predictions_by_additional_eos(result_cut)
     result_cut = remove_replacement_tokens(result_cut, replacement)
diff --git a/services/transformers_lm/test.py b/services/transformers_lm/test.py
index f06611c2af..69d27e8fbf 100644
--- a/services/transformers_lm/test.py
+++ b/services/transformers_lm/test.py
@@ -2,14 +2,6 @@
 import requests
 
 
-DEFAULT_CONFIG = {
-    "max_new_tokens": 60,
-    "min_new_tokens": 8,
-    "top_p": 0.9,
-    "temperature": 0.9,
-    "do_sample": True,
-    "num_return_sequences": 2,
-}
 SERVICE_PORT = int(os.getenv("SERVICE_PORT"))
 
 
@@ -24,7 +16,7 @@ def test_respond():
         ["Привет, Маркус! Я в порядке. Как дела?", "Я отлично. Какие у тебя планы на сегодня?"],
     ]
     prompts = [
-        "Respond like a friendly chatbot.",
+        "Respond like a friendly chatbot. \n Dialog:\n",
         "Отвечай как дружелюбный бот.",
     ]
     result = requests.post(
@@ -32,7 +24,6 @@ def test_respond():
         json={
             "dialog_contexts": contexts,
             "prompts": prompts,
-            "configs": [DEFAULT_CONFIG] * len(contexts),
         },
     ).json()
     print(result)
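Editor's illustration, not part of the patch: the second commit switches the replacement-token helpers from `str.replace` to `re.sub` and normalizes whitespace in the model output before the dialog context is stripped. The sketch below shows the intended round trip; the replacement pair is hypothetical (the real list is defined elsewhere in `services/transformers_lm/server.py`) and is chosen to contain no regex metacharacters, since `re.sub` treats both sides of each pair as patterns.

```python
import re

# Hypothetical replacement pair; the real list lives in services/transformers_lm/server.py.
replacement = [["\n", "__newline__"]]


def add_replacement_tokens(text, replacement):
    # copied from the patch: substitute each pattern with its token plus a space
    for pair in replacement:
        text = re.sub(pair[0], f"{pair[1]} ", text)
    return text


def remove_replacement_tokens(text, replacement):
    # copied from the patch: restore the original string and drop the extra space
    for pair in replacement:
        text = re.sub(pair[1], pair[0], text)
        text = text.replace("\n ", "\n")
    return text


prompt = "Respond like a friendly chatbot.\nDialog:"
encoded = add_replacement_tokens(prompt, replacement)
decoded = remove_replacement_tokens(encoded, replacement)
assert decoded == prompt  # the round trip is lossless for this pair
```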