From 7846b28fd8d90e60b18a4706d2438d40d75c9ad4 Mon Sep 17 00:00:00 2001 From: Nika Smilga <42929200+smilni@users.noreply.github.com> Date: Sun, 13 Aug 2023 12:56:42 +0400 Subject: [PATCH] Feat/refactor dummy skill (#530) * dummy skill refactor * add dummy params * split code into funcs * remove unnecessary func * change func logic * docker-compose update * style * prettier funcs and names * style * add agent cards * add fallback file --- .../dream_alexa/docker-compose.override.yml | 4 + .../docker-compose.override.yml | 5 + .../dream_alexa/environment.yml | 4 + .../service_configs/dream_alexa/service.yml | 4 + .../dream_script_based/environment.yml | 4 + .../dream_script_based/service.yml | 4 + skills/dummy_skill/connector.py | 346 ++++++++++-------- 7 files changed, 213 insertions(+), 158 deletions(-) diff --git a/assistant_dists/dream_alexa/docker-compose.override.yml b/assistant_dists/dream_alexa/docker-compose.override.yml index 27369f483d..df769ff8d5 100644 --- a/assistant_dists/dream_alexa/docker-compose.override.yml +++ b/assistant_dists/dream_alexa/docker-compose.override.yml @@ -24,6 +24,10 @@ services: HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 + ENABLE_NP_QUESTIONS: 1 + ENABLE_SWITCH_TOPIC: 1 + ENABLE_LINK_QUESTIONS: 1 + ENABLE_NP_FACTS: 1 LANGUAGE: EN FALLBACK_FILE: fallbacks_dream_en.json diff --git a/assistant_dists/dream_script_based/docker-compose.override.yml b/assistant_dists/dream_script_based/docker-compose.override.yml index 439eaf0ed0..e86e3238a2 100644 --- a/assistant_dists/dream_script_based/docker-compose.override.yml +++ b/assistant_dists/dream_script_based/docker-compose.override.yml @@ -25,7 +25,12 @@ services: HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 + ENABLE_NP_QUESTIONS: 1 + ENABLE_SWITCH_TOPIC: 1 + ENABLE_LINK_QUESTIONS: 1 + ENABLE_NP_FACTS: 1 LANGUAGE: EN + FALLBACK_FILE: fallbacks_dream_en.json convers-evaluator-annotator: env_file: [ .env ] diff --git a/services/agent_services/service_configs/dream_alexa/environment.yml b/services/agent_services/service_configs/dream_alexa/environment.yml index cf3ef6ea90..cf88d24b9d 100644 --- a/services/agent_services/service_configs/dream_alexa/environment.yml +++ b/services/agent_services/service_configs/dream_alexa/environment.yml @@ -3,5 +3,9 @@ WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 +ENABLE_NP_QUESTIONS: 1 +ENABLE_SWITCH_TOPIC: 1 +ENABLE_LINK_QUESTIONS: 1 +ENABLE_NP_FACTS: 1 LANGUAGE: EN FALLBACK_FILE: fallbacks_dream_en.json diff --git a/services/agent_services/service_configs/dream_alexa/service.yml b/services/agent_services/service_configs/dream_alexa/service.yml index 84cc9cc932..d94482641d 100644 --- a/services/agent_services/service_configs/dream_alexa/service.yml +++ b/services/agent_services/service_configs/dream_alexa/service.yml @@ -9,6 +9,10 @@ compose: HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 + ENABLE_NP_QUESTIONS: 1 + ENABLE_SWITCH_TOPIC: 1 + ENABLE_LINK_QUESTIONS: 1 + ENABLE_NP_FACTS: 1 LANGUAGE: EN FALLBACK_FILE: fallbacks_dream_en.json volumes: diff --git a/services/agent_services/service_configs/dream_script_based/environment.yml b/services/agent_services/service_configs/dream_script_based/environment.yml index cf3ef6ea90..cf88d24b9d 100644 --- a/services/agent_services/service_configs/dream_script_based/environment.yml +++ b/services/agent_services/service_configs/dream_script_based/environment.yml @@ -3,5 +3,9 @@ WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-480} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 +ENABLE_NP_QUESTIONS: 1 +ENABLE_SWITCH_TOPIC: 1 +ENABLE_LINK_QUESTIONS: 1 +ENABLE_NP_FACTS: 1 LANGUAGE: EN FALLBACK_FILE: fallbacks_dream_en.json diff --git a/services/agent_services/service_configs/dream_script_based/service.yml b/services/agent_services/service_configs/dream_script_based/service.yml index 9f86552deb..c926cf97fd 100644 --- a/services/agent_services/service_configs/dream_script_based/service.yml +++ b/services/agent_services/service_configs/dream_script_based/service.yml @@ -9,6 +9,10 @@ compose: HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 ALWAYS_TURN_ON_ALL_SKILLS: 0 + ENABLE_NP_QUESTIONS: 1 + ENABLE_SWITCH_TOPIC: 1 + ENABLE_LINK_QUESTIONS: 1 + ENABLE_NP_FACTS: 1 LANGUAGE: EN FALLBACK_FILE: fallbacks_dream_en.json volumes: diff --git a/skills/dummy_skill/connector.py b/skills/dummy_skill/connector.py index d4fb560d16..f0dad3edcc 100644 --- a/skills/dummy_skill/connector.py +++ b/skills/dummy_skill/connector.py @@ -30,7 +30,7 @@ if_choose_topic, is_any_question_sentence_in_utterance, ) -from common.utils import get_topics, get_entities, is_no, get_intents, is_yes +from common.utils import get_entities, is_no, get_intents, is_yes logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO) @@ -44,6 +44,10 @@ FALLBACK_FILE = getenv("FALLBACK_FILE", "fallbacks_dream_en.json") DUMMY_DONTKNOW_RESPONSES = json.load(open(f"common/fallbacks/{FALLBACK_FILE}", "r")) LANGUAGE = getenv("LANGUAGE", "EN") +ENABLE_NP_QUESTIONS = int(getenv("ENABLE_NP_QUESTIONS", 0)) +ENABLE_SWITCH_TOPIC = int(getenv("ENABLE_SWITCH_TOPIC", 0)) +ENABLE_LINK_QUESTIONS = int(getenv("ENABLE_LINK_QUESTIONS", 0)) +ENABLE_NP_FACTS = int(getenv("ENABLE_NP_FACTS", 0)) with open("skills/dummy_skill/google-english-no-swears.txt", "r") as f: TOP_FREQUENT_UNIGRAMS = f.read().splitlines()[:1000] @@ -161,16 +165,6 @@ def get_link_to_question(dialog, all_prev_active_skills): return linked_question, human_attr -def generate_question_not_from_last_responses(dialog, all_prev_active_skills): - linked_question, human_attr = get_link_to_question(dialog, all_prev_active_skills) - - if len(linked_question) > 0: - result = linked_question - else: - result = "" - return result, human_attr - - def no_initiative(dialog): utts = dialog["human_utterances"] if len(utts) <= 2: @@ -184,162 +178,198 @@ def no_initiative(dialog): return False +def get_nounphrases(dialog): + curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False) + for i in range(len(curr_nounphrases)): + np = re.sub(np_remove_expr, "", curr_nounphrases[i]) + np = re.sub(rm_spaces_expr, " ", np) + if re.search(np_ignore_expr, np): + curr_nounphrases[i] = "" + else: + curr_nounphrases[i] = np.strip() + + curr_nounphrases = [np for np in curr_nounphrases if len(np) > 0] + + logger.info(f"Found nounphrases: {curr_nounphrases}") + return curr_nounphrases + + +def get_link_questions(payload, dialog): + all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0] + link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills) + return link_to_question, human_attr + + +def get_hyp_np_questions(dialog): + curr_nounphrases = get_nounphrases(dialog) + questions_same_nps = [] + for _, nphrase in enumerate(curr_nounphrases): + for q_id in NP_QUESTIONS.get(nphrase, []): + questions_same_nps += [QUESTIONS_MAP[str(q_id)]] + + if len(questions_same_nps) > 0: + logger.info("Found special nounphrases for questions. Return question with the same nounphrase.") + cands = choice(questions_same_nps) + confs = 0.5 + attrs = {"type": "nounphrase_question", "response_parts": ["prompt"]} + human_attrs = {} + bot_attrs = {} + return cands, confs, attrs, human_attrs, bot_attrs + + return [] + + +def get_hyp_topic_switch(dialog): + last_utt = dialog["human_utterances"][-1] + user = last_utt["user"].get("attributes", {}) + entities = user.get("entities", {}) + entities = {ent: val for ent, val in entities.items() if len(val["human_encounters"])} + response = "" + if entities: + selected_entity = "" + # reverse so it uses recent entities first + sorted_entities = sorted( + entities.values(), + key=lambda d: d["human_encounters"][-1]["human_utterance_index"], + reverse=True, + ) + for entity_dict in sorted_entities: + if entity_dict["human_attitude"] == "like" and not entity_dict["mentioned_by_bot"]: + selected_entity = entity_dict["name"] + break + if selected_entity: + response = f"Previously, you have mentioned {selected_entity}, maybe you want to discuss it?" + logger.info(f"dummy_skill hypothesis no_initiative: {response}") + cands = response + confs = 0.5 + attrs = {"type": "entity_recap", "response_parts": ["prompt"]} + human_attrs = {} + bot_attrs = {} + return cands, confs, attrs, human_attrs, bot_attrs + return [] + + +def get_hyp_link_question(dialog, link_to_question, human_attr): + curr_nounphrases = get_nounphrases(dialog) + _prev_bot_uttr = dialog["bot_utterances"][-2]["text"] if len(dialog["bot_utterances"]) > 1 else "" + _bot_uttr = dialog["bot_utterances"][-1]["text"] if len(dialog["bot_utterances"]) > 0 else "" + _prev_active_skill = dialog["bot_utterances"][-1]["active_skill"] if len(dialog["bot_utterances"]) > 0 else "" + + _no_to_first_linkto = any([phrase in _bot_uttr for phrase in LINK_TO_PHRASES]) + _no_to_first_linkto = _no_to_first_linkto and all([phrase not in _prev_bot_uttr for phrase in LINK_TO_PHRASES]) + _no_to_first_linkto = _no_to_first_linkto and is_no(dialog["human_utterances"][-1]) + _no_to_first_linkto = _no_to_first_linkto and _prev_active_skill != "dff_friendship_skill" + + _if_switch_topic = is_switch_topic(dialog["human_utterances"][-1]) + bot_uttr_dict = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {} + _if_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr_dict) + _is_ask_me_something = ASK_ME_QUESTION_PATTERN.search(dialog["human_utterances"][-1]["text"]) + + if len(dialog["human_utterances"]) > 1: + _was_cant_do = "cant_do" in get_intents(dialog["human_utterances"][-2]) and ( + len(curr_nounphrases) == 0 or is_yes(dialog["human_utterances"][-1]) + ) + _was_cant_do_stop_it = "cant_do" in get_intents(dialog["human_utterances"][-2]) and is_no( + dialog["human_utterances"][-1] + ) + else: + _was_cant_do = False + _was_cant_do_stop_it = False + + if _was_cant_do_stop_it: + link_to_question = "Sorry, bye! #+#exit" + confs = 1.0 # finish dialog request + elif _no_to_first_linkto: + confs = 0.99 + elif _is_ask_me_something or _if_switch_topic or _was_cant_do or _if_choose_topic: + confs = 1.0 # Use it only as response selector retrieve skill output modifier + else: + confs = 0.05 # Use it only as response selector retrieve skill output modifier + cands = link_to_question + attrs = {"type": "link_to_for_response_selector", "response_parts": ["prompt"]} + human_attrs = human_attr + bot_attrs = {} + return cands, confs, attrs, human_attrs, bot_attrs + + +def get_hyp_russ_link_question(): + cands = random.choice(RUSSIAN_RANDOM_QUESTIONS) + confs = 0.8 + attrs = {"type": "link_to_for_response_selector", "response_parts": ["prompt"]} + human_attrs = {} + bot_attrs = {} + return cands, confs, attrs, human_attrs, bot_attrs + + +def get_hyp_np_facts(dialog): + curr_nounphrases = get_nounphrases(dialog) + facts_same_nps = [] + for _, nphrase in enumerate(curr_nounphrases): + for fact_id in NP_FACTS.get(nphrase, []): + facts_same_nps += [ + f"Well, now that you've mentioned {nphrase}, I've remembered this. " + f"{FACTS_MAP[str(fact_id)]}. " + f"{(opinion_request_question() if random.random() < ASK_QUESTION_PROB else '')}" + ] + + if len(facts_same_nps) > 0: + logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.") + cands = choice(facts_same_nps) + confs = 0.5 + attrs = {"type": "nounphrase_fact", "response_parts": ["body"]} + human_attrs = {} + bot_attrs = {} + return cands, confs, attrs, human_attrs, bot_attrs + return [] + + +def add_hypothesis(hyps_with_attrs, new_hyp_with_attrs): + if new_hyp_with_attrs: + cand, conf, attr, human_attr, bot_attr = new_hyp_with_attrs + cands, confs, attrs, human_attrs, bot_attrs = hyps_with_attrs + cands.append(cand) + confs.append(conf) + attrs.append(attr) + human_attrs.append(human_attr) + bot_attrs.append(bot_attr) + + class DummySkillConnector: async def send(self, payload: Dict, callback: Callable): try: st_time = time.time() dialog = deepcopy(payload["payload"]["dialogs"][0]) is_sensitive_case = is_sensitive_situation(dialog["human_utterances"][-1]) - all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0] - - curr_topics = get_topics(dialog["human_utterances"][-1], which="cobot_topics") - curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False) - - if len(curr_topics) == 0: - curr_topics = ["Phatic"] - logger.info(f"Found topics: {curr_topics}") - for i in range(len(curr_nounphrases)): - np = re.sub(np_remove_expr, "", curr_nounphrases[i]) - np = re.sub(rm_spaces_expr, " ", np) - if re.search(np_ignore_expr, np): - curr_nounphrases[i] = "" - else: - curr_nounphrases[i] = np.strip() - - curr_nounphrases = [np for np in curr_nounphrases if len(np) > 0] - - logger.info(f"Found nounphrases: {curr_nounphrases}") - - cands = [] - confs = [] - human_attrs = [] - bot_attrs = [] - attrs = [] - - cands += [choice(DUMMY_DONTKNOW_RESPONSES)] - confs += [0.5] - attrs += [{"type": "dummy"}] - human_attrs += [{}] - bot_attrs += [{}] - - if len(dialog["utterances"]) > 14 and not is_sensitive_case and LANGUAGE == "EN": - questions_same_nps = [] - for i, nphrase in enumerate(curr_nounphrases): - for q_id in NP_QUESTIONS.get(nphrase, []): - questions_same_nps += [QUESTIONS_MAP[str(q_id)]] - - if len(questions_same_nps) > 0: - logger.info("Found special nounphrases for questions. Return question with the same nounphrase.") - cands += [choice(questions_same_nps)] - confs += [0.5] - attrs += [{"type": "nounphrase_question", "response_parts": ["prompt"]}] - human_attrs += [{}] - bot_attrs += [{}] - - link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills) - - if no_initiative(dialog) and LANGUAGE == "EN": - last_utt = dialog["human_utterances"][-1] - user = last_utt["user"].get("attributes", {}) - entities = user.get("entities", {}) - entities = {ent: val for ent, val in entities.items() if len(val["human_encounters"])} - response = "" - if entities: - selected_entity = "" - # reverse so it uses recent entities first - sorted_entities = sorted( - entities.values(), - key=lambda d: d["human_encounters"][-1]["human_utterance_index"], - reverse=True, - ) - for entity_dict in sorted_entities: - if entity_dict["human_attitude"] == "like" and not entity_dict["mentioned_by_bot"]: - selected_entity = entity_dict["name"] - break - if selected_entity: - response = f"Previously, you have mentioned {selected_entity}, maybe you want to discuss it?" - logger.info(f"dummy_skill hypothesis no_initiative: {response}") - cands += [response] - confs += [0.5] - attrs += [{"type": "entity_recap", "response_parts": ["prompt"]}] - human_attrs += [{}] - bot_attrs += [{}] - - if link_to_question and LANGUAGE == "EN": - _prev_bot_uttr = dialog["bot_utterances"][-2]["text"] if len(dialog["bot_utterances"]) > 1 else "" - _bot_uttr = dialog["bot_utterances"][-1]["text"] if len(dialog["bot_utterances"]) > 0 else "" - _prev_active_skill = ( - dialog["bot_utterances"][-1]["active_skill"] if len(dialog["bot_utterances"]) > 0 else "" - ) - - _no_to_first_linkto = any([phrase in _bot_uttr for phrase in LINK_TO_PHRASES]) - _no_to_first_linkto = _no_to_first_linkto and all( - [phrase not in _prev_bot_uttr for phrase in LINK_TO_PHRASES] - ) - _no_to_first_linkto = _no_to_first_linkto and is_no(dialog["human_utterances"][-1]) - _no_to_first_linkto = _no_to_first_linkto and _prev_active_skill != "dff_friendship_skill" - - _if_switch_topic = is_switch_topic(dialog["human_utterances"][-1]) - bot_uttr_dict = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {} - _if_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr_dict) - _is_ask_me_something = ASK_ME_QUESTION_PATTERN.search(dialog["human_utterances"][-1]["text"]) - - if len(dialog["human_utterances"]) > 1: - _was_cant_do = "cant_do" in get_intents(dialog["human_utterances"][-2]) and ( - len(curr_nounphrases) == 0 or is_yes(dialog["human_utterances"][-1]) - ) - _was_cant_do_stop_it = "cant_do" in get_intents(dialog["human_utterances"][-2]) and is_no( - dialog["human_utterances"][-1] - ) - else: - _was_cant_do = False - _was_cant_do_stop_it = False - - if _was_cant_do_stop_it: - link_to_question = "Sorry, bye! #+#exit" - confs += [1.0] # finish dialog request - elif _no_to_first_linkto: - confs += [0.99] - elif _is_ask_me_something or _if_switch_topic or _was_cant_do or _if_choose_topic: - confs += [1.0] # Use it only as response selector retrieve skill output modifier - else: - confs += [0.05] # Use it only as response selector retrieve skill output modifier - cands += [link_to_question] - attrs += [{"type": "link_to_for_response_selector", "response_parts": ["prompt"]}] - human_attrs += [human_attr] - bot_attrs += [{}] - elif LANGUAGE == "RU": - cands += [random.choice(RUSSIAN_RANDOM_QUESTIONS)] - confs += [0.8] - attrs += [{"type": "link_to_for_response_selector", "response_parts": ["prompt"]}] - human_attrs += [{}] - bot_attrs += [{}] - - if LANGUAGE == "EN": - facts_same_nps = [] - for i, nphrase in enumerate(curr_nounphrases): - for fact_id in NP_FACTS.get(nphrase, []): - facts_same_nps += [ - f"Well, now that you've mentioned {nphrase}, I've remembered this. " - f"{FACTS_MAP[str(fact_id)]}. " - f"{(opinion_request_question() if random.random() < ASK_QUESTION_PROB else '')}" - ] - else: - facts_same_nps = [] - - if len(facts_same_nps) > 0 and not is_sensitive_case and LANGUAGE == "EN": - logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.") - cands += [choice(facts_same_nps)] - confs += [0.5] - attrs += [{"type": "nounphrase_fact", "response_parts": ["body"]}] - human_attrs += [{}] - bot_attrs += [{}] + is_no_initiative = no_initiative(dialog) + is_long_dialog = len(dialog["utterances"]) > 14 + + hyps_with_attrs = [[choice(DUMMY_DONTKNOW_RESPONSES)], [0.5], [{"type": "dummy"}], [{}], [{}]] + # always append at least basic dummy response + + if ENABLE_NP_QUESTIONS and is_long_dialog and not is_sensitive_case and LANGUAGE == "EN": + new_hyp_with_attrs = get_hyp_np_questions(dialog) + add_hypothesis(hyps_with_attrs, new_hyp_with_attrs) + + if ENABLE_SWITCH_TOPIC and is_no_initiative and LANGUAGE == "EN": + new_hyp_with_attrs = get_hyp_topic_switch(dialog) + add_hypothesis(hyps_with_attrs, new_hyp_with_attrs) + + if ENABLE_LINK_QUESTIONS: + link_to_question, human_attr_q = get_link_questions(payload, dialog) + if link_to_question and LANGUAGE == "EN": + new_hyp_with_attrs = get_hyp_link_question(dialog, link_to_question, human_attr_q) + add_hypothesis(hyps_with_attrs, new_hyp_with_attrs) + elif LANGUAGE == "RU": + new_hyp_with_attrs = get_hyp_russ_link_question() + add_hypothesis(hyps_with_attrs, new_hyp_with_attrs) + + if ENABLE_NP_FACTS and not is_sensitive_case and LANGUAGE == "EN": + new_hyp_with_attrs = get_hyp_np_facts(dialog) + add_hypothesis(hyps_with_attrs, new_hyp_with_attrs) total_time = time.time() - st_time logger.info(f"dummy_skill exec time: {total_time:.3f}s") - asyncio.create_task( - callback(task_id=payload["task_id"], response=[cands, confs, human_attrs, bot_attrs, attrs]) - ) + asyncio.create_task(callback(task_id=payload["task_id"], response=hyps_with_attrs)) except Exception as e: logger.exception(e) sentry_sdk.capture_exception(e)