示例#1
0
def utt_non_punct_dialog(dialog: Dict):
    """
    Wrap the dialog for a skill without replacing utterance texts.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments) and ``utils.remove_clarification_turns_from_dialog``; no
    call to ``utils.replace_with_annotated_utterances`` is made here.

    Used by: book_skill

    Returns:
        A one-element list holding ``{"dialogs": [dialog]}``.
    """
    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    return [{"dialogs": [without_clarifications]}]
示例#2
0
def convert_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Build the request payload for the convert skill.

    Two views of the dialog are used: one obtained with
    ``bot_last_turns=20`` supplies the utterance texts, and a second one
    (default ``utils.get_last_n_turns``, clarification turns removed,
    ``"punct_sent"`` annotated texts) supplies persona, human attributes
    and the ongoing-utterance count for ``"convert_reddit"``.

    Used by: convert

    Returns:
        A one-element list with a dict of single-item batches.
    """
    long_view = utils.get_last_n_turns(dialog, bot_last_turns=20)

    short_view = utils.get_last_n_turns(dialog)
    short_view = utils.remove_clarification_turns_from_dialog(short_view)
    short_view = utils.replace_with_annotated_utterances(short_view,
                                                         mode="punct_sent")

    history_texts = [utt["text"] for utt in long_view["utterances"]]
    persona = short_view["bot"]["persona"]
    ongoing_count = utils.count_ongoing_skill_utterances(
        short_view["bot_utterances"], "convert_reddit")
    human_attributes = short_view["human"]["attributes"]

    payload = {
        "utterances_histories": [history_texts],
        "personality": [persona],
        "num_ongoing_utt": [ongoing_count],
        "human_attributes": [human_attributes],
    }
    return [payload]
示例#3
0
def alice_formatter_dialog(dialog: Dict) -> List:
    """
    Format the dialog for the alice skill.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=4`` and
    ``utils.remove_clarification_turns_from_dialog``; the result is then
    delegated to ``utils.last_n_human_utt_dialog_formatter`` with
    ``last_n_utts=2`` and ``only_last_sentence=True``.

    Used by: alice

    Returns:
        Whatever ``utils.last_n_human_utt_dialog_formatter`` returns.
    """
    formatter_options = {"last_n_utts": 2, "only_last_sentence": True}
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=4)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    return utils.last_n_human_utt_dialog_formatter(without_clarifications,
                                                   **formatter_options)
示例#4
0
def entity_detection_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Collect the texts of the last two utterances for entity detection.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=1`` and its texts are replaced using the
    ``"punct_sent"`` mode of ``utils.replace_with_annotated_utterances``.

    Returns:
        ``[{"sentences": [[text, ...]]}]`` where the inner list holds the
        texts of at most the last two entries of ``dialog["utterances"]``.
    """
    window_size = 2
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=1)
    annotated = utils.replace_with_annotated_utterances(recent_turns,
                                                        mode="punct_sent")
    tail_texts = []
    for utterance in annotated["utterances"][-window_size:]:
        tail_texts.append(utterance["text"])
    return [{"sentences": [tail_texts]}]
示例#5
0
def utt_sentseg_punct_dialog(dialog: Dict):
    """
    Wrap the dialog with ``"punct_sent"`` annotated utterance texts.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments), ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="punct_sent")``.

    Used by: skill_with_attributes_formatter; punct_dialogs_formatter,
    dummy_skill_formatter, base_response_selector_formatter

    Returns:
        A one-element list holding ``{"dialogs": [dialog]}``.
    """
    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    annotated = utils.replace_with_annotated_utterances(
        without_clarifications, mode="punct_sent")
    return [{"dialogs": [annotated]}]
示例#6
0
def dialog_breakdown_formatter(dialog: Dict) -> List[Dict]:
    """
    Build context / current-utterance input for dialog breakdown.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=2`` and its texts are replaced using the
    ``"punct_sent"`` mode of ``utils.replace_with_annotated_utterances``.

    Used by: dialog_breakdown

    Returns:
        ``[{"context": [str], "curr_utterance": [str]}]``. The context is
        the space-joined texts of ``dialog["utterances"][-4:-1]`` (the
        final utterance is excluded); the current utterance is the text
        of the last human utterance.
    """
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=2)
    annotated = utils.replace_with_annotated_utterances(recent_turns,
                                                        mode="punct_sent")
    preceding_texts = [
        utterance["text"] for utterance in annotated["utterances"][-4:-1]
    ]
    joined_context = " ".join(preceding_texts)
    current_text = annotated["human_utterances"][-1]["text"]
    return [{"context": [joined_context], "curr_utterance": [current_text]}]
示例#7
0
def cobot_formatter_dialog(dialog: Dict):
    """
    Collect the utterance texts of the dialog as a single history.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments), ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="segments")``.

    Used by: cobot_dialogact_formatter, cobot_classifiers_formatter

    Returns:
        ``[{"utterances_histories": [[text, ...]]}]`` with one text per
        entry of ``dialog["utterances"]``.
    """
    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    segmented = utils.replace_with_annotated_utterances(
        without_clarifications, mode="segments")
    history = [utterance["text"] for utterance in segmented["utterances"]]
    return [{"utterances_histories": [history]}]
示例#8
0
def topic_recommendation_formatter(dialog: Dict):
    """
    Gather active skill names and cobot topics across the dialog.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments) and ``utils.remove_clarification_turns_from_dialog``.

    Returns:
        ``[{"active_skills": [[...]], "cobot_topics": [[...]]}]``.
        ``active_skills`` holds every truthy ``"active_skill"`` value in
        utterance order; ``cobot_topics`` concatenates the ``"text"``
        entries found under ``annotations -> cobot_topics`` (missing
        levels contribute nothing).
    """
    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)

    skills_seen = []
    topics_seen = []
    for utterance in without_clarifications["utterances"]:
        skill_name = utterance.get("active_skill", "")
        # Utterances without a (non-empty) active skill are skipped.
        if skill_name:
            skills_seen.append(skill_name)
        topic_annotation = utterance.get("annotations",
                                         {}).get("cobot_topics", {})
        topics_seen.extend(topic_annotation.get("text", []))
    return [{"active_skills": [skills_seen], "cobot_topics": [topics_seen]}]
示例#9
0
def hypotheses_list_for_dialog_breakdown(dialog: Dict) -> List[Dict]:
    """
    Pair every hypothesis of the last human utterance with one context.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=2`` and its texts are replaced using the
    ``"punct_sent"`` mode of ``utils.replace_with_annotated_utterances``.

    Used by: dialog_breakdown

    Returns:
        ``[{"context": [...], "curr_utterance": [...]}]`` where both
        lists have one entry per hypothesis: the same space-joined text
        of the last three utterances, and the hypothesis text.
    """
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=2)
    annotated = utils.replace_with_annotated_utterances(recent_turns,
                                                        mode="punct_sent")
    tail_texts = [
        utterance["text"] for utterance in annotated["utterances"][-3:]
    ]
    shared_context = " ".join(tail_texts)

    hypothesis_texts = [
        hypothesis["text"]
        for hypothesis in annotated["human_utterances"][-1]["hypotheses"]
    ]
    # The context string is repeated so both batches stay aligned.
    return [{
        "context": [shared_context] * len(hypothesis_texts),
        "curr_utterance": hypothesis_texts,
    }]
示例#10
0
def last_utt_and_history_dialog(dialog: Dict) -> List:
    """
    Return the last human sentence together with the utterance history.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments), ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="punct_sent")``.

    Used by: topicalchat retrieval skills

    Returns:
        ``[{"sentences": [sent], "utterances_histories": [[text, ...]]}]``
        where ``sent`` is the ``"spelling_preprocessing"`` annotation of
        the last human utterance, or its text when that annotation is
        absent.
    """
    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    annotated = utils.replace_with_annotated_utterances(
        without_clarifications, mode="punct_sent")

    last_human = annotated["human_utterances"][-1]
    annotations = last_human["annotations"]
    sentence = annotations.get("spelling_preprocessing", last_human["text"])
    history = [utterance["text"] for utterance in annotated["utterances"]]
    return [{"sentences": [sentence], "utterances_histories": [history]}]
示例#11
0
def game_cooperative_skill_formatter(dialog: Dict):
    """
    Wrap the dialog, keeping only two human attributes.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments), ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="punct_sent")``. Its
    ``dialog["human"]["attributes"]`` is then replaced by a dict with just
    ``"game_cooperative_skill"`` and ``"used_links"`` (each defaulting to
    an empty dict).

    Returns:
        A one-element list holding ``{"dialogs": [dialog]}``.
    """
    kept_attribute_names = ("game_cooperative_skill", "used_links")

    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    annotated = utils.replace_with_annotated_utterances(
        without_clarifications, mode="punct_sent")

    all_attributes = annotated["human"]["attributes"]
    annotated["human"]["attributes"] = {
        name: all_attributes.get(name, {})
        for name in kept_attribute_names
    }
    return [{"dialogs": [annotated]}]
示例#12
0
def sent_rewrite_formatter_w_o_last_dialog(dialog: Dict) -> List[Dict]:
    """
    Collect texts and annotations of every utterance except the last.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``utils.LAST_N_TURNS + 1`` as second positional argument,
    ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="segments")``.

    Returns:
        ``[{"utterances_histories": [[text, ...]],
        "annotation_histories": [[annotations, ...]]}]``; annotations are
        deep copies of the utterances' ``"annotations"`` values.
    """
    recent_turns = utils.get_last_n_turns(dialog, utils.LAST_N_TURNS + 1)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    segmented = utils.replace_with_annotated_utterances(
        without_clarifications, mode="segments")

    # The final utterance is deliberately left out ("w_o_last").
    earlier_utterances = segmented["utterances"][:-1]
    pairs = [(deepcopy(utterance["annotations"]), utterance["text"])
             for utterance in earlier_utterances]
    return [{
        "utterances_histories": [[text for _, text in pairs]],
        "annotation_histories": [[annotations for annotations, _ in pairs]],
    }]
示例#13
0
def full_history_dialog(dialog: Dict):
    """
    Wrap a longer dialog view plus the recently active skills.

    The active-skill names are read from the incoming dialog's
    ``"bot_utterances"`` before the dialog is passed through
    ``utils.get_last_n_turns`` with ``bot_last_turns=10``.

    Used ONLY by: response selector

    Returns:
        ``[{"dialogs": [dialog], "all_prev_active_skills": [[...]]}]``
        where the skills list holds at most the last 15 truthy
        ``"active_skill"`` values.
    """
    named_skills = []
    for bot_utterance in dialog["bot_utterances"]:
        skill_name = bot_utterance.get("active_skill", "")
        if skill_name:
            named_skills.append(skill_name)
    latest_skills = named_skills[-15:]

    shortened = utils.get_last_n_turns(dialog, bot_last_turns=10)
    return [{
        "dialogs": [shortened],
        "all_prev_active_skills": [latest_skills],
    }]
示例#14
0
def fact_random_formatter_dialog(dialog: Dict):
    """
    Extract the last human text and its linked entity substrings.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=1`` and its texts are replaced using the
    ``"punct_sent"`` mode of ``utils.replace_with_annotated_utterances``.

    Used by: fact-random annotator

    Returns:
        ``[{"text": str, "entities": [...]}]``. ``entities`` holds the
        ``"entity_substr"`` of every ``"entity_linking"`` annotation item
        that has a non-empty ``"entity_pages"`` value.
    """
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=1)
    annotated = utils.replace_with_annotated_utterances(recent_turns,
                                                        mode="punct_sent")
    last_human = annotated["human_utterances"][-1]
    linked_entities = last_human["annotations"].get("entity_linking", [{}])

    substrings_with_pages = [
        item["entity_substr"] for item in linked_entities
        if "entity_pages" in item and item["entity_pages"]
    ]
    return [{"text": last_human["text"], "entities": substrings_with_pages}]
示例#15
0
def convers_evaluator_annotator_formatter(dialog: Dict) -> List[Dict]:
    """
    Build the conversation-evaluator request from the dialog.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments) and ``utils.remove_clarification_turns_from_dialog``.

    Returns:
        A one-element list with a dict holding:
        ``"hypotheses"`` - texts of the last human utterance's hypotheses;
        ``"currentUtterance"`` - text of the last utterance;
        ``"pastUtterances"`` - texts of the two human utterances that
        precede the last one;
        ``"pastResponses"`` - texts of the last two bot utterances.
    """
    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)

    candidate_responses = without_clarifications["human_utterances"][-1][
        "hypotheses"]
    human_texts = [
        utterance["text"]
        for utterance in without_clarifications["human_utterances"]
    ]
    bot_texts = [
        utterance["text"]
        for utterance in without_clarifications["bot_utterances"]
    ]

    # cobot recommends to take 2 last utt for conversation evaluation service
    request = {
        "hypotheses": [candidate["text"] for candidate in candidate_responses],
        "currentUtterance": without_clarifications["utterances"][-1]["text"],
        "pastUtterances": human_texts[-3:-1],
        "pastResponses": bot_texts[-2:],
    }
    return [request]
示例#16
0
def utt_sentrewrite_modified_last_dialog(dialog: Dict):
    """
    Wrap the dialog with ``"modified_sents"`` texts plus active skills.

    The active-skill names are read from the incoming dialog's
    ``"bot_utterances"`` first. The dialog is then passed through
    ``utils.get_last_n_turns`` (default arguments),
    ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="modified_sents")``.

    Used by: book_skill_formatter; misheard_asr_formatter, cobot_qa_formatter

    Returns:
        ``[{"dialogs": [dialog], "all_prev_active_skills": [[...]]}]``
        where the skills list holds every truthy ``"active_skill"`` value.
    """
    named_skills = []
    for bot_utterance in dialog["bot_utterances"]:
        skill_name = bot_utterance.get("active_skill", "")
        if skill_name:
            named_skills.append(skill_name)

    recent_turns = utils.get_last_n_turns(dialog)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    rewritten = utils.replace_with_annotated_utterances(
        without_clarifications, mode="modified_sents")
    return [{
        "dialogs": [rewritten],
        "all_prev_active_skills": [named_skills],
    }]
示例#17
0
def hypothesis_histories_list(dialog: Dict):
    """
    Build one utterance history per hypothesis of the last human utterance.

    The hypotheses are read from the incoming dialog before it is passed
    through ``utils.get_last_n_turns`` (default arguments),
    ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="segments")``.

    Returns:
        ``[{"utterances_with_histories": [[text, ..., hyp_text], ...]}]``
        with one inner list per hypothesis: the dialog's utterance texts
        followed by that hypothesis' text. Utterance texts that are lists
        are joined with a single space.
    """
    hypotheses = dialog["human_utterances"][-1]["hypotheses"]
    dialog = utils.get_last_n_turns(dialog)
    dialog = utils.remove_clarification_turns_from_dialog(dialog)
    dialog = utils.replace_with_annotated_utterances(dialog, mode="segments")

    # The shared history does not depend on the hypothesis, so it is built
    # once instead of once per hypothesis.
    shared_history = []
    for utt in dialog["utterances"]:
        utt_text = utt["text"]
        if isinstance(utt_text, list):
            # Flatten a list-valued text into one string.
            utt_text = " ".join(utt_text)
        shared_history.append(utt_text)

    # `shared_history + [...]` creates a fresh list for every hypothesis, so
    # batch items never alias each other.
    utterances_histories_batch = [
        shared_history + [hyp["text"]] for hyp in hypotheses
    ]
    return [{"utterances_with_histories": utterances_histories_batch}]
示例#18
0
def eliza_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Build the eliza input: last human utterance plus eliza-answered history.

    The dialog is passed through ``utils.get_last_n_turns`` (default
    arguments) and ``utils.remove_clarification_turns_from_dialog``.

    Used by: eliza_formatter

    Returns:
        ``[{"last_utterance_batch": [str],
        "human_utterance_history_batch": [[str, ...]]}]``. The history
        holds, for every bot utterance whose ``"active_skill"`` is
        ``"eliza"``, the most recent human utterance seen before it. Human
        utterances are represented by their ``"spelling_preprocessing"``
        annotation, falling back to the raw text.
    """
    dialog = utils.get_last_n_turns(dialog)
    dialog = utils.remove_clarification_turns_from_dialog(dialog)

    history = []
    prev_human_utterance = None
    for utt in dialog["utterances"]:
        user_type = utt["user"]["user_type"]
        if user_type == "human":
            prev_human_utterance = utt["annotations"].get(
                "spelling_preprocessing", utt["text"])
        elif (user_type == "bot" and prev_human_utterance is not None
              # `.get` matches how the sibling formatters read "active_skill"
              # and avoids a KeyError on a bot utterance lacking the key.
              and utt.get("active_skill") == "eliza"):
            history.append(prev_human_utterance)

    last_human = dialog["human_utterances"][-1]
    last_utterance = last_human["annotations"].get("spelling_preprocessing",
                                                   last_human["text"])
    return [{
        "last_utterance_batch": [last_utterance],
        "human_utterance_history_batch": [history],
    }]
示例#19
0
def entity_storer_formatter(dialog: Dict) -> List[Dict]:
    """
    Build the entity-storer input: a slimmed dialog and the human index.

    The index of the last human utterance and the stored entities
    (``human -> attributes -> entities``, defaulting to ``{}``) are read
    from the incoming dialog. The dialog is then passed through
    ``utils.get_last_n_turns`` with ``bot_last_turns=1`` and
    ``human_last_turns=2``, ``utils.replace_with_annotated_utterances``
    in ``"clean_sent"`` mode, and
    ``utils.clean_up_utterances_to_avoid_unwanted_keys``.

    Returns:
        ``[{"human_utter_indexes": [int], "dialogs": [dialog]}]`` where
        the dialog's ``"human"`` entry is
        ``{"attributes": {"entities": ...}}``.
    """
    # Both values come from the incoming dialog, before any shortening.
    last_human_index = len(dialog["human_utterances"]) - 1
    stored_entities = dialog.get("human", {}).get("attributes",
                                                  {}).get("entities", {})

    shortened = utils.get_last_n_turns(dialog,
                                       bot_last_turns=1,
                                       human_last_turns=2)
    shortened = utils.replace_with_annotated_utterances(shortened,
                                                        mode="clean_sent")

    # Per the original author's note: remove everything except
    # human_utterances and bot_utterances; only text, annotations and
    # active_skill are needed.
    slim_dialog = utils.clean_up_utterances_to_avoid_unwanted_keys(
        shortened, types_utterances=["human_utterances", "bot_utterances"])
    slim_dialog["human"] = {"attributes": {"entities": stored_entities}}

    return [{
        "human_utter_indexes": [last_human_index],
        "dialogs": [slim_dialog],
    }]
示例#20
0
def el_formatter_dialog(dialog: Dict):
    """
    Build the entity-linking input from the last human utterance.

    Entities come from two ``get_entities`` calls on the incoming dialog's
    last human utterance: one with ``only_named=True, with_labels=True``
    and one with ``only_named=False, with_labels=False``. The context is
    taken after the dialog is passed through ``utils.get_last_n_turns``
    with ``bot_last_turns=1`` and
    ``utils.replace_with_annotated_utterances(mode="punct_sent")``.

    Used by: entity_linking annotator

    Returns:
        ``[{"entity_substr": [[...]], "template": [""],
        "context": [[text, ...]]}]``. ``entity_substr`` holds unique
        substrings in first-seen order: named-entity texts (except
        ``"alexa"``, compared case-insensitively) followed by noun phrases
        whose lowercase form is not already among the named entities.
        ``context`` holds the texts of at most the last two utterances.
    """
    num_last_utterances = 2
    last_human_utt = dialog["human_utterances"][-1]
    ner_output = get_entities(last_human_utt,
                              only_named=True,
                              with_labels=True)
    nounphrases = get_entities(last_human_utt,
                               only_named=False,
                               with_labels=False)

    # Keep only well-formed named entities, dropping the wake word.
    entity_substr_list = [
        entity["text"] for entity in (ner_output or [])
        if entity and isinstance(entity, dict) and "text" in entity
        and entity["text"].lower() != "alexa"
    ]
    named_entities_lower = {
        entity_substr.lower()
        for entity_substr in entity_substr_list
    }

    dialog = utils.get_last_n_turns(dialog, bot_last_turns=1)
    dialog = utils.replace_with_annotated_utterances(dialog, mode="punct_sent")
    context = [[
        uttr["text"] for uttr in dialog["utterances"][-num_last_utterances:]
    ]]

    entity_substr_list += [
        nounphrase for nounphrase in (nounphrases or [])
        if nounphrase.lower() not in named_entities_lower
    ]
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # output no longer depends on per-process string hash randomisation the
    # way `list(set(...))` does.
    entity_substr_list = list(dict.fromkeys(entity_substr_list))

    return [{
        "entity_substr": [entity_substr_list],
        "template": [""],
        "context": context
    }]
示例#21
0
def fact_retrieval_formatter_dialog(dialog: Dict):
    """
    Build the fact-retrieval input from the last human utterance.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=1`` and its texts are replaced using the
    ``"punct_sent"`` mode of ``utils.replace_with_annotated_utterances``.

    Used by: odqa annotator

    Returns:
        A one-element list with a dict holding:
        ``"human_sentences"`` - the last human text;
        ``"dialog_history"`` - the space-joined texts of the last three
        utterances;
        ``"nounphrases"`` - the ``cobot_entities -> entities`` annotation
        (``[]`` when absent);
        ``"entity_substr"``, ``"entity_pages"``, ``"entity_ids"``,
        ``"entity_pages_titles"`` - the matching fields of every
        ``"entity_linking"`` item with a non-empty ``"entity_pages"``.
        Each value is wrapped in a single-item list.
    """
    # Read order per linked entity mirrors the fields' original order.
    linked_fields = ("entity_pages", "entity_ids", "entity_substr",
                     "entity_pages_titles")

    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=1)
    annotated = utils.replace_with_annotated_utterances(recent_turns,
                                                        mode="punct_sent")
    tail_texts = [
        utterance["text"] for utterance in annotated["utterances"][-3:]
    ]
    joined_history = " ".join(tail_texts)

    last_human = annotated["human_utterances"][-1]
    annotations = last_human["annotations"]
    cobot_entities = annotations.get("cobot_entities", {}).get("entities", [])

    collected = {field: [] for field in linked_fields}
    for item in annotations.get("entity_linking", [{}]):
        # Skip items that were not linked to any page.
        if not ("entity_pages" in item and item["entity_pages"]):
            continue
        for field in linked_fields:
            collected[field].append(item[field])

    return [{
        "human_sentences": [last_human["text"]],
        "dialog_history": [joined_history],
        "nounphrases": [cobot_entities],
        "entity_substr": [collected["entity_substr"]],
        "entity_pages": [collected["entity_pages"]],
        "entity_ids": [collected["entity_ids"]],
        "entity_pages_titles": [collected["entity_pages_titles"]],
    }]
示例#22
0
def base_skill_selector_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Wrap the dialog as a single-item ``states_batch``.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=5``,
    ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="punct_sent")``.

    Used by: base_skill_selector_formatter

    Returns:
        A one-element list holding ``{"states_batch": [dialog]}``.
    """
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=5)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    annotated = utils.replace_with_annotated_utterances(
        without_clarifications, mode="punct_sent")
    return [{"states_batch": [annotated]}]
示例#23
0
def utt_sentrewrite_modified_last_dialog_emotion_skill(dialog: Dict):
    """
    Wrap a two-bot-turn dialog view with ``"modified_sents"`` texts.

    The dialog is passed through ``utils.get_last_n_turns`` with
    ``bot_last_turns=2``,
    ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances(mode="modified_sents")``.

    Returns:
        A one-element list holding ``{"dialogs": [dialog]}``.
    """
    recent_turns = utils.get_last_n_turns(dialog, bot_last_turns=2)
    without_clarifications = utils.remove_clarification_turns_from_dialog(
        recent_turns)
    rewritten = utils.replace_with_annotated_utterances(
        without_clarifications, mode="modified_sents")
    return [{"dialogs": [rewritten]}]