Exemplo n.º 1
0
    def parse_action(self, action_list):
        """Convert a wizard act annotation into the final list of turns.

        The annotation is parsed by `_parse_action_aggregated`; actions in
        the ``booking`` domain are moved to `self._domain_substitute`; the
        result is sorted by ``sort_key``; an action whose string form equals
        that of the element appended just before it is dropped; and, when
        `self.add_status_slots` is set, ``<domain>_status`` slots are placed
        next to booking-related actions.

        :param action_list: Raw annotation, passed on unchanged to
            `_parse_action_aggregated`.
        :return: List of actions (and status slots). An empty list if a
            ``booking`` action shows up while no substitute domain is set.
        """
        parsed = self._parse_action_aggregated(action_list)

        # Re-home `booking` actions onto the substitute domain
        for act in parsed:
            if act.domain != "booking":
                continue
            if not self._domain_substitute:
                log_problem({"type": "unclear_booking_domain"})
                return []

            act.domain = self._domain_substitute
            if act.name in ["advise", "reply"]:
                # Let `reply_booking_DOMAIN` be just `reply_DOMAIN`
                act.tags = None
            elif act.tags:
                act.tags.append("booking")
            else:
                act.tags = ["booking"]
            act.store_in_domain_info()

        parsed.sort(key=lambda item: item.sort_key)

        result = []
        for act in parsed:
            # Skip an action that repeats the previously appended element.
            # NOTE(review): right after a `book`/`offerbooked` action the
            # previous element is the status slot, not the action itself.
            if result and act.to_string() == result[-1].to_string():
                continue

            if not self.add_status_slots:
                result.append(act)
                continue

            # Decide which status value applies and on which side of the
            # action the status slot has to go
            if (act.name == "inform" and act.tags and "booking" in act.tags) or \
                    (act.name in ["offerbook"]):
                status, slot_first = "unique", True
            elif act.name in ["nobook", "nooffer"]:
                status, slot_first = "NA", True
            elif act.name in ["book", "offerbooked"]:
                status, slot_first = "booked", False
            else:
                result.append(act)
                continue

            status_slot = DialogSlot({f"{act.domain}_status": status})
            status_slot.store_in_domain_info()
            if slot_first:
                result.extend([status_slot, act])
            else:
                result.extend([act, status_slot])

        return result
Exemplo n.º 2
0
    def _parse_action_aggregated(self, action_list):
        action_infos = []
        if type(action_list) is not dict:
            if type(action_list) is str and action_list == "No Annotation":
                log_problem({"type": "no_annotation"})
            else:
                log_problem({
                    "type": "bad_action_list_type",
                    "action_list": action_list
                })
            return []
        for base_action, slots in action_list.items():
            # Parse key (basic action and domain)
            if "-" in base_action:
                domain, action_name = base_action.split("-")
            else:
                # Single-letter actions like "A" or "N" are actually never requested
                log_problem({
                    "type": "bad_action_format",
                    "base_action": base_action
                })
                return []

            action_name = action_name.lower()

            # Parse value (slots)
            for slot in slots:
                if slot == ["none", "none"]:
                    spec = ""
                else:
                    slot_name = slot[0].lower()
                    spec = slot_name.strip()

                action_info = {
                    "activity": action_name,
                    "spec": spec,
                    "domain": domain.lower()
                }

                action_infos += [action_info]

        # Combine all `action_infos` entries to strings of aggregated actions
        # for each domain and activity
        action_groups = {}
        for info in action_infos:
            head = info["domain"] + "_" + info["activity"]
            if head not in action_groups:
                action_groups[head] = set()
            action_groups[head].update([info["spec"]])

        actions = []
        for head, specs in action_groups.items():
            domain, activity = head.split("_")
            string = "   - " + activity
            string += "_" + domain
            action = DialogTurn.from_string(string)
            action.store_in_domain_info()
            actions.append(action)

        return actions
Exemplo n.º 3
0
 def find_differences(
     self, baseline, dataset, results, lookups, branch=""
 ):
     """Walk `baseline` and `dataset` in lockstep and record where they differ.

     Both arguments must have the same type (checked with an assertion).

     * str: if the values differ, ``branch + "_" + dataset`` is appended to
       `results`.
     * dict: every key of `baseline` is visited recursively, with
       ``"_" + str(key)`` added to `branch`.
     * list: elements are compared pairwise (the branch is passed on
       unchanged). If `dataset` has extra elements, the output of
       `self.parse_lookup` is added to `lookups`. If `baseline` is the
       longer one, a ``long_baseline`` problem is logged and the elements
       are not compared.

     :raises TypeError: If a value is neither dict, list nor str.
     """
     kind = type(baseline)
     assert kind is type(dataset)

     if kind is str:
         if baseline != dataset:
             results.append(branch + "_" + dataset)
         return

     if kind is dict:
         for key in baseline:
             child_branch = branch + "_" + str(key)
             self.find_differences(
                 baseline[key], dataset[key], results, lookups, child_branch
             )
         return

     if kind is not list:
         raise TypeError("Dataset contains objects that is not dict/list/str.")

     if len(baseline) > len(dataset):
         log_problem({
                 "type": "long_baseline",
                 "branch": branch,
                 "baseline": baseline,
                 "dataset": dataset
             })
         return

     # `zip` stops at the end of the (not longer) baseline
     for base_item, data_item in zip(baseline, dataset):
         self.find_differences(base_item, data_item, results, lookups, branch)

     if len(baseline) < len(dataset):
         # The Wizard looked up something (a booking was made)
         lookups += self.parse_lookup(branch, baseline, dataset)
Exemplo n.º 4
0
    def parse_story(self,
                    name,
                    verbose=0,
                    infuse_chitchat_callback=None,
                    chitchat_variability=1):
        """
        Parse a MultiWOZ story.

        Iterates over the dialog log. Steps without metadata are treated as user utterances, all other
        steps as wizard replies. For every wizard reply the user turns (from `IntentParser`) and the wizard
        turns (from `parse_action`) are combined, runs of adjacent slot/intent turns are merged, and the
        string form of each resulting turn is appended to the story.

        :param name: Name of the story (e.g. MUL0129.json). The last five characters are cut off before the
        name is used in the story header and as key into `self.acts`.
        :param verbose: Level of output (0 = no print, 1 = print parsed story, 2 = also print utterances)
        :param infuse_chitchat_callback: Function that takes the current story name, present and maximum number of
        turns and returns the number of chitchats that should be infused at this point.
        :param chitchat_variability: How many different chitchat intents/actions should be created
        :return: The story as a string, or None if at least one problem was logged while parsing it.
        """
        # Remember the problem count so that problems logged for this story can be detected at the end
        initial_num_problems = len(multiwoz.domain_info.problems)
        dialog = self.data[name]
        log = dialog["log"]
        num_turns = len(log)

        story = ""
        parse_intent = IntentParser(self.slot_parser,
                                    add_status_slots=self.add_status_slots)
        # Drop the last five characters (".json" in the docstring's example name)
        name = name[:-5]

        story += f"## story_{name}" + "\n"
        if verbose > 0:
            print(colored(f"## story_{name}", "green"))

        # If we infuse chitchat, then add the chitchat action to the domain file
        # The intent is added in the parse_stories.py script
        if infuse_chitchat_callback is not None:
            if chitchat_variability > 1:
                for v in range(chitchat_variability):
                    DialogAction(f"chitchat_{v + 1}",
                                 "general").store_in_domain_info()
            else:
                DialogAction("chitchat", "general").store_in_domain_info()

        count_use = 0  # How often the user spoke
        count_wiz = 0  # How often the wizard replied (consecutive actions count as one)
        # Reset the substitute domain; `parse_action` reads it for `booking` actions
        self._domain_substitute = None
        for step in log:
            if len(step["metadata"]) == 0:  # User-texts don't have metadata
                # Possibly infuse a chitchat detour
                if infuse_chitchat_callback is not None:
                    # Determine how many chitchats should be added
                    req_num_chitchat = infuse_chitchat_callback(
                        name, count_use + count_wiz + 1, num_turns)
                    # Create the intent/action pairs
                    turns = []
                    for _ in range(req_num_chitchat):
                        # Determine the chitchat type
                        if chitchat_variability > 1:
                            # randint is inclusive on both ends: chitchat_1 .. chitchat_<variability>
                            cc_name = f"chitchat_{random.randint(1, chitchat_variability)}"
                        else:
                            cc_name = "chitchat"
                        turns.append(DialogIntent(cc_name))
                        turns.append(DialogAction(cc_name, "general"))
                    # Add the turns to the story
                    for turn in turns:
                        story += turn.to_string() + "\n"

                # User's text
                if verbose > 1:
                    print("U:  " + step["text"])

                count_use += 1
            else:
                turns_from_wizard = []
                count_wiz += 1

                # Infer user intent from new information
                # This includes the user intent + possible additional slots that come from the wizard
                # looking up information during booking
                turns_from_user, domain_substitute = parse_intent(
                    step["metadata"])

                # Keep the previous substitute when this step does not provide a new one
                if domain_substitute:
                    self._domain_substitute = domain_substitute

                # If this is the end of the dialog, then we assume the user's `inform{}` (no slots) is actually a `bye`
                if count_wiz * 2 == len(log) and len(
                        turns_from_user) == 1 and not turns_from_user[0].slots:
                    turns_from_user[0].name = "bye"

                if verbose > 0:
                    for turn in turns_from_user:
                        print(colored(turn.to_string(), "blue"))

                # Wizard's text
                if verbose > 1:
                    print("W:  " + step["text"])

                # Wizard's information
                # `self.acts` is keyed by the shortened story name and the wizard turn number as a string
                if str(count_wiz) in self.acts[name]:
                    action = self.acts[name][str(count_wiz)]
                    turns_from_wizard = self.parse_action(action)

                    if verbose > 0:
                        for turn in turns_from_wizard:
                            print(colored(turn.to_string(), "red"))
                else:
                    log_problem({
                        "type": "no_action",
                        "count_wiz": count_wiz,
                        "actions": self.acts[name]
                    })

                # Merge adjacent slots
                # A run of consecutive slot/intent turns is folded into the first turn of that run
                # (its `slots` are modified in place); any other turn ends the run.
                all_turns = []
                last_slot = None
                for turn in turns_from_user + turns_from_wizard:
                    if turn.is_slot or turn.is_intent:
                        if last_slot:
                            if last_slot.slots:
                                # NOTE(review): if `slots` is a plain dict and `turn.slots` can be None,
                                # this `update` would raise - confirm against the turn classes
                                last_slot.slots.update(turn.slots)
                            else:
                                last_slot.slots = turn.slots
                        else:
                            last_slot = turn
                    else:
                        if last_slot:
                            all_turns.append(last_slot)
                            last_slot = None
                        all_turns.append(turn)
                # Flush a run that reaches the end of the turn list
                if last_slot:
                    all_turns.append(last_slot)

                for turn in all_turns:
                    story += turn.to_string() + "\n"

        story += "\n"

        # Discard the story if any problem was logged since the start of this call
        if len(multiwoz.domain_info.problems) > initial_num_problems:
            story = None

        return story
Exemplo n.º 5
0
    def parse_story_e2e(self, name, verbose=0):
        """
        Parse a MultiWOZ story for end-to-end training.

        The cleaned-up utterance texts themselves serve as intent and action names. For each wizard
        reply the story receives one `* <user text>` line, optionally a line with the status slots
        (only if `self.add_status_slots` is set and such slots exist), and one `   - <wizard text>` line.
        As long as no problem has been logged for this story, the wizard text is also registered in
        `multiwoz.domain_info.e2e_actions` together with the sorted parsed actions of that turn.

        :param name: Name of the story (e.g. MUL0129.json)
        :param verbose: Level of output (0 = no print, 1 = print parsed story, 2 = also print utterances)
        :return: The story as a string, or None if at least one problem was logged while parsing it.
        """
        problems_before = len(multiwoz.domain_info.problems)
        log = self.data[name]["log"]

        intent_parser = IntentParser(self.slot_parser,
                                     add_status_slots=self.add_status_slots)
        name = name[:-5]

        header = f"## story_{name}"
        lines = [header + "\n"]
        if verbose > 0:
            print(colored(header, "green"))

        def tidy(text):
            # Remove line breaks, `/`, and redundant whitespaces or tabs
            text = re.sub(r"[/:\"'`#]+", " ", text)
            text = text.replace("\n", "").strip(" \t/")
            return re.sub(r"\s\s+", " ", text)

        count_use = 0  # How often the user spoke
        count_wiz = 0  # How often the wizard replied (consecutive actions count as one)
        self._domain_substitute = None
        intent_name = None
        for step in log:
            # User-texts don't have metadata
            if len(step["metadata"]) == 0:
                if verbose > 0:
                    print("U:  " + step["text"].strip())
                intent_name = step["text"]
                count_use += 1
                continue

            count_wiz += 1

            # Infer user intent from new information
            # This includes the user intent + possible additional slots that come from the wizard
            # looking up information during booking
            user_turns, substitute = intent_parser(step["metadata"])
            if substitute:
                self._domain_substitute = substitute

            if verbose > 1:
                for turn in user_turns:
                    print(colored(turn.to_string(), "blue"))

            # Wizard's information
            wizard_turns = []
            turn_key = str(count_wiz)
            if turn_key in self.acts[name]:
                wizard_turns = self.parse_action(self.acts[name][turn_key])
                if verbose > 1:
                    for turn in wizard_turns:
                        print(colored(turn.to_string(), "red"))
            else:
                log_problem({
                    "type": "no_action",
                    "count_wiz": count_wiz,
                    "story": name,
                    "actions": self.acts[name]
                })

            # Collect the slots of all non-action turns and keep only the status slots
            status_slots = None
            if self.add_status_slots:
                merged = DialogSlot({})
                for turn in user_turns + wizard_turns:
                    if not turn.is_action and turn.slots:
                        merged.slots.update(turn.slots)
                status_only = {}
                for slot_name, slot_value in merged.slots.items():
                    if slot_name.endswith("status"):
                        status_only[slot_name] = slot_value
                status_slots = DialogSlot(status_only)

            action_name = step["text"]

            # Substitute entities, then clean up both texts
            intent_name = self._substitute_entity(intent_name)
            action_name = self._substitute_entity(action_name)
            intent_name = tidy(intent_name)
            action_name = tidy(action_name)

            if verbose > 0:
                print(f"U: {intent_name}")
                print(f"W: {step['text'].strip()}")
                print(f"W: {action_name}")

            # Store action for domain (if no errors occurred)
            if len(multiwoz.domain_info.problems) == problems_before:
                wizard_action_names = sorted(
                    turn.to_string()[5:]
                    for turn in wizard_turns
                    if turn.is_action
                )
                multiwoz.domain_info.e2e_actions.update(
                    {action_name: wizard_action_names})

            lines.append(f"* {intent_name}\n")
            if status_slots is not None and status_slots.slots:
                lines.append(status_slots.to_string() + "\n")
            lines.append(f"   - {action_name}\n")

        lines.append("\n")

        if len(multiwoz.domain_info.problems) > problems_before:
            return None

        return "".join(lines)