Example #1
    def add_to_number_linking_scores(self,
                                     all_numbers: Set[str],
                                     number_linking_scores: Dict[str, Tuple[str, str, List[int]]],
                                     get_number_linking_dict: Callable[[str, List[Token]], Dict[str, List[int]]],
                                     current_tokenized_utterance: List[Token],
                                     nonterminal: str) -> None:
        """
        This is a helper method for adding different types of numbers (e.g. starting time ranges) as entities.
        We first go through all utterances in the interaction and find the numbers of a certain type and add
        them to the set ``all_numbers``, which is initialized with default values. We want to add all numbers
        that occur in the interaction, and not just the current turn, because the query could contain numbers
        that were triggered before the current turn. For each entity, we then check if it is triggered by tokens
        in the current utterance and construct the linking score.
        """
        number_linking_dict: Dict[str, List[int]] = {}
        for utterance, tokenized_utterance in zip(self.utterances, self.tokenized_utterances):
            number_linking_dict = get_number_linking_dict(utterance, tokenized_utterance)
            all_numbers.update(number_linking_dict.keys())
        all_numbers_list: List[str] = sorted(all_numbers, reverse=True)
        for number in all_numbers_list:
            entity_linking = [0 for token in current_tokenized_utterance]
            # ``number_linking_dict`` is for the last utterance here. If the number was triggered
            # before the last utterance, then it will have linking scores of 0's.
            for token_index in number_linking_dict.get(number, []):
                if token_index < len(entity_linking):
                    entity_linking[token_index] = 1
            action = format_action(nonterminal, number, is_number=True)
            number_linking_scores[action] = (nonterminal, number, entity_linking)
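To make the behaviour concrete, here is a minimal, self-contained sketch of the same linking-score construction. ``format_action`` is replaced by a hypothetical stub, and the utterance, token indices, and numbers are invented for illustration; only ``1400``, which appears in the made-up last utterance, gets a non-zero row.

    from typing import Dict, List, Tuple

    def format_action(nonterminal: str, number: str, is_number: bool = False) -> str:
        # Hypothetical stand-in for the real helper, which also normalizes strings.
        return f'{nonterminal} -> ["{number}"]'

    # Numbers collected over the whole interaction; the linking dict only covers
    # the last utterance, "show flights after 1400".
    all_numbers = {'0', '1', '1400'}
    number_linking_dict: Dict[str, List[int]] = {'1400': [3]}
    current_tokenized_utterance = ['show', 'flights', 'after', '1400']

    number_linking_scores: Dict[str, Tuple[str, str, List[int]]] = {}
    for number in sorted(all_numbers, reverse=True):
        entity_linking = [0 for _ in current_tokenized_utterance]
        for token_index in number_linking_dict.get(number, []):
            if token_index < len(entity_linking):
                entity_linking[token_index] = 1
        action = format_action('number', number, is_number=True)
        number_linking_scores[action] = ('number', number, entity_linking)

    print(number_linking_scores['number -> ["1400"]'])  # ('number', '1400', [0, 0, 0, 1])
    print(number_linking_scores['number -> ["0"]'])     # ('number', '0', [0, 0, 0, 0])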
Example #2
    def init_all_valid_actions(self) -> Dict[str, List[str]]:
        """
        We initialize the world's valid actions with that of the context. This means that the strings
        and numbers that were valid earlier in the interaction are also valid. We then add new valid strings
        and numbers from the current utterance.
        """
        valid_actions = deepcopy(self.sql_table_context.valid_actions)
        for string in self.get_strings_from_utterance():
            action = format_action('string', string)
            if action not in valid_actions['string']:
                valid_actions['string'].append(action)

        numbers = ['0', '1']
        for utterance in self.utterances:
            numbers.extend(get_numbers_from_utterance(utterance))
            for number in numbers:
                action = format_action('number', number)
                if action not in valid_actions['number']:
                    valid_actions['number'].append(action)

        return valid_actions
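A rough sketch of how the de-duplication check behaves across turns, using a hand-written action format in place of ``format_action`` (the exact action string is an assumption): numbers discovered in earlier turns are revisited on later turns but are only appended once.

    from collections import defaultdict

    valid_actions = defaultdict(list)              # stands in for the context's valid actions
    utterance_numbers = [['1400'], ['1400', '2']]  # numbers found in two consecutive turns

    numbers = ['0', '1']
    for turn_numbers in utterance_numbers:
        numbers.extend(turn_numbers)
        for number in numbers:
            action = f'number -> ["{number}"]'     # hypothetical action string
            if action not in valid_actions['number']:
                valid_actions['number'].append(action)

    print(valid_actions['number'])
    # ['number -> ["0"]', 'number -> ["1"]', 'number -> ["1400"]', 'number -> ["2"]']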
Example #3
    def init_all_valid_actions(self) -> Tuple[Dict[str, List[str]], numpy.ndarray]:
        """
        We initialize the valid actions with the global actions. We then iterate through the
        utterances up to and including the current utterance and add the valid numbers.
        """

        valid_actions = deepcopy(self.sql_table_context.valid_actions)
        linking_scores = []

        current_tokenized_utterance = [] if not self.tokenized_utterances \
                else self.tokenized_utterances[-1]

        numbers = {'0', '1'}
        number_linking_dict: Dict[str, List[int]] = {}

        for utterance, tokenized_utterance in zip(self.utterances,
                                                  self.tokenized_utterances):
            number_linking_dict = get_numbers_from_utterance(
                utterance, tokenized_utterance)
            numbers.update(number_linking_dict.keys())
        numbers_list: List[str] = sorted(numbers, reverse=True)

        # We construct the linking scores for numbers from the ``number_linking_dict`` here.
        number_linking_scores = []
        for number in numbers_list:
            entity_linking = [0 for token in current_tokenized_utterance]
            # ``number_linking_dict`` has the numbers and linking scores from the last utterance.
            # If the number is not in the last utterance, then the linking scores will be all 0.
            for token_index in number_linking_dict.get(number, []):
                entity_linking[token_index] = 1
            number_linking_scores.append(entity_linking)
        linking_scores.extend(number_linking_scores)

        for number in numbers_list:
            action = format_action('number', number)
            valid_actions['number'].append(action)

        return valid_actions, numpy.array(linking_scores)
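Stacked with ``numpy.array``, the linking scores form a matrix with one row per entity (in the same reverse-sorted order as ``numbers_list``) and one column per token of the current utterance. A small sketch with invented data:

    import numpy

    numbers_list = sorted({'0', '1', '1400'}, reverse=True)  # ['1400', '1', '0']
    number_linking_dict = {'1400': [3]}                      # from the last utterance only
    current_tokenized_utterance = ['show', 'flights', 'after', '1400']

    linking_scores = []
    for number in numbers_list:
        entity_linking = [0 for _ in current_tokenized_utterance]
        for token_index in number_linking_dict.get(number, []):
            entity_linking[token_index] = 1
        linking_scores.append(entity_linking)

    print(numpy.array(linking_scores))
    # [[0 0 0 1]
    #  [0 0 0 0]
    #  [0 0 0 0]]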
Example #4
    def init_all_valid_actions(self):
        u"""
        We initialize the valid actions with the global actions. We then iterate through the
        utterances up to and including the current utterance and add the valid strings.
        """

        valid_actions = deepcopy(self.sql_table_context.valid_actions)
        linking_scores = []
        current_tokenized_utterance = [] if not self.tokenized_utterances\
                else self.tokenized_utterances[-1]

        strings = set()
        for tokenized_utterance in self.tokenized_utterances:
            string_linking_dict = get_strings_from_utterance(tokenized_utterance)
            strings.update(list(string_linking_dict.keys()))

        # We want to sort things in reverse here to be consistent with the grammar.
        # The parser is greedy which means that if we have a rule that has
        # multiple options for the right hand side, the first one that succeeds is
        # the one that is used. For example, if ``1400`` appears in the query, and
        # both ``1400`` and ``1`` are valid numbers, then we want to try to match
        # ``1400`` first. Otherwise, ``1`` will succeed but nothing will match ``400``.
        # The same applies for strings here.
        strings_list = sorted(strings, reverse=True)

        # We construct the linking scores for strings from the ``string_linking_dict`` here.
        string_linking_scores = []
        for string in strings_list:
            entity_linking = [0 for token in current_tokenized_utterance]
            # string_linking_dict has the strings and linking scores from the last utterance.
            # If the string is not in the last utterance, then the linking scores will be all 0.
            for token_index in string_linking_dict.get(string, []):
                entity_linking[token_index] = 1
            string_linking_scores.append(entity_linking)
        linking_scores.extend(string_linking_scores)

        for string in strings_list:
            action = format_action(u'string', string)
            if action not in valid_actions[u'string']:
                valid_actions[u'string'].append(action)

        numbers = set([u'0', u'1'])
        number_linking_dict = {}

        for utterance, tokenized_utterance in izip(self.utterances, self.tokenized_utterances):
            number_linking_dict = get_numbers_from_utterance(utterance, tokenized_utterance)
            numbers.update(list(number_linking_dict.keys()))
        numbers_list = sorted(numbers, reverse=True)

        # We construct the linking scores for numbers from the ``number_linking_dict`` here.
        number_linking_scores = []
        for number in numbers_list:
            entity_linking = [0 for token in current_tokenized_utterance]
            # ``number_linking_dict`` has the numbers and linking scores from the last utterance.
            # If the number is not in the last utterance, then the linking scores will be all 0.
            for token_index in number_linking_dict.get(number, []):
                entity_linking[token_index] = 1
            number_linking_scores.append(entity_linking)
        linking_scores.extend(number_linking_scores)

        for number in numbers_list:
            action = format_action(u'number', number)
            valid_actions[u'number'].append(action)
        return valid_actions, numpy.array(linking_scores)
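The reverse sort motivated by the comment above ``strings_list`` is easy to check in isolation: because the parser tries right-hand sides greedily in order, longer literals that share a prefix with shorter ones (such as ``1400`` and ``1``) must come first.

    numbers = {'1', '2', '21', '1400'}
    print(sorted(numbers, reverse=True))  # ['21', '2', '1400', '1']
    # '1400' precedes '1' and '21' precedes '2', so a token like "1400" is matched
    # whole rather than matching '1' and leaving '400' unparsed.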
Example #5
    def _get_linked_entities(self) -> Dict[str, Dict[str, Tuple[str, str, List[int]]]]:
        """
        This method gets entities from the current utterance and finds which tokens they are linked to.
        The entities are divided into two main groups, ``numbers`` and ``strings``. We rely on these
        entities later for updating the valid actions and the grammar.
        """
        current_tokenized_utterance = [] if not self.tokenized_utterances \
                else self.tokenized_utterances[-1]

        # We generate a dictionary where the key is the type, e.g. ``number`` or ``string``.
        # The value is another dictionary where the key is the action and the value is a tuple
        # of the nonterminal, the string value and the linking score.
        entity_linking_scores: Dict[str, Dict[str, Tuple[str, str, List[int]]]] = {}

        number_linking_scores: Dict[str, Tuple[str, str, List[int]]] = {}
        string_linking_scores: Dict[str, Tuple[str, str, List[int]]] = {}

        # Get time range start
        self.add_to_number_linking_scores({'0'},
                                          number_linking_scores,
                                          get_time_range_start_from_utterance,
                                          current_tokenized_utterance,
                                          'time_range_start')

        self.add_to_number_linking_scores({"1200"},
                                          number_linking_scores,
                                          get_time_range_end_from_utterance,
                                          current_tokenized_utterance,
                                          'time_range_end')

        self.add_to_number_linking_scores({'0', '1'},
                                          number_linking_scores,
                                          get_numbers_from_utterance,
                                          current_tokenized_utterance,
                                          'number')

        # Build the string linking dict; as with numbers, only the last utterance's dict is kept.
        string_linking_dict: Dict[str, List[int]] = {}
        for tokenized_utterance in self.tokenized_utterances:
            string_linking_dict = get_strings_from_utterance(tokenized_utterance)
        strings_list = []

        if self.tables_with_strings:
            for table, columns in self.tables_with_strings.items():
                for column in columns:
                    self.cursor.execute(f'SELECT DISTINCT {table} . {column} FROM {table}')
                    strings_list.extend([(format_action(f"{table}_{column}_string", str(row[0]),
                                                        is_string=not 'number' in column,
                                                        is_number='number' in column),
                                          str(row[0]))
                                         for row in self.cursor.fetchall()])

        # We construct the linking scores for strings from the ``string_linking_dict`` here.
        for string in strings_list:
            entity_linking = [0 for token in current_tokenized_utterance]
            # string_linking_dict has the strings and linking scores from the last utterance.
            # If the string is not in the last utterance, then the linking scores will be all 0.
            for token_index in string_linking_dict.get(string[1], []):
                entity_linking[token_index] = 1
            action = string[0]
            string_linking_scores[action] = (action.split(' -> ')[0], string[1], entity_linking)

        entity_linking_scores['number'] = number_linking_scores
        entity_linking_scores['string'] = string_linking_scores
        return entity_linking_scores
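The returned structure is a nested dictionary: the outer key is the entity type, the inner key is an action, and the value is a (nonterminal, entity value, linking score) triple. For a current utterance like "show flights to boston" it would look roughly as follows; the action strings and the ``city_city_name_string`` nonterminal are illustrative placeholders, not the exact output of ``format_action``.

    entity_linking_scores = {
        'number': {
            'time_range_start -> ["0"]': ('time_range_start', '0', [0, 0, 0, 0]),
            'number -> ["1"]': ('number', '1', [0, 0, 0, 0]),
        },
        'string': {
            'city_city_name_string -> ["BOSTON"]':
                ('city_city_name_string', 'BOSTON', [0, 0, 0, 1]),
        },
    }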