def extract_variables(self, word):
    """Extract the variable values that are possible for the given word.

    Args:
        word: The string to analyse. It is matched against
            ``self._regexp`` and then sliced to obtain the values.

    Returns:
        A list with one entry per admissible sequence of positions.
        Each entry is a list of substrings of ``word``: the i-th
        substring starts at the i-th position of the sequence and ends
        ``len(self.const_fragments[i + 1])`` characters before the next
        position. Returns ``[]`` when ``word`` does not match
        ``self._regexp`` or when no start positions are found, and
        ``[[]]`` when ``self.variable_indexes`` is empty.
    """
    # TODO: learn to extract only the optimal values.
    if not self._regexp.match(word):
        return []
    # The word has no variables: the only answer is the empty assignment.
    # NOTE(review): kept as an explicit len() check because the container
    # type of variable_indexes is not visible here -- confirm before
    # switching to a truthiness test.
    if len(self.variable_indexes) == 0:
        return [[]]
    variable_positions = self._find_variable_start_positions(word)
    if variable_positions is None:
        return []
    # Extract increasing sequences of indexes; len(word) is appended as
    # the closing boundary so the last variable has an end position.
    variable_position_seqs = utility.extract_ordered_sequences(
        variable_positions + [[len(word)]],
        self._differences,
        strict_min=False,
    )
    trailing_fragments = self.const_fragments[1:]
    return [
        [
            word[seq[i]:seq[i + 1] - len(part)]
            for i, part in enumerate(trailing_fragments)
        ]
        for seq in variable_position_seqs
    ]
def _extract_indexes_from_path(self, path): """ Возвращает слова, принимаемые на данном пути, вместе с соответствующими индексами """ if len(path) <= 1: return [("", [])] if not hasattr(self, 'edge_labels_'): self._make_edge_labels() state_pairs = [(elem, path[i + 1]) for i, elem in enumerate(path[:-1])] # edge_labels_on_path = [[('п', ((0,), (0,), (0,)))], [('е', ((1,), (1,), (1,)))], # [('с', ((2,), (2,), (2,)))], [('к', ((4,), (3,), (3,)))]] edge_labels_on_path = [ self.edge_labels_[state_pair] for state_pair in state_pairs ] words_with_indexes = [ zip(*elem) for elem in product(*edge_labels_on_path) ] # words_with_indexes = [('песк', [((0,), (0,), (0,)), ((1,), (1,), (1,)), # ((2,), (2,), (2,)), ((4,), (3,), (3,))])] words_with_indexes = [("".join(first), list(second)) for first, second in words_with_indexes] answer = [None] * len(words_with_indexes) for i, (word, data) in enumerate(words_with_indexes): lists = [elem for elem in zip(*data)] word_indexes = [extract_ordered_sequences(elem) for elem in lists] # word_indexes = [list(map(tuple, product(*elem))) for elem in lists] # word_indexes = [[(0, 1, 2, 4)], [(0, 1, 2, 3)], [(0, 1, 2, 3)]] answer[i] = (word, word_indexes) return answer
def _extract_indexes_from_path(self, path): """ Возвращает слова, принимаемые на данном пути, вместе с соответствующими индексами """ if len(path) <= 1: return [("", [])] if not hasattr(self, 'edge_labels_'): self._make_edge_labels() state_pairs = [(elem, path[i + 1]) for i, elem in enumerate(path[:-1])] # edge_labels_on_path = [[('п', ((0,), (0,), (0,)))], [('е', ((1,), (1,), (1,)))], # [('с', ((2,), (2,), (2,)))], [('к', ((4,), (3,), (3,)))]] edge_labels_on_path = [self.edge_labels_[state_pair] for state_pair in state_pairs] words_with_indexes = [zip(*elem) for elem in product(*edge_labels_on_path)] # words_with_indexes = [('песк', [((0,), (0,), (0,)), ((1,), (1,), (1,)), # ((2,), (2,), (2,)), ((4,), (3,), (3,))])] words_with_indexes = [("".join(first), list(second)) for first, second in words_with_indexes] answer = [None] * len(words_with_indexes) for i, (word, data) in enumerate(words_with_indexes): lists = [elem for elem in zip(*data)] word_indexes = [extract_ordered_sequences(elem) for elem in lists] # word_indexes = [list(map(tuple, product(*elem))) for elem in lists] # word_indexes = [[(0, 1, 2, 4)], [(0, 1, 2, 3)], [(0, 1, 2, 3)]] answer[i] = (word, word_indexes) return answer
def extract_variables(self, word):
    """Extract the variable values that are possible for the given word.

    Args:
        word: The string to analyse. It is matched against
            ``self._regexp`` and then sliced to obtain the values.

    Returns:
        A list with one entry per admissible sequence of positions.
        Each entry is a list of substrings of ``word``: the i-th
        substring starts at the i-th position of the sequence and ends
        ``len(self.const_fragments[i + 1])`` characters before the next
        position. Returns ``[]`` when ``word`` does not match
        ``self._regexp`` or when no start positions are found, and
        ``[[]]`` when ``self.variable_indexes`` is empty.
    """
    # TODO: learn to extract only the optimal values.
    if not self._regexp.match(word):
        return []
    # The word has no variables: the only answer is the empty assignment.
    # NOTE(review): kept as an explicit len() check because the container
    # type of variable_indexes is not visible here -- confirm before
    # switching to a truthiness test.
    if len(self.variable_indexes) == 0:
        return [[]]
    variable_positions = self._find_variable_start_positions(word)
    if variable_positions is None:
        return []
    # Extract increasing sequences of indexes; len(word) is appended as
    # the closing boundary so the last variable has an end position.
    variable_position_seqs = utility.extract_ordered_sequences(
        variable_positions + [[len(word)]],
        self._differences,
        strict_min=False,
    )
    trailing_fragments = self.const_fragments[1:]
    return [
        [
            word[seq[i]:seq[i + 1] - len(part)]
            for i, part in enumerate(trailing_fragments)
        ]
        for seq in variable_position_seqs
    ]