Пример #1
0
 def extract_variables(self, word):
     """
     Enumerate the variable values that are possible for the given word.

     Returns a list of candidate assignments; each assignment is a list
     of substrings of ``word``, one per fragment in
     ``self.const_fragments[1:]``. The list is empty when ``word`` does
     not match ``self._regexp`` or when no variable start positions are
     found, and it is ``[[]]`` when there are no variables at all.
     """
     # TODO: learn to extract only the optimal values
     matched = self._regexp.match(word)
     if not matched:
         return []
     # the pattern contains no variables
     if not len(self.variable_indexes):
         return [[]]
     start_positions = self._find_variable_start_positions(word)
     if start_positions is None:
         return []
     # extract increasing sequences of indexes; the length of the word is
     # appended as the final boundary
     boundaries = start_positions + [[len(word)]]
     position_seqs = utility.extract_ordered_sequences(
         boundaries, self._differences, strict_min=False)
     # each value is the slice starting at seq[k] and ending at seq[k + 1]
     # shortened by the length of the k-th fragment of const_fragments[1:]
     return [
         [
             word[seq[k]:seq[k + 1] - len(fragment)]
             for k, fragment in enumerate(self.const_fragments[1:])
         ]
         for seq in position_seqs
     ]
Пример #2
0
 def _extract_indexes_from_path(self, path):
     """
     Возвращает слова, принимаемые на данном пути,
     вместе с соответствующими индексами
     """
     if len(path) <= 1:
         return [("", [])]
     if not hasattr(self, 'edge_labels_'):
         self._make_edge_labels()
     state_pairs = [(elem, path[i + 1]) for i, elem in enumerate(path[:-1])]
     # edge_labels_on_path = [[('п', ((0,), (0,), (0,)))], [('е', ((1,), (1,), (1,)))],
     #                        [('с', ((2,), (2,), (2,)))], [('к', ((4,), (3,), (3,)))]]
     edge_labels_on_path = [
         self.edge_labels_[state_pair] for state_pair in state_pairs
     ]
     words_with_indexes = [
         zip(*elem) for elem in product(*edge_labels_on_path)
     ]
     # words_with_indexes = [('песк', [((0,), (0,), (0,)), ((1,), (1,), (1,)),
     #                                 ((2,), (2,), (2,)), ((4,), (3,), (3,))])]
     words_with_indexes = [("".join(first), list(second))
                           for first, second in words_with_indexes]
     answer = [None] * len(words_with_indexes)
     for i, (word, data) in enumerate(words_with_indexes):
         lists = [elem for elem in zip(*data)]
         word_indexes = [extract_ordered_sequences(elem) for elem in lists]
         # word_indexes = [list(map(tuple, product(*elem))) for elem in lists]
         # word_indexes = [[(0, 1, 2, 4)], [(0, 1, 2, 3)], [(0, 1, 2, 3)]]
         answer[i] = (word, word_indexes)
     return answer
Пример #3
0
 def _extract_indexes_from_path(self, path):
     """
     Возвращает слова, принимаемые на данном пути,
     вместе с соответствующими индексами
     """
     if len(path) <= 1:
         return [("", [])]
     if not hasattr(self, 'edge_labels_'):
         self._make_edge_labels()
     state_pairs = [(elem, path[i + 1]) for i, elem in enumerate(path[:-1])]
     # edge_labels_on_path = [[('п', ((0,), (0,), (0,)))], [('е', ((1,), (1,), (1,)))],
     #                        [('с', ((2,), (2,), (2,)))], [('к', ((4,), (3,), (3,)))]]
     edge_labels_on_path = [self.edge_labels_[state_pair] for state_pair in state_pairs]
     words_with_indexes = [zip(*elem) for elem in product(*edge_labels_on_path)]
     # words_with_indexes = [('песк', [((0,), (0,), (0,)), ((1,), (1,), (1,)),
     #                                 ((2,), (2,), (2,)), ((4,), (3,), (3,))])]
     words_with_indexes = [("".join(first), list(second))
                           for first, second in words_with_indexes]
     answer = [None] * len(words_with_indexes)
     for i, (word, data) in enumerate(words_with_indexes):
         lists = [elem for elem in zip(*data)]
         word_indexes = [extract_ordered_sequences(elem) for elem in lists]
         # word_indexes = [list(map(tuple, product(*elem))) for elem in lists]
         # word_indexes = [[(0, 1, 2, 4)], [(0, 1, 2, 3)], [(0, 1, 2, 3)]]
         answer[i] = (word, word_indexes)
     return answer
Пример #4
0
 def extract_variables(self, word):
     """
     Enumerate the variable values that are possible for the given word.

     Returns a list of candidate assignments; each assignment is a list
     of substrings of ``word``, one per fragment in
     ``self.const_fragments[1:]``. The list is empty when ``word`` does
     not match ``self._regexp`` or when no variable start positions are
     found, and it is ``[[]]`` when there are no variables at all.
     """
     # TODO: learn to extract only the optimal values
     matched = self._regexp.match(word)
     if not matched:
         return []
     # the pattern contains no variables
     if not len(self.variable_indexes):
         return [[]]
     start_positions = self._find_variable_start_positions(word)
     if start_positions is None:
         return []
     # extract increasing sequences of indexes; the length of the word is
     # appended as the final boundary
     boundaries = start_positions + [[len(word)]]
     position_seqs = utility.extract_ordered_sequences(
         boundaries, self._differences, strict_min=False)
     # each value is the slice starting at seq[k] and ending at seq[k + 1]
     # shortened by the length of the k-th fragment of const_fragments[1:]
     return [
         [
             word[seq[k]:seq[k + 1] - len(fragment)]
             for k, fragment in enumerate(self.const_fragments[1:])
         ]
         for seq in position_seqs
     ]