Пример #1
0
Файл: doc.py Проект: Sheng-J/plp
 def transform_tokens_gen(token_iter):
     """Yield tokens from ``token_iter``, substituting transformer output.

     The stream is walked with a ``(left, center, right)`` context window.
     For each center token the transformers are queried in order via
     ``transformer[left, center, right]``; the first non-``None`` result is
     iterated and its items are yielded in place of the center. If no
     transformer produces a result, the center itself is yielded.

     While the right context holds fewer than ``max_num_right`` tokens and
     nothing has matched, one more token is pulled from ``token_iter`` and
     the same center is retried.

     ``token_transformers``, ``max_num_left``, ``max_num_right`` and
     ``ptoken`` are not defined in this function; they are looked up in the
     surrounding scope.
     """
     left, right = [], []
     # An exhausted iterator yields None here, so the loop is never entered.
     center = next(token_iter, None)
     while center is not None:
         window_is_full = (len(left) == max_num_left
                           and len(right) == max_num_right)
         replacement = None
         for transformer in token_transformers:
             # With a full window every transformer is tried; otherwise only
             # those reporting themselves applicable to the current sizes.
             if window_is_full or transformer.is_applicable(
                     len(left), len(right)):
                 replacement = transformer[left, center, right]
                 if replacement is not None:
                     break
         if replacement is None and len(right) < max_num_right:
             # Nothing matched and the right context can still grow:
             # fetch one more token and retry the same center.
             try:
                 lookahead = next(token_iter)
             except StopIteration:
                 pass  # stream exhausted; emit with the context available
             else:
                 right.append(lookahead)
                 continue
         if replacement is None:
             yield center
         else:
             for new_token in replacement:
                 yield new_token
         # NOTE(review): presumably advances the window by one token and
         # returns the next center (None when done) - confirm in ptoken.
         center = ptoken.shift_context_center_tokens(
             (left, center, right), token_iter, max_num_left)
Пример #2
0
Файл: doc.py Проект: Sheng-J/plp
 def get_iters_with_annotations(self, annotation_transformers):
     """Yield ``(token, annotation)`` pairs for every token in ``self``.

     Tokens are obtained from ``iter(self)`` and examined through a
     ``(left, center, right)`` context window. The annotation is the first
     non-``None`` value returned by ``transformer[left, center, right]``
     across ``annotation_transformers``, or ``None`` if no transformer
     produced one.

     While the right context holds fewer than the maximum number of tokens
     and no annotation has been found, another token is pulled and the same
     center is retried.

     The window limits come from
     ``ptoken.get_transformers_max_num_tokens(annotation_transformers)``.
     """
     max_num_left, max_num_right = ptoken.get_transformers_max_num_tokens(
         annotation_transformers)
     left, right = [], []
     token_iter = iter(self)
     # An exhausted iterator yields None here, so the loop is never entered.
     center = next(token_iter, None)
     while center is not None:
         window_is_full = (len(left) == max_num_left
                           and len(right) == max_num_right)
         annotation = None
         for transformer in annotation_transformers:
             # With a full window every transformer is tried; otherwise only
             # those reporting themselves applicable to the current sizes.
             if window_is_full or transformer.is_applicable(
                     len(left), len(right)):
                 annotation = transformer[left, center, right]
                 if annotation is not None:
                     break
         if annotation is None and len(right) < max_num_right:
             # No annotation yet and the right context can still grow:
             # fetch one more token and retry the same center.
             try:
                 lookahead = next(token_iter)
             except StopIteration:
                 pass  # stream exhausted; emit with the context available
             else:
                 right.append(lookahead)
                 continue
         yield center, annotation
         # NOTE(review): presumably advances the window by one token and
         # returns the next center (None when done) - confirm in ptoken.
         center = ptoken.shift_context_center_tokens(
             (left, center, right), token_iter, max_num_left)
Пример #3
0
Файл: doc.py Проект: Sheng-J/plp
 def skip_tokens_gen(token_iter):
     """Yield the tokens of ``token_iter`` that no transformer flags.

     Each center token is examined through a ``(left, center, right)``
     context window. Transformers are queried in order via
     ``transformer[left, center, right]``; as soon as one returns a truthy
     value the center is dropped. Otherwise the center is yielded unchanged.

     While the right context holds fewer than ``max_num_right`` tokens and
     the center has not been flagged, another token is pulled and the same
     center is retried before it is yielded.

     ``bool_token_transformers``, ``max_num_left``, ``max_num_right`` and
     ``ptoken`` are not defined in this function; they are looked up in the
     surrounding scope.
     """
     left, right = [], []
     # An exhausted iterator yields None here, so the loop is never entered.
     center = next(token_iter, None)
     while center is not None:
         window_is_full = (len(left) == max_num_left
                           and len(right) == max_num_right)
         should_skip = False
         for transformer in bool_token_transformers:
             # With a full window every transformer is tried; otherwise only
             # those reporting themselves applicable to the current sizes.
             if window_is_full or transformer.is_applicable(
                     len(left), len(right)):
                 should_skip = transformer[left, center, right]
                 if should_skip:
                     break
         if not should_skip:
             if len(right) < max_num_right:
                 # Not flagged yet, but more right context may change that:
                 # fetch one more token and retry the same center.
                 try:
                     lookahead = next(token_iter)
                 except StopIteration:
                     pass  # stream exhausted; decide with what we have
                 else:
                     right.append(lookahead)
                     continue
             yield center
         # NOTE(review): presumably advances the window by one token and
         # returns the next center (None when done) - confirm in ptoken.
         center = ptoken.shift_context_center_tokens(
             (left, center, right), token_iter, max_num_left)
Пример #4
0
 def transform_flags_gen(seq_flag_iter):
     """Yield ``(seq, flag)`` pairs, replacing flags that a transformer maps.

     ``seq_flag_iter`` produces ``(seq, flag)`` pairs. Only the flags take
     part in matching: transformers are queried in order via
     ``transformer[left_flags, flag, right_flags]`` and the first
     non-``None`` result replaces the flag in the yielded pair. The ``seq``
     values are buffered alongside the flags and yielded unchanged.

     While fewer than ``max_num_right`` flags are buffered on the right and
     nothing has matched, another pair is pulled and the same center is
     retried.

     ``flag_token_transformers``, ``max_num_left``, ``max_num_right`` and
     ``ptoken`` are not defined in this function; they are looked up in the
     surrounding scope.
     """
     left_seq_list, right_seq_list = [], []
     left_flag_list, right_flag_list = [], []
     try:
         seq, center = next(seq_flag_iter)
     except StopIteration:
         return
     while center is not None:
         window_is_full = (len(left_flag_list) == max_num_left
                           and len(right_flag_list) == max_num_right)
         new_center = None
         for transformer in flag_token_transformers:
             # With a full window every transformer is tried; otherwise only
             # those reporting themselves applicable to the current sizes.
             if window_is_full or transformer.is_applicable(
                     len(left_flag_list), len(right_flag_list)):
                 new_center = transformer[
                     left_flag_list, center, right_flag_list]
                 # The first transformer that maps the flag wins.
                 if new_center is not None:
                     break
         if new_center is None and len(right_flag_list) < max_num_right:
             # Nothing matched and the right context can still grow:
             # buffer one more pair and retry the same center.
             try:
                 right_seq, right_flag = next(seq_flag_iter)
             except StopIteration:
                 pass  # stream exhausted; emit with the context available
             else:
                 right_seq_list.append(right_seq)
                 right_flag_list.append(right_flag)
                 continue
         yield seq, (center if new_center is None else new_center)
         # NOTE(review): the helper is handed parallel seq/flag buffers and
         # its result is unpacked into a pair - confirm in ptoken what it
         # returns once the stream is exhausted.
         seq, center = ptoken.shift_context_center_tokens(
             (
                 [left_seq_list, left_flag_list],
                 [seq, center],
                 [right_seq_list, right_flag_list]
             ),
             seq_flag_iter, max_num_left
         )
Пример #5
0
    def _word2vec_gen(self, token_iter, unk_token):
        """Yield ``(center_word, context_word)`` pairs from ``token_iter``.

        For every center word, one pair is yielded per word currently held
        in the left and right context buffers (left words first). Pairs are
        suppressed when either member equals ``unk_token``.

        The right buffer is pre-filled with up to ``self._window_size``
        tokens before the first pair is produced.
        """
        try:
            center_word = next(token_iter)
        except StopIteration:
            # Empty stream: nothing to pair up.
            return

        before, after = [], []
        # Prime the look-ahead buffer; a short stream simply fills less.
        for _ in range(self._window_size):
            try:
                upcoming = next(token_iter)
            except StopIteration:
                break
            after.append(upcoming)

        while center_word is not None:
            if center_word != unk_token:
                for neighbour in before + after:
                    if neighbour != unk_token:
                        yield center_word, neighbour
            # NOTE(review): presumably slides both buffers by one token and
            # returns the next center (None when done) - confirm in ptoken.
            center_word = ptoken.shift_context_center_tokens(
                (before, center_word, after),
                token_iter,
                self._window_size
            )