Python Processor примеры использования

Язык программирования: Python
Пространство имен/Пакет: LTTL
Класс/Тип: Processor
Примеров на hotexamples.com: 1
Python Processor - 1 пример найден. Это лучшие примеры Python кода для LTTL.Processor, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.
Основные методы
Показать Скрыть
count_in_context(3)
cooc_in_context(2)
cooc_in_window(2)
annotate_contexts(1)
collocations(1)
context(1)
count_in_chain(1)
count_in_window(1)
length_in_context(1)
length_in_window(1)
neighbors(1)
variety_in_context(1)
variety_in_window(1)
Пример #1
Показать файл
Файл: test_cooc.py Проект: axanthos/LTTL
    def setUp(self):
        input_seg = Input("un texte")
        word_seg = Segmenter.tokenize(
            input_seg,
            [(re.compile(r'\w+'), 'tokenize')],
            import_annotations=False,
        )
        letter_seg = Segmenter.tokenize(
            input_seg,
            [
                (re.compile(r'\w'), 'tokenize', {'type': 'C'}),
                (re.compile(r'[aeiouy]'), 'tokenize', {'type': 'V'}),
            ],
            import_annotations=False,
            merge_duplicates=True,
        )
        vowel_seg, consonant_seg = Segmenter.select(
            letter_seg,
            re.compile(r'V'),
            annotation_key='type',
        )

        #  Create the cooccurrence matrix for cooccurrence in window
        #  with window_size=3 and without annotation (woa):
        self.window_woa_row_ids = ['u', 'n', 't', 'e', 'x']
        self.window_woa_col_ids = ['u', 'n', 't', 'e', 'x']
        self.window_woa_values = {
            ('u', 'u'): 1,
            ('u', 'n'): 1,
            ('u', 't'): 1,
            ('u', 'e'): 0,
            ('u', 'x'): 0,
            ('n', 'u'): 1,
            ('n', 'n'): 2,
            ('n', 't'): 2,
            ('n', 'e'): 1,
            ('n', 'x'): 0,
            ('t', 'u'): 1,
            ('t', 'n'): 2,
            ('t', 't'): 5,
            ('t', 'e'): 4,
            ('t', 'x'): 3,
            ('e', 'u'): 0,
            ('e', 'n'): 1,
            ('e', 't'): 4,
            ('e', 'e'): 4,
            ('e', 'x'): 3,
            ('x', 'u'): 0,
            ('x', 'n'): 0,
            ('x', 't'): 3,
            ('x', 'e'): 3,
            ('x', 'x'): 3,
        }
        self.window_woa_header_row_id = '__unit__'
        self.window_woa_header_row_type = 'string'
        self.window_woa_header_col_id = '__unit2__'
        self.window_woa_header_col_type = 'string'
        self.window_woa_col_type = {
            col_id: 'continuous' for col_id in self.window_woa_col_ids
            }
        self.window_woa_ref = IntPivotCrosstab(
            self.window_woa_row_ids,
            self.window_woa_col_ids,
            self.window_woa_values,
            self.window_woa_header_row_id,
            self.window_woa_header_row_type,
            self.window_woa_header_col_id,
            self.window_woa_header_col_type,
            self.window_woa_col_type,
        )
        #  Create the cooccurrence matrix for cooccurrence in window
        #  with window_size=3 and with annotation (wa):
        self.window_wa_row_ids = ['C', 'V']
        self.window_wa_col_ids = ['C', 'V']
        self.window_wa_values = {
            ('C', 'C'): 5,
            ('C', 'V'): 5,
            ('V', 'C'): 5,
            ('V', 'V'): 5,
        }
        self.window_wa_header_row_id = '__unit__'
        self.window_wa_header_row_type = 'string'
        self.window_wa_header_col_id = '__unit2__'
        self.window_wa_header_col_type = 'string'
        self.window_wa_col_type = {
            col_id: 'continuous' for col_id in self.window_wa_col_ids
            }
        self.window_wa_ref = IntPivotCrosstab(
            self.window_wa_row_ids,
            self.window_wa_col_ids,
            self.window_wa_values,
            self.window_wa_header_row_id,
            self.window_wa_header_row_type,
            self.window_wa_header_col_id,
            self.window_wa_header_col_type,
            self.window_wa_col_type,
        )
        # Create the cooccurrence matrix for cooccurrence in context
        # without the secondary unit (wos) and without annotation (woa):
        self.context_wos_woa_row_ids = ['u', 'n', 't', 'e', 'x']
        self.context_wos_woa_col_ids = ['u', 'n', 't', 'e', 'x']
        self.context_wos_woa_values = {
            ('u', 'u'): 1,
            ('u', 'n'): 1,
            ('u', 't'): 0,
            ('u', 'e'): 0,
            ('u', 'x'): 0,
            ('n', 'u'): 1,
            ('n', 'n'): 1,
            ('n', 't'): 0,
            ('n', 'e'): 0,
            ('n', 'x'): 0,
            ('t', 'u'): 0,
            ('t', 'n'): 0,
            ('t', 't'): 1,
            ('t', 'e'): 1,
            ('t', 'x'): 1,
            ('e', 'u'): 0,
            ('e', 'n'): 0,
            ('e', 't'): 1,
            ('e', 'e'): 1,
            ('e', 'x'): 1,
            ('x', 'u'): 0,
            ('x', 'n'): 0,
            ('x', 't'): 1,
            ('x', 'e'): 1,
            ('x', 'x'): 1,
        }
        self.context_wos_woa_header_row_id = '__unit__'
        self.context_wos_woa_header_row_type = 'string'
        self.context_wos_woa_header_col_id = '__unit2__'
        self.context_wos_woa_header_col_type = 'string'
        self.context_wos_woa_col_type = {
            col_id: 'continuous' for col_id in self.context_wos_woa_col_ids
            }
        self.context_wos_woa_ref = IntPivotCrosstab(
            self.context_wos_woa_row_ids,
            self.context_wos_woa_col_ids,
            self.context_wos_woa_values,
            self.context_wos_woa_header_row_id,
            self.context_wos_woa_header_row_type,
            self.context_wos_woa_header_col_id,
            self.context_wos_woa_header_col_type,
            self.context_wos_woa_col_type,
        )
        # Create the cooccurrence matrix for cooccurrence in context
        # without the secondary unit (wos) and with annotation (wa):
        self.context_wos_wa_row_ids = ['V', 'C']
        self.context_wos_wa_col_ids = ['V', 'C']
        self.context_wos_wa_values = {
            ('V', 'V'): 2,
            ('V', 'C'): 2,
            ('C', 'V'): 2,
            ('C', 'C'): 2,
        }
        self.context_wos_wa_header_row_id = '__unit__'
        self.context_wos_wa_header_row_type = 'string'
        self.context_wos_wa_header_col_id = '__unit2__'
        self.context_wos_wa_header_col_type = 'string'
        self.context_wos_wa_col_type = {
            col_id: 'continuous' for col_id in self.context_wos_wa_col_ids
            }
        self.context_wos_wa_ref = IntPivotCrosstab(
            self.context_wos_wa_row_ids,
            self.context_wos_wa_col_ids,
            self.context_wos_wa_values,
            self.context_wos_wa_header_row_id,
            self.context_wos_wa_header_row_type,
            self.context_wos_wa_header_col_id,
            self.context_wos_wa_header_col_type,
            self.context_wos_wa_col_type,
        )
        # Create the cooccurrence matrix for cooccurrence in context
        # with the secondary unit (ws) and without annotation (woa):
        self.context_ws_woa_col_ids = ['u', 'e']
        self.context_ws_woa_row_ids = ['n', 't', 'x']
        self.context_ws_woa_values = {
            ('n', 'u'): 1,
            ('n', 'e'): 0,
            ('t', 'u'): 0,
            ('t', 'e'): 1,
            ('x', 'u'): 0,
            ('x', 'e'): 1,
        }
        self.context_ws_woa_header_row_id = '__unit__'
        self.context_ws_woa_header_row_type = 'string'
        self.context_ws_woa_header_col_id = '__unit2__'
        self.context_ws_woa_header_col_type = 'string'
        self.context_ws_woa_col_type = {
            col_id: 'continuous' for col_id in self.context_ws_woa_col_ids
            }
        self.context_ws_woa_ref = IntPivotCrosstab(
            self.context_ws_woa_row_ids,
            self.context_ws_woa_col_ids,
            self.context_ws_woa_values,
            self.context_ws_woa_header_row_id,
            self.context_ws_woa_header_row_type,
            self.context_ws_woa_header_col_id,
            self.context_ws_woa_header_col_type,
            self.context_ws_woa_col_type,
        )
        # Create the cooccurrence matrix for cooccurrence in context
        # with the secondary unit (ws) and with annotation (wa):
        self.context_ws_wa_row_ids = ['C']
        self.context_ws_wa_col_ids = ['V']
        self.context_ws_wa_values = {
            ('C', 'V'): 2,
        }
        self.context_ws_wa_header_row_id = '__unit__'
        self.context_ws_wa_header_row_type = 'string'
        self.context_ws_wa_header_col_id = '__unit2__'
        self.context_ws_wa_header_col_type = 'string'
        self.context_ws_wa_col_type = {
            col_id: 'continuous' for col_id in self.context_ws_wa_col_ids
            }
        self.context_ws_wa_ref = IntPivotCrosstab(
            self.context_ws_wa_row_ids,
            self.context_ws_wa_col_ids,
            self.context_ws_wa_values,
            self.context_ws_wa_header_row_id,
            self.context_ws_wa_header_row_type,
            self.context_ws_wa_header_col_id,
            self.context_ws_wa_header_col_type,
            self.context_ws_wa_col_type,
        )
        self.output_cooc_in_window_woa = Processor.cooc_in_window(
            units={'segmentation': letter_seg},
            window_size=3,
        )
        self.output_cooc_in_window_wa = Processor.cooc_in_window(
            units={'segmentation': letter_seg, 'annotation_key': 'type'},
            window_size=3,
        )
        self.output_cooc_in_context_wos_woa = Processor.cooc_in_context(
            units={'segmentation': letter_seg},
            contexts={'segmentation': word_seg},
            units2=None,
        )
        self.output_cooc_in_context_wos_wa = Processor.cooc_in_context(
            units={'segmentation': letter_seg, 'annotation_key': 'type'},
            contexts={'segmentation': word_seg},
            units2=None,
        )
        self.output_cooc_in_context_ws_woa = Processor.cooc_in_context(
            units={'segmentation': vowel_seg},
            contexts={'segmentation': word_seg},
            units2={'segmentation': consonant_seg},
        )
        self.output_cooc_in_context_ws_wa = Processor.cooc_in_context(
            units={'segmentation': vowel_seg, 'annotation_key': 'type'},
            contexts={'segmentation': word_seg},
            units2={'segmentation': consonant_seg, 'annotation_key': 'type'},
        )