Example #1
# Imports assumed by this snippet; in the pandora package the Tagger class
# is expected to live in pandora.tagger (adjust the import if it differs).
import os
import codecs

import pandora.utils
from pandora.tagger import Tagger


def tag_dir(model, input_dir, output_dir, tokenized_input, string=None, **kwargs):
    """ Tag a directory of texts

    :param model: Path to a model directory
    :param input_dir: Path to a directory containing text files
    :param output_dir: Path to a directory for the tagged output files
    :param tokenized_input: Whether the input files are already tokenized
    """
    print('::: started :::')

    # Extra keyword arguments are passed on to the Tagger as config overrides.
    tagger = Tagger(load=True, model_dir=model, overwrite=kwargs)
    print('Tagger loaded, now annotating...')

    orig_path = input_dir
    new_path = output_dir

    for filename in os.listdir(orig_path):
        # Only plain-text files are tagged.
        if not filename.endswith('.txt'):
            continue

        print('\t +', filename)
        unseen_tokens = pandora.utils.load_unannotated_file(
            os.path.join(orig_path, filename),
            nb_instances=None,
            tokenized_input=tokenized_input
        )

        annotations = tagger.annotate(unseen_tokens)
        keys = list(annotations.keys())
        print("Keys: " + "\t".join(keys))
        # One .tsv per input file: a header row with the annotation layers,
        # then one tab-separated row per token.
        with codecs.open(os.path.join(new_path, filename + ".tsv"), 'w', 'utf8') as f:
            f.write("\t".join(keys) + "\n")
            for row in zip(*(annotations[k] for k in keys)):
                f.write('\t'.join(row) + '\n')

    print('::: ended :::')
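A minimal usage sketch for the function above; the model and directory paths are hypothetical placeholders rather than values from the original source, and the output directory is assumed to already exist.

# Hypothetical call to tag_dir(); all paths below are placeholders.
tag_dir(
    model='models/my_model',      # directory holding a trained pandora model
    input_dir='data/plain',       # directory of .txt files to tag
    output_dir='data/tagged',     # one .tsv per input file is written here
    tokenized_input=False         # let the loader tokenize the raw text
)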
Example #2
def main():
    # Tag every .txt file in data/wilhelmus/orig/ with a pre-trained model
    # and write token / lemma / POS columns to data/wilhelmus/tagged/.
    print('::: started :::')

    tagger = Tagger(load=True, model_dir='models/wilhelmus_full')

    print('Tagger loaded, now annotating...')

    orig_path = 'data/wilhelmus/orig/'
    new_path = 'data/wilhelmus/tagged/'

    for filename in os.listdir(orig_path):
        if not filename.endswith('.txt'):
            continue

        print('\t +', filename)
        unseen_tokens = pandora.utils.load_unannotated_file(
            orig_path + filename,
            nb_instances=None,
            tokenized_input=False
        )

        annotations = tagger.annotate(unseen_tokens)
        with codecs.open(new_path + filename, 'w', 'utf8') as f:
            # Other annotation layers (e.g. 'postcorrect_lemmas') can be
            # written instead of 'lemmas' if the loaded model provides them.
            for t, l, p in zip(annotations['tokens'], annotations['lemmas'],
                               annotations['pos']):
                f.write('\t'.join((t, l, p)) + '\n')
    
    print('::: ended :::')
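If this example were saved as a standalone script, it would typically be run through the usual entry-point guard shown below; the original file's entry point is not included in the snippet, so this is an assumption.

if __name__ == '__main__':
    main()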
Example #3
def tag_string(model, input_dir, output_dir=None, string=None, **kwargs):
    """ Tag a single untokenized string

    :param model: Path to a model directory
    :param input_dir: Untokenized string to tag
    """

    print('::: started :::')

    # Extra keyword arguments are passed on to the Tagger as config overrides.
    tagger = Tagger(load=True, model_dir=model, overwrite=kwargs)

    print('Tagger loaded, now annotating...')

    # `tokenize` is assumed to be a module-level tokenizer (e.g. a compiled
    # whitespace regex) defined elsewhere in the original source file.
    unseen_tokens = tokenize.split(input_dir)
    print(unseen_tokens)

    annotations = tagger.annotate(unseen_tokens)

    keys = list(annotations.keys())
    print("--------------------")
    print('\t'.join(keys))
    print("--------------------")
    # Print one tab-separated row per token, one column per annotation layer.
    for row in zip(*(annotations[k] for k in keys)):
        print('\t'.join(row))

    print('::: ended :::')
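A short sketch of how tag_string() might be called; the model path and the input sentence are placeholders, and the printed columns depend on which annotation layers the loaded model provides.

# Hypothetical call to tag_string(); 'models/my_model' is a placeholder.
tag_string(
    model='models/my_model',
    input_dir='This is a short test sentence to tag.'  # raw, untokenized text
)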