Пример #1
0
def main():
    """Load (or build and cache) the BGU lexicon, the Hebrew treebank and the
    morph-level vocabulary, then print corpus statistics, vocabulary sizes and
    the sizes of the five morphological dataset partitions.
    """
    root = Path('.')
    tokens_idx = '01'
    lattices_idx = '10'
    # Treebank source files: token lists plus gold morphological lattices
    # for the train/dev/test splits.
    tb_files = {'train-hebtb.tokens': f'data/clean/spmrl/hebtb/train-hebtb-{tokens_idx}-tokens.txt',
                'train-hebtb-gold.lattices': f'data/clean/spmrl/hebtb/train-hebtb-{lattices_idx}-gold.lattices',
                'dev-hebtb.tokens': f'data/clean/spmrl/hebtb/dev-hebtb-{tokens_idx}-tokens.txt',
                'dev-hebtb-gold.lattices': f'data/clean/spmrl/hebtb/dev-hebtb-{lattices_idx}-gold.lattices',
                'test-hebtb.tokens': f'data/clean/spmrl/hebtb/test-hebtb-{tokens_idx}-tokens.txt',
                'test-hebtb-gold.lattices': f'data/clean/spmrl/hebtb/test-hebtb-{lattices_idx}-gold.lattices'}
    # Lexicon source files: raw prefix lexicon and the cleaned main lexicon.
    lex_files = {'pref-lex': 'data/raw/spmrl/bgulex/bgupreflex_withdef.utf8.hr',
                 'lex': 'data/clean/spmrl/bgulex/bgulex-03.hr'}
    lexicon_pickle = Path('data/processed/spmrl/bgulex.pickle')
    treebank_pickle = Path('data/processed/spmrl/hebtb.pickle')
    vocab_pickle = Path('data/processed/spmrl/hebtb-morph-vocab/vocab.pickle')

    # Load each cached artifact when its pickle exists; otherwise build it
    # from the source files and cache the result for subsequent runs.
    if lexicon_pickle.exists():
        lexicon = Lexicon.load(lexicon_pickle)
    else:
        lexicon = Lexicon(lex_files)
        lexicon.save(lexicon_pickle)
    if treebank_pickle.exists():
        treebank = Treebank.load(treebank_pickle)
    else:
        treebank = Treebank(lexicon, tb_files)
        treebank.save(treebank_pickle)

    print(f"Train sentences: {len(treebank.infused_train_sentences)}")
    print(f"Dev sentences: {len(treebank.infused_dev_sentences)}")
    print(f"Test sentences: {len(treebank.infused_test_sentences)}")

    # The vocabulary is built over all infused splits combined.
    all_sentences = (treebank.infused_train_sentences
                     + treebank.infused_dev_sentences
                     + treebank.infused_test_sentences)
    if vocab_pickle.exists():
        vocab = MorphVocab.load(vocab_pickle)
    else:
        vocab = MorphVocab(all_sentences)
        vocab.save(vocab_pickle)

    # Report every vocabulary table size (label matches the attribute name).
    for table in ('tokens', 'forms', 'lemmas', 'tags', 'feats'):
        print(f"Vocab {table}: {len(getattr(vocab, table))}")

    # Build all dataset partitions first, then print their sizes, so any
    # output produced while building is not interleaved with the size lines.
    partitions = [('train-inf', 'Train infused'),
                  ('dev-inf', 'Dev infused'),
                  ('test-inf', 'Test infused'),
                  ('dev-uninf', 'Dev uninfused'),
                  ('test-uninf', 'Test uninfused')]
    datasets = {part: get_morph_dataset_partition(part, root, vocab, treebank)
                for part, _ in partitions}
    for part, label in partitions:
        print(f"{label} dataset: {len(datasets[part])}")
Пример #2
0
def lattice(tokens: list, lex: Lexicon) -> morph.Lattice:
    """Build a morphological lattice for *tokens*.

    Maps each 1-based token position to the lexicon analyses returned by
    ``lex.entry`` for that token.
    """
    entries = [lex.entry(token) for token in tokens]
    result = morph.Lattice()
    for position, entry in enumerate(entries, start=1):
        result[position] = entry.analyses
    return result
Пример #3
0
 def _create_sentence(self, lexicon: lex.Lexicon, tokens: list, md_lattice: conllx.LatticeGraph) -> nlp.Sentence:
     """Assemble an ``nlp.Sentence`` from lexicon analyses and the gold MD lattice.

     Builds two lattices keyed by 1-based token position: one holding every
     lexicon analysis per token, and one holding the single gold analysis
     extracted from *md_lattice*.

     Raises ValueError when a token does not have exactly one gold analysis.
     """
     analysis_lattice = morph.Lattice()
     for position, token in enumerate(tokens, start=1):
         analysis_lattice[position] = lexicon.entry(token).analyses
     gold = morph.Lattice()
     for position, token in enumerate(tokens, start=1):
         # NOTE(review): reaches into md_lattice's private _token_paths —
         # presumably the per-token path index; a public accessor would be safer.
         analyses = [self._create_analysis(path)
                     for path in md_lattice._token_paths[position]]
         if len(analyses) != 1:
             raise ValueError("token gold analysis: {}".format(analyses))
         gold[position] = analyses
     return nlp.Sentence(tokens, analysis_lattice, gold)
Пример #4
0
    'dev-hebtb-gold.lattices':
    f'{home_path}/data/clean/spmrl/hebtb/dev-hebtb-{src_lattice_idx}-gold.lattices',
    'test-hebtb.tokens':
    f'{home_path}/data/clean/spmrl/hebtb/test-hebtb-{src_tokens_idx}-tokens.txt',
    'test-hebtb-gold.lattices':
    f'{home_path}/data/clean/spmrl/hebtb/test-hebtb-{src_lattice_idx}-gold.lattices'
}
# Lexicon source files: the raw prefix lexicon plus the cleaned main lexicon.
lex_files = {
    'pref-lex': 'data/raw/spmrl/bgulex/bgupreflex_withdef.utf8.hr',
    'lex': 'data/clean/spmrl/bgulex/bgulex-03.hr'
}
# Pre-built pickled artifacts (home_path is defined earlier in the file).
bgulex_file_path = Path(f'{home_path}/data/processed/spmrl/bgulex.pickle')
hebtb_file_path = Path(f'{home_path}/data/processed/spmrl/hebtb.pickle')
vocab_file_path = Path(
    f'{home_path}/data/processed/spmrl/hebtb-token-vocab/vocab.pickle')
# Unconditional loads: unlike a load-or-build pattern there is no fallback
# here, so these presumably fail if the pickles have not been created yet.
bgulex = Lexicon.load(bgulex_file_path)
hebtb = Treebank.load(hebtb_file_path)
# Named views over the treebank splits: infused train/dev/test plus
# uninfused dev/test variants.
hebtb_partition = {
    'train-inf': hebtb.infused_train_sentences,
    'dev-inf': hebtb.infused_dev_sentences,
    'test-inf': hebtb.infused_test_sentences,
    'dev-uninf': hebtb.uninfused_dev_sentences,
    'test-uninf': hebtb.uninfused_test_sentences
}
tb_vocab = TokenVocab.load(vocab_file_path)

# Data: token-level dataset partitions built from the vocab and treebank.
train_set = get_token_dataset_partition('train-inf', home_path, tb_vocab,
                                        hebtb)
dev_inf_set = get_token_dataset_partition('dev-inf', home_path, tb_vocab,
                                          hebtb)