Exemplos de Vocabulary.fromlist em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: utils.vocabulary

Classe / Tipo: Vocabulary

Método / Função: fromlist

Exemplos em hotexamples.com: 2

Vocabulary.fromlist em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de utils.vocabulary.Vocabulary.fromlist em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

Vocabulary(30)

load(18)

save(14)

build(10)

process_sentence(7)

load_vocabulary(3)

new(3)

size(2)

add_word(2)

add_words(2)

build_vocabulary_from_tokens(2)

compute_frequency(2)

fromlist(2)

load_glove_vocabulary(1)

merge_vocabularies(1)

save_counts(1)

observe_word(1)

setup_corpus_vocabulary(1)

ix2sent_drop_pad(1)

sent2ix(1)

sent2ix_andpad(1)

save_vocab(1)

get_word(1)

index(1)

get_char_vocab(1)

add(1)

add_token(1)

build_from_scratch(1)

construct_embedding_matrix(1)

freeze(1)

from_serializable(1)

get_index(1)

has_word(1)

get_language(1)

get_pad(1)

get_sentence(1)

get_unk(1)

abstract2sents(1)

get_word_vocab(1)

type_to_id(1)

Métodos Frequentes

Vocabulary (30)

load (18)

save (14)

build (10)

process_sentence (7)

load_vocabulary (3)

new (3)

size (2)

add_word (2)

add_words (2)

Métodos Frequentes

build_vocabulary_from_tokens (2)

compute_frequency (2)

fromlist (2)

load_glove_vocabulary (1)

merge_vocabularies (1)

save_counts (1)

observe_word (1)

setup_corpus_vocabulary (1)

ix2sent_drop_pad (1)

sent2ix (1)

sent2ix_andpad (1)

save_vocab (1)

get_word (1)

index (1)

get_char_vocab (1)

add (1)

add_token (1)

build_from_scratch (1)

construct_embedding_matrix (1)

freeze (1)

Métodos Frequentes

sent2ix_andpad (1)

save_vocab (1)

get_word (1)

index (1)

get_char_vocab (1)

add (1)

add_token (1)

build_from_scratch (1)

construct_embedding_matrix (1)

freeze (1)

from_serializable (1)

get_index (1)

has_word (1)

get_language (1)

get_pad (1)

get_sentence (1)

get_unk (1)

abstract2sents (1)

get_word_vocab (1)

type_to_id (1)

Métodos Frequentes

from_serializable (1)

get_index (1)

has_word (1)

get_language (1)

get_pad (1)

get_sentence (1)

get_unk (1)

abstract2sents (1)

get_word_vocab (1)

type_to_id (1)

Exemplo n.º 1

0

Exibir arquivo

def build_corpus(self):
    """Load the train/dev/test treebanks and build the vocabularies.

    Reads ``self.train_path``, ``self.dev_path`` and ``self.test_path``
    (one tree per line, parsed with ``fromstring``), except that the
    training set is read with ``ccg.fromfile`` when ``self.multitask`` is
    ``'ccg'``.  When ``self.multitask`` is ``'spans'`` every tree in all
    three treebanks is replaced by ``tree.convert()``.

    The word list comes from the JSON count file at ``self.vocab_path``
    when one is given (each word repeated ``count`` times), otherwise from
    the training trees.  The results are stored on ``self`` as
    ``word_vocab``, ``label_vocab``, ``train_treebank``, ``dev_treebank``
    and ``test_treebank``, and a short statistics report is printed.
    """

    def read_trees(path):
        # One tree per line; every stripped line is handed to `fromstring`.
        with open(path) as handle:
            return [fromstring(row.strip()) for row in handle]

    task = self.multitask

    print(f'Loading training trees from `{self.train_path}`...')
    if task == 'ccg':
        train_treebank = ccg.fromfile(self.train_path)
    else:
        train_treebank = read_trees(self.train_path)

    print(f'Loading development trees from `{self.dev_path}`...')
    dev_treebank = read_trees(self.dev_path)

    print(f'Loading test trees from `{self.test_path}`...')
    test_treebank = read_trees(self.test_path)

    if task == 'spans':
        # The span task needs the converted form of every tree.
        train_treebank, dev_treebank, test_treebank = (
            [tree.convert() for tree in bank]
            for bank in (train_treebank, dev_treebank, test_treebank)
        )

    print("Constructing vocabularies...")
    if self.vocab_path is None:
        words = [word for tree in train_treebank for word in tree.words()]
    else:
        print(f'Using word vocabulary specified in `{self.vocab_path}`')
        with open(self.vocab_path) as handle:
            counts = json.load(handle)
        # Expand the {word: count} mapping back into a flat token list.
        words = []
        for word, count in counts.items():
            words.extend([word] * count)

    if task == 'none':
        labels = []
        specials = [UNK, START]
    else:
        labels = [
            label for tree in train_treebank for label in tree.labels()
        ]
        specials = [UNK, START, STOP]
    words = specials + words

    word_vocab = Vocabulary.fromlist(words, unk_value=UNK)
    label_vocab = Vocabulary.fromlist(labels)

    self.word_vocab = word_vocab
    self.label_vocab = label_vocab

    self.train_treebank = train_treebank
    self.dev_treebank = dev_treebank
    self.test_treebank = test_treebank

    report = [
        'Corpus statistics:',
        f'Vocab: {word_vocab.size:,} words, {label_vocab.size:,} nonterminals',
        f'Train: {len(train_treebank):,} sentences',
        f'Dev: {len(dev_treebank):,} sentences',
        f'Test: {len(test_treebank):,} sentences',
    ]
    print('\n'.join(report))

Exemplo n.º 2

0

Exibir arquivo

Arquivo: supervised.py Projeto: daandouwe/thesis

def build_corpus(self):
    """Load the treebanks, filter the training set, and build vocabularies.

    Steps, in order:

    1. Read the train/dev/test treebanks from ``self.train_path``,
       ``self.dev_path`` and ``self.test_path`` (one tree per line, parsed
       with ``fromstring``).
    2. Optionally transform the *training* trees only: ``unlabelize()``
       when ``self.unlabeled`` is set, ``cnf()`` when ``self.model_type``
       is ``'crf'``.
    3. Collect the word list, either from the JSON count file at
       ``self.vocab_path`` or from the training trees.
    4. Optionally drop training trees longer than ``self.max_sent_len``
       and trees containing a label seen fewer than
       ``self.min_label_count`` times.
    5. Build the word, label and action vocabularies and store them,
       together with the three treebanks, on ``self``.

    Raises:
        ValueError: if ``self.model_type`` ends in ``'rnng'`` but is
            neither ``'disc-rnng'`` nor ``'gen-rnng'``.
    """

    def fraction(part, whole):
        # The report lines must not crash on an empty training set or an
        # empty label inventory; report 0.0% instead of dividing by zero.
        return part / whole if whole else 0.0

    print(f'Loading training trees from `{self.train_path}`...')
    with open(self.train_path) as f:
        train_treebank = [fromstring(line.strip()) for line in f]

    print(f'Loading development trees from `{self.dev_path}`...')
    with open(self.dev_path) as f:
        dev_treebank = [fromstring(line.strip()) for line in f]

    print(f'Loading test trees from `{self.test_path}`...')
    with open(self.test_path) as f:
        test_treebank = [fromstring(line.strip()) for line in f]

    if self.unlabeled:
        print('Converting trees to unlabeled form...')
        for tree in train_treebank:
            tree.unlabelize()

    if self.model_type == 'crf':
        print('Converting trees to CNF...')
        train_treebank = [tree.cnf() for tree in train_treebank]

        # NOTE(review): the nesting of this pass under the 'crf' branch was
        # reconstructed from whitespace-flattened source -- confirm.
        if self.unlabeled:
            for tree in train_treebank:
                tree.remove_chains()

    print("Constructing vocabularies...")
    if self.vocab_path is not None:
        print(f'Using word vocabulary specified in `{self.vocab_path}`')
        with open(self.vocab_path) as f:
            vocab = json.load(f)
        # Expand the {word: count} mapping back into a flat token list.
        words = [word for word, count in vocab.items() for _ in range(count)]
    else:
        # Words are collected before the length/label filters below run.
        words = [word for tree in train_treebank for word in tree.words()]

    if self.max_sent_len > 0:
        filtered_treebank = [
            tree for tree in train_treebank
            if len(tree.words()) <= self.max_sent_len
        ]
        print(
            "Using sentences with length <= {}: {:.1%} of all training trees."
            .format(
                self.max_sent_len,
                fraction(len(filtered_treebank), len(train_treebank))))
        train_treebank = filtered_treebank

    if self.min_label_count > 1:
        counted_labels = Counter(
            label for tree in train_treebank for label in tree.labels())
        # A set keeps the per-tree membership test O(1) per label.
        filtered_labels = {
            label for label, count in counted_labels.items()
            if count >= self.min_label_count
        }
        filtered_treebank = [
            tree for tree in train_treebank
            if all(label in filtered_labels for label in tree.labels())
        ]
        print(
            "Using labels with count >= {}: {}/{} ({:.1%}) of all labels and {:.1%} of all training trees."
            .format(
                self.min_label_count,
                len(filtered_labels),
                len(counted_labels),
                fraction(len(filtered_labels), len(counted_labels)),
                fraction(len(filtered_treebank), len(train_treebank))))
        train_treebank = filtered_treebank

    labels = [label for tree in train_treebank for label in tree.labels()]

    if self.model_type == 'crf':
        words = [UNK, START, STOP] + words
    else:
        words = [UNK] + words

    word_vocab = Vocabulary.fromlist(words, unk_value=UNK)
    label_vocab = Vocabulary.fromlist(labels)

    actions = []
    if self.model_type.endswith('rnng'):
        # The order of the actions matters; the original note refers to the
        # DiscParser/GenParser classes for the reason.
        if self.model_type == 'disc-rnng':
            actions = [SHIFT, REDUCE] + [NT(label) for label in label_vocab]
        elif self.model_type == 'gen-rnng':
            actions = (
                [REDUCE]
                + [NT(label) for label in label_vocab]
                + [GEN(word) for word in word_vocab]
            )
        else:
            # Previously this fell through and failed later with an
            # unbound `actions` variable.
            raise ValueError(
                f'Unsupported RNNG model type: {self.model_type!r}')

    # Non-RNNG models get an empty action vocabulary.
    action_vocab = Vocabulary()
    for action in actions:
        action_vocab.add(action)

    self.word_vocab = word_vocab
    self.label_vocab = label_vocab
    self.action_vocab = action_vocab

    self.train_treebank = train_treebank
    self.dev_treebank = dev_treebank
    self.test_treebank = test_treebank

    print('\n'.join((
        'Corpus statistics:',
        f'Vocab: {word_vocab.size:,} words, {label_vocab.size:,} nonterminals, {action_vocab.size:,} actions',
        f'Train: {len(train_treebank):,} sentences',
        f'Dev: {len(dev_treebank):,} sentences',
        f'Test: {len(test_treebank):,} sentences')))