Example no. 1
@classmethod
def from_corpus(cls, corpus, vocab_size):
    # Build a vocabulary over every token in the corpus, keep the top
    # vocab_size entries, shuffle them, and wrap the result in a new
    # instance of the owning class.
    vocab = Vocabulary()
    for token in corpus:
        vocab.add(token)
    vocab_subset = vocab.get_topk_subset(vocab_size)
    vocab_subset.shuffle()
    return cls(vocab_subset)
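
The method above takes cls, so it reads as a classmethod on some wrapper class whose name is not shown in the snippet. A minimal, purely illustrative usage sketch; the CorpusReader name and the toy token list are assumptions, not part of the original project:

# Illustrative usage only: CorpusReader stands in for whichever class
# actually defines from_corpus in the source project.
tokens = "the cat sat on the mat the cat sat".split()
reader = CorpusReader.from_corpus(tokens, vocab_size=5)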
Example no. 2
import json
import os
from os import path

import numpy as np
from tqdm import tqdm

# Vocabulary, hash_word and glorot_uniform come from the surrounding project.
def make_hash_embeddings(igor, vocab):
    # Build hash-bucket embeddings from a pretrained GloVe file.
    assert os.path.exists(igor.target_glove), "You need to specify a real file"
    fileiter = open(igor.target_glove).readlines()

    hash_vocab = Vocabulary()
    hash_vocab.use_mask = True
    hash_vocab.add(hash_vocab.mask_symbol)
    hash_vocab.add(hash_vocab.unk_symbol)
    word2hash = {}
    for word, v_id in vocab.items():
        ids = hash_vocab.add_many(hash_word(word))
        word2hash[v_id] = ids

    embeddings = np.zeros((len(hash_vocab), igor.embedding_size))
    remaining_vocab = set(vocab.keys())
    remaining_hashes = set(hash_vocab.values())
    for line in tqdm(fileiter):
        line = line.replace("\n", "").split(" ")
        word, nums = line[0], [float(x.strip()) for x in line[1:]]
        if word in remaining_vocab:
            # Spread the pretrained vector evenly over the word's hash buckets.
            hash_ids = word2hash[vocab[word]]
            remaining_vocab.remove(word)
            remaining_hashes.difference_update(hash_ids)
            embeddings[hash_ids] += np.array(nums) / len(hash_ids)
    print("{} words were not seen.  {} hashes were not seen".format(len(remaining_vocab),
                                                                    len(remaining_hashes)))
    # Hash ids that never received a pretrained vector get a random Glorot init.
    for hash_id in remaining_hashes:
        embeddings[hash_id] = np.asarray(glorot_uniform((igor.embedding_size,)).eval())

    glove_name = igor.target_glove[igor.target_glove.find("glove"):].replace("/","")

    hash_vocab.save('hash_embedding_{}.vocab'.format(glove_name))
    with open(path.join(igor.save_dir, "hash_embedding_{}.npy".format(glove_name)), "wb") as fp:
        np.save(fp, embeddings)
    with open(path.join(igor.save_dir, "word2hash.json".format(glove_name)), "w") as fp:
        json.dump(word2hash, fp)
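
The helper hash_word is used above but not shown; from the calls it appears to map a word to a small list of hash-bucket tokens that hash_vocab.add_many turns into ids. A minimal sketch of one way such a helper could look, offered only as an assumption (the seed count and bucket count are made up):

import hashlib

def hash_word(word, num_hashes=3, num_buckets=10000):
    # Assumed behaviour: hash the word with a few different seeds and
    # return one bucket token per seed, so several words can share
    # embedding rows.
    buckets = []
    for seed in range(num_hashes):
        digest = hashlib.md5("{}:{}".format(seed, word).encode("utf-8")).hexdigest()
        buckets.append("hash_{}".format(int(digest, 16) % num_buckets))
    return buckets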
Example no. 3
        for q, qid in zip(questions, qids):
            if qid not in seen_qid:
                seen_qid.add(qid)
                unique_questions.append(q)
        # print len(unique_questions), len(questions)

        # treat the document collection as all answers plus the unique questions
        docs = answers + unique_questions

        # stores the document frequency of each word
        word2dfs = compute_dfs(docs)
        # print word2dfs.items()[:10]

        # creating the vocabulary
        vocabulary = Vocabulary(start_feature_id=0)
        vocabulary.add('UNKNOWN_WORD_IDX')
        add_to_vocab(answers, vocabulary)
        add_to_vocab(questions, vocabulary)

        basename = os.path.basename(train)
        cPickle.dump(vocabulary, open(os.path.join(outdir, 'vocab.pickle'), 'wb'))
        # print "vocabulary", len(vocabulary)

        dummy_word_idx = vocabulary.fid

        # longest question and answer lengths
        q_max_length = max(map(lambda x: len(x), questions))
        a_max_length = max(map(lambda x: len(x), answers))
        print q_max_length, a_max_length
        '''
        for fname in [train, dev, test]:
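
Two project helpers, compute_dfs and add_to_vocab, are called above but not defined in the snippet. Going only by how they are used (documents as token sequences, a Vocabulary that assigns ids via add), a plausible sketch, offered as an assumption rather than the project's actual code:

from collections import defaultdict

def compute_dfs(docs):
    # Assumed: document frequency = number of documents containing the word.
    word2df = defaultdict(int)
    for doc in docs:
        for word in set(doc):
            word2df[word] += 1
    return dict(word2df)

def add_to_vocab(docs, vocabulary):
    # Assumed: register every token of every document in the vocabulary.
    for doc in docs:
        for word in doc:
            vocabulary.add(word)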
Example no. 4
    word_vocab = Vocabulary(
        os.path.join(args.wiki_preprocess, 'word_vocab.txt'))
    entity_vocab = Vocabulary(
        os.path.join(args.wiki_preprocess, 'entity_vocab.txt'))
    print(f"# word in dataset: {len(word_vocab)}")
    print(f"# entity in dataset: {len(entity_vocab)}")

    path = os.path.join(args.wiki_preprocess, 'inlinks.txt')
    with open(path, 'r') as f:
        for line in tqdm(f,
                         leave=False,
                         dynamic_ncols=True,
                         desc="Read inlniks"):
            links = json.loads(line)
            for word in links['inlinks']:
                word_vocab.add(word)
    print(f"# word in dataset + inlinks: {len(word_vocab)}")

    wiki2vec = Wikipedia2Vec.load(args.wiki2vec)
    inwiki_words_num = 0
    word_vecs = []
    word_vocab_path = os.path.join(args.wiki_preprocess, 'word_vocab.txt')
    with open(word_vocab_path, 'w') as f:
        for word in tqdm(sorted(list(word_vocab)),
                         leave=False,
                         dynamic_ncols=True,
                         desc="Filter in-wiki words"):
            try:
                vec = wiki2vec.get_word_vector(word)
                word_vecs.append(vec)
                f.write(word + "\n")