示例#1
0
 def _train_model(self, model: Word2Vec, texts, min_count: int = 2):
     """Add ``texts`` to an existing word2vec vocabulary and train on them.

     Args:
         model: Word2Vec model to update in place. ``build_vocab`` is called
             with ``update=True``, so the model is expected to already hold
             a vocabulary.
         texts: Raw text documents. They are normalized by a freshly fitted
             ``Tokenizer`` before being handed to the model.
         min_count: Value assigned to ``model.min_count`` before the
             vocabulary update. Defaults to 2, the value that used to be
             hard-coded here. The assignment persists on ``model``.
     """
     tokenizer = Tokenizer()
     tokenizer.fit_on_texts(texts)
     # Round-trip every text through the tokenizer so the model sees the
     # tokenizer's own rendering of each document.
     # NOTE(review): presumably the Keras Tokenizer (lower-casing and
     # punctuation filtering) -- confirm against the file's imports.
     normalized = tokenizer.sequences_to_texts(
         tokenizer.texts_to_sequences(texts))
     # "".split(" ") returns [""], so a text that comes back empty would
     # contribute an empty-string token; drop empty tokens so such texts
     # become empty sentences instead.
     texts_seq = [
         [token for token in line.split(" ") if token]
         for line in normalized
     ]
     print("Adding to word2vec vocabulary...")
     model.min_count = min_count
     model.build_vocab(texts_seq, update=True)
     print("Training word2vec ...")
     model.train(texts_seq,
                 total_examples=len(texts_seq),
                 epochs=model.epochs)
示例#2
0
def train_model_w2vec(
    sentences: Iterable,
    model: Word2Vec,
    trigram_model_path: Path,
    save_model_path: Path,
    epochs: int,
):
    """Train a Word2Vec model on phrase-transformed sentences and save it.

    Args:
        sentences: Iterable of sentences to train on.
            NOTE(review): it is consumed by both ``build_vocab`` and
            ``train``, so it presumably must be re-iterable (not a one-shot
            generator) -- confirm against callers.
        model: Word2Vec model to build the vocabulary for and train.
        trigram_model_path: Path of the saved ``Phraser`` model applied to
            ``sentences`` before training.
        save_model_path: Path the trained model is saved to.
        epochs: Number of training epochs passed to ``model.train``.
    """
    # TODO: add kwargs so they can be forwarded to the gensim methods.

    phraser = Phraser.load(str(trigram_model_path))
    phrased_sentences = phraser[sentences]

    model.build_vocab(phrased_sentences, progress_per=10000)
    model.train(
        phrased_sentences,
        total_examples=model.corpus_count,
        epochs=epochs,
    )

    # NOTE(review): init_sims(replace=True) is reported as deprecated in
    # gensim 4.x, and the normalized model presumably cannot be trained
    # further -- confirm against the gensim version in use.
    model.init_sims(replace=True)

    destination = str(save_model_path)
    model.save(destination)
示例#3
0
def train(model: Word2Vec, word_lists, num_epoches: int = None):
    """Build ``model``'s vocabulary from ``word_lists`` and train on them.

    Args:
        model: Word2Vec model to train in place.
        word_lists: Sized collection of tokenized sentences; ``len()`` is
            taken to supply ``total_examples``.
        num_epoches: Number of training epochs. When ``None`` (the default)
            the model's own ``epochs`` setting is used.

    Returns:
        The same ``model`` instance, after training.
    """
    # gensim's Word2Vec.train requires an explicit epoch count and rejects
    # None, so the default previously could never succeed; fall back to the
    # epoch count configured on the model.
    epochs = model.epochs if num_epoches is None else num_epoches
    model.build_vocab(word_lists, progress_per=10000)
    model.train(word_lists, total_examples=len(word_lists), epochs=epochs)
    return model