Example #1
    parameters.init()

    # Prepare data for training the seq2seq
    prepare = DataPreparation()
    text = prepare.make_disintegration()
    sent = prepare.get_sentences(text)
    dicc = prepare.get_dictionary(text, stopwords, vocab_size)
    data = prepare.get_word_list(sent,
                                 stopwords,
                                 window_size=Word2Vec_window_size)

    print('Corpus properties:\n')
    print('\tDictionary with %d words' % (len(dicc['w2i'])))

    word_to_vec = Word2Vec(vocab_size, Word2Vec_embedding_dim,
                           Word2Vec_optimizer_step)
    x_train, y_train = word_to_vec.training_data(data)
    W1, b1 = word_to_vec.train(x_train, y_train)
    vocab_vectors = W1 + b1  # hidden-layer weights plus bias give one embedding per word

    conversations = []
    for i in range(len(sent) - 2):
        # pair each sentence with its successor, skipping empty sentences
        if len(sent[i + 1]) != 0 and len(sent[i + 2]) != 0:
            conversations.append([sent[i + 1], sent[i + 2]])

    # TRAIN THE MODEL

    # Initialize all the variables
    session = tf.Session()
    init_variables = tf.global_variables_initializer()
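The snippet ends before the initializer is actually run. A minimal sketch of how the training might continue under TF1; num_epochs, train_op, loss, and make_feed are assumptions, not names from the original file:

    session.run(init_variables)
    for epoch in range(num_epochs):  # num_epochs: hypothetical hyperparameter
        for question, answer in conversations:
            # train_op, loss, and make_feed stand in for the seq2seq graph
            # and feed-dict builder defined elsewhere in the original file
            _, batch_loss = session.run([train_op, loss],
                                        feed_dict=make_feed(question, answer))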
Example #2
# -*- coding: utf-8 -*-

import Word2Vec


#load = ["6CM00079.txt","6CM00080.txt","6CM00082.txt","6CM00083.txt","6CM00088.txt","6CM00090.txt","6CM00092.txt","6CM00093.txt","6CM00094.txt","6CM00095.txt"]
load = ["6CM00080.txt"]

# Instantiate the model and set the embedding vector size
vector_size = 10
#word2vec = Word2Vec.Word2Vec(pos,vector_size)
word2vec = Word2Vec.Word2Vec(vector_size, load)
final_embeddings, datas, count, dictionary, reverse_dictionary = word2vec.output()
# Retrieve words similar to a given keyword
#print(dictionary)
result = word2vec.similarity("군대", 100)
print(result)

# 1. Given an input keyword, extract similar words
# 2. Show them among the main keywords
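A minimal sketch of the first item, reusing the similarity() call shown above; the keyword list and neighbor count are hypothetical examples:

for keyword in ["군대"]:  # hypothetical keyword list
    neighbors = word2vec.similarity(keyword, 10)  # top 10 similar words
    print(keyword, neighbors)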
Example #3
# -*- coding: utf-8 -*-

import sys
sys.path.insert(
    0,
    '/Users/davichiar/Documents/ADDAVICHI/Python/Sentimental-Analysis-master/Bidirectional_LSTM'
)

import os
import tensorflow as tf
import Bi_LSTM
import Word2Vec
import gensim
import numpy as np

W2V = Word2Vec.Word2Vec()

Batch_size = 1
Vector_size = 300
Maxseq_length = 2600
learning_rate = 0.001
lstm_units = 128
num_class = 2
keep_prob = 1.0  # keep all units, i.e. no dropout at inference time

X = tf.placeholder(tf.float32,
                   shape=[None, Maxseq_length, Vector_size],
                   name='X')
Y = tf.placeholder(tf.float32, shape=[None, num_class], name='Y')
seq_len = tf.placeholder(tf.int32, shape=[None])
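The snippet stops after the placeholders; in the original project the graph is built inside the Bi_LSTM module. A generic TF1 bidirectional-LSTM graph over these placeholders would look roughly like the following sketch, which uses stock TensorFlow 1.x APIs rather than the module's own:

# Hypothetical graph construction; the real project builds this inside Bi_LSTM.
cell_fw = tf.nn.rnn_cell.LSTMCell(lstm_units)
cell_bw = tf.nn.rnn_cell.LSTMCell(lstm_units)
cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=keep_prob)
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, X, sequence_length=seq_len, dtype=tf.float32)
# Concatenate the final forward/backward hidden states and project to the classes.
final_state = tf.concat([states[0].h, states[1].h], axis=1)
logits = tf.layers.dense(final_state, num_class)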
Example #4
from Word2Vec import *
import pymongo
db = pymongo.MongoClient().travel.articles


class texts:
    """Streaming corpus: yields each article's token list straight from MongoDB."""
    def __iter__(self):
        for t in db.find().limit(30000):
            yield t['words']


wv = Word2Vec(texts(),
              model='cbow',
              nb_negative=16,
              shared_softmax=True,
              epochs=2)  # build and train the model
wv.save_model('myvec')  # save to the myvec folder in the current directory

# Once training is done, the model can be reloaded like this
wv = Word2Vec()  # create an empty model
wv.load_model('myvec')  # load the model from the myvec folder in the current directory
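The texts class works because the trainer can iterate over the corpus more than once (once per epoch), so any restartable iterable fits the same slot. A hypothetical file-backed variant, assuming one pre-tokenized, space-separated document per line:

class FileTexts:
    """Same protocol as texts above, but reads pre-tokenized lines from disk."""
    def __iter__(self):
        with open('corpus.txt', encoding='utf-8') as f:  # hypothetical filename
            for line in f:
                yield line.split()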