# 示例#1 (Example 1)
# 0
def ScbowTrain(file, window_size=1, top=10):
	"""Train a simple CBOW word2vec model on *file* and print the learned vectors.

	Pipeline: tokenize the corpus, build (context, target) pairs, one-hot
	encode them, train the model, print every word vector, then show the
	`top` most-similar words to a probe word from a co-occurrence matrix.

	Args:
		file: open text file (or whatever `preprocess` accepts) to train on.
		window_size: context window on each side of the target (default 1,
			matching the original hard-coded value).
		top: number of similar words to display (default 10, as before).
	"""
	corpus, word_to_id, id_to_word = preprocess(file)

	contexts, target = create_context_target(corpus, window_size=window_size)
	vocab_size = len(word_to_id)

	# One-hot encode ids so they can be fed directly to the network.
	target = one_hot_v(target, vocab_size)
	contexts = one_hot_v(contexts, vocab_size)

	# NOTE(review): hidden_size, max_epoch and batch_size are read from
	# module-level globals defined elsewhere — confirm they exist at call time.
	model = scbow(vocab_size, hidden_size)
	optimizer = adam()
	trainer = Trainer(model, optimizer)

	trainer.fit(contexts, target, max_epoch, batch_size)
	trainer.plot()

	# Dump every learned word vector.
	word_vecs = model.word_vecs
	for word_id, word in id_to_word.items():
		print(word, word_vecs[word_id])

	# Sanity check: nearest neighbours of the probe word '彼女' ("she")
	# using a raw co-occurrence matrix.
	C = co_mat(corpus, vocab_size, window_size=window_size)
	ms('彼女', word_to_id, id_to_word, C, top=top)
# 示例#2 (Example 2)
# 0
config.GPU = True
from common.np import *
import matplotlib.pyplot as plt
from tangoatume import preprocess
#from dataset import sequence
from common.optimizer import Adam
from common.trainer1 import RnnlmTrainer
from common.util import eval_seq2seq
from seq2seq import Seq2seq
from peeky_seq2seq import PeekySeq2seq
import time
from common.util import eval_perplexity, to_gpu

# Load the dataset (pre-tokenized / whitespace-separated text).
file_name = "./text/owakachi.txt"
# Context manager guarantees the handle is closed even if preprocess raises
# (the original left the file open for the life of the process).
with open(file_name, encoding='utf-8') as file:
    corpus, word_to_id, id_to_word = preprocess(file)

if config.GPU:
    corpus = to_gpu(corpus)

# Language-model targets are the corpus shifted by one position:
# predict word t+1 from word t.
xs = corpus[:-1]
ts = corpus[1:]

# Hyperparameter settings.
vocab_size = len(word_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 1
max_epoch = 50
max_grad = 5.0
sample_size = 100