def ScbowTrain(file):
    # Assumes preprocess, create_context_target, one_hot_v, scbow, Adam,
    # Trainer, co_mat and ms are imported from this project's modules.
    # Hyperparameters (assumed values; these names were used in the original
    # without being defined in this scope):
    hidden_size = 5
    batch_size = 100
    max_epoch = 1000

    # Build the corpus and ID mappings, then extract (context, target) pairs
    corpus, word_to_id, id_to_word = preprocess(file)
    contexts, target = create_context_target(corpus, window_size=1)
    vocab_size = len(word_to_id)

    # Convert targets and contexts to one-hot vectors
    target = one_hot_v(target, vocab_size)
    contexts = one_hot_v(contexts, vocab_size)

    # Train the simple CBOW model with the Adam optimizer
    model = scbow(vocab_size, hidden_size)
    optimizer = Adam()
    train = Trainer(model, optimizer)
    train.fit(contexts, target, max_epoch, batch_size)
    train.plot()

    # Print the learned word vector for every word in the vocabulary
    word_vecs = model.word_vecs
    for word_id, word in id_to_word.items():
        print(word, word_vecs[word_id])

    # Co-occurrence matrix and the 10 words most similar to '彼女' ("she")
    C = co_mat(corpus, vocab_size, window_size=1)
    ms('彼女', word_to_id, id_to_word, C, top=10)
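
# A minimal usage sketch of ScbowTrain (assumptions: the function lives in
# this module and the tokenized text file below exists; the path is taken
# from the training script that follows).
if __name__ == '__main__':
    with open('./text/owakachi.txt', encoding='utf-8') as f:
        ScbowTrain(f)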
# The GPU flag must be set before common.np is imported, since that module
# switches between NumPy and CuPy based on it (the config import assumes
# the "from common import config" layout of the reference code).
from common import config
config.GPU = True
from common.np import *
import matplotlib.pyplot as plt
from tangoatume import preprocess
# from dataset import sequence
from common.optimizer import Adam
from common.trainer1 import RnnlmTrainer
from common.util import eval_seq2seq
from seq2seq import Seq2seq
from peeky_seq2seq import PeekySeq2seq
import time
from common.util import eval_perplexity, to_gpu

# Load the dataset
file_name = "./text/owakachi.txt"
file = open(file_name, encoding='utf-8')
corpus, word_to_id, id_to_word = preprocess(file)
if config.GPU:
    corpus = to_gpu(corpus)
xs = corpus[:-1]  # inputs: every token except the last
ts = corpus[1:]   # targets: the inputs shifted one step ahead

# Set the hyperparameters
vocab_size = len(word_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 1
max_epoch = 50
max_grad = 5.0
sample_size = 100
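
# A possible continuation, sketching how the imported pieces would typically
# be wired together. Assumptions: Seq2seq takes (vocab_size, wordvec_size,
# hidden_size) as in the reference seq2seq.py, the customized
# common.trainer1.RnnlmTrainer keeps the reference
# fit(xs, ts, max_epoch, batch_size, time_size, max_grad) interface, and
# time_size=35 is an assumed value.
model = Seq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = RnnlmTrainer(model, optimizer)
trainer.fit(xs, ts, max_epoch, batch_size, time_size=35, max_grad=max_grad)
trainer.plot()
# eval_perplexity(model, corpus) could then report perplexity on the
# training corpus.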