Example #1
import time

import numpy
import theano.sandbox.cuda

# Assumes an initialized MlpBigram instance (mlp_bigram), the nlpdict
# dictionary object, and the train_text / test_text strings from earlier
# in the original script.
s_time = time.clock()  # note: CPU time on Unix, wall-clock time on Windows
for i in xrange(500):
	mlp_bigram.traintext(train_text, add_se=False)
	print "Error rate: %0.5f. Epoch: %s. Training time so far: %0.1fm" % (mlp_bigram.testtext(test_text), i+1, (time.clock()-s_time)/60.)
	if (i+1) % 50 == 0:
		# the filename uses the zero-based index i, so the 500th epoch saves as "epoch499"
		mlp_bigram.savemodel("./data/MlpBigram.model.epoch%s.obj" % i)

e_time = time.clock()

duration = e_time - s_time

print "MlpBigram train over!! The total training time is %.2fm." % (duration / 60.) 

#############
#  Testing  #
#############

# Evaluate on the CPU, reloading the checkpoint saved at i == 499
# (i.e. after the 500th training epoch).
theano.sandbox.cuda.use('cpu')
mlp_bigram = MlpBigram(nlpdict, backup_file_path="./data/MlpBigram.model.epoch499.obj")

# Likelihood the model assigns to a sample character sequence.
print mlp_bigram.likelihood(u"国家主席江泽")

# predict() returns a token index; gettoken maps it back to a character.
print nlpdict.gettoken(mlp_bigram.predict(u"国家主席江"))

# Load the held-out test text.
f = open('./data/pku_test.txt')
test_text = unicode(f.read(), 'utf-8')
f.close()

print "Test size is: %s" % len(test_text)
ce, logs = mlp_bigram.crossentropy(test_text)
print "Cross-entropy is:", ce
print "Perplexity is:", numpy.exp2(ce)
Example #2
#############
# Training  #
#############
import time

# Assumes the project's RnnLM class and the nlpdict dictionary are in scope.
# Load the raw text and remove the space separators.
f = open('./data/text.txt')
text = unicode(f.read(), 'utf-8')
text = text.replace(" ", "")
f.close()

len_text = len(text)

print "Train size is: %s" % len_text

# RNN language model: 30 hidden units, learning rate 0.09, batch size 50.
rnnlm = RnnLM(nlpdict, n_hidden=30, lr=0.09, batch_size=50)

print "Rnn training start!"

# Train and evaluate on the same first 100 characters (quick overfitting check).
train_text = text[:100]
test_text = text[:100]

s_time = time.clock()
for i in xrange(50):
	rnnlm.traintext(train_text)
	test_res = rnnlm.testtext(test_text)
	print "Error rate: %.5f. Epoch: %s. Training time so far: %0.1fm" % (test_res[0], i+1, (time.clock()-s_time)/60.)
	# test_res[1] holds predicted token indices; join them back into characters
	print ''.join(nlpdict.gettoken(t) for t in test_res[1])

e_time = time.clock()

duration = e_time - s_time

print "RnnLM train over!! The total training time is %.2fm." % (duration / 60.)