gradients = T.grad(cost, wrt=params) gradient_acc = [theano.shared(0 * p.get_value()) for p in params] counter = theano.shared(np.float32(0.)) acc = theano.function(inputs=[X, Y], outputs=cost, updates=[(a, a + g) for a, g in zip(gradient_acc, gradients)] + [(counter, counter + np.float32(1.))]) update = theano.function( inputs=[],outputs=[], updates = updates.momentum(params,[ g / counter for g in gradient_acc ]) \ + [ (a, np.float32(0) * a) for a in gradient_acc ] \ + [ (counter,np.float32(0.)) ] ) test = theano.function(inputs=[X, Y], outputs=probs[:, Y]) training_examples = [word.strip() for word in open('dictionary.txt')] import random for _ in xrange(1500): random.shuffle(training_examples) for i, string in enumerate(training_examples): print acc(font.imagify(string), label_seq(string)) if i % 20 == 0: update() if i % 100 == 0: hinton.plot(test(font.imagify("test"), label_seq("test")).T, max_arr=1.) hinton.plot(font.imagify("test").T[::-1].astype('float32')) P.save('model.pkl')
import theano.tensor as T import numpy as np from theano_toolkit import utils as U from theano_toolkit import hinton from theano_toolkit import updates from theano_toolkit.parameters import Parameters import ctc import font import lstm from ocr import * if __name__ == "__main__": import sys test_word = sys.argv[1] P = Parameters() X = T.matrix('X') predict = build_model(P, 8, 512, len(font.chars) + 1) probs = predict(X) test = theano.function(inputs=[X], outputs=probs) P.load('model.pkl') image = font.imagify(test_word) hinton.plot(image.astype(np.float32).T[::-1]) y_seq = label_seq(test_word) probs = test(image) print " ", ' '.join(font.chars[i] if i < len(font.chars) else "_" for i in np.argmax(probs, axis=1)) hinton.plot(probs[:, y_seq].T, max_arr=1.)
acc = theano.function( inputs=[X, Y], outputs=cost, updates = [ (a,a + g) for a,g in zip(gradient_acc,gradients) ] + [(counter,counter + np.float32(1.))] ) update = theano.function( inputs=[],outputs=[], updates = updates.momentum(params,[ g / counter for g in gradient_acc ]) \ + [ (a, np.float32(0) * a) for a in gradient_acc ] \ + [ (counter,np.float32(0.)) ] ) test = theano.function( inputs=[X,Y], outputs=probs[:,Y] ) training_examples = [ word.strip() for word in open('dictionary.txt') ] import random for _ in xrange(1500): random.shuffle(training_examples) for i,string in enumerate(training_examples): print acc(font.imagify(string),label_seq(string)) if i % 20 == 0: update() if i % 100 == 0: hinton.plot(test(font.imagify("test"),label_seq("test")).T,max_arr=1.) hinton.plot(font.imagify("test").T[::-1].astype('float32')) P.save('model.pkl')
import theano.tensor as T import numpy as np from theano_toolkit import utils as U from theano_toolkit import hinton from theano_toolkit import updates from theano_toolkit.parameters import Parameters import ctc import font import lstm from ocr import * if __name__ == "__main__": import sys test_word = sys.argv[1] P = Parameters() X = T.matrix('X') predict = build_model(P,8,512,len(font.chars)+1) probs = predict(X) test = theano.function(inputs=[X],outputs=probs) P.load('model.pkl') image = font.imagify(test_word) hinton.plot(image.astype(np.float32).T[::-1]) y_seq = label_seq(test_word) probs = test(image) print " ", ' '.join(font.chars[i] if i < len(font.chars) else "_" for i in np.argmax(probs,axis=1)) hinton.plot(probs[:,y_seq].T,max_arr=1.)
# --- Diagnostic dump for one QA example (Python 2 print statements) ---
# NOTE(review): depends on names defined earlier in the file (rev_map,
# question_data, ans_evd, sentences, ans_w, answer, input_data, idxs,
# evidence_count, hinton, np). Line breaks below reconstruct a collapsed
# source; verify the layout against the repository.

# Question tokens, mapped back from ids to words.
print ' '.join(rev_map[i] for i in question_data)
# Gold evidence sentences for this question.
for idx in ans_evd:
    print sentences[idx]
# Gold answer word.
print rev_map[ans_w]

# Run the model. The first `evidence_count` outputs are per-hop evidence
# distributions; the final output is the answer distribution.
evidence_answer = answer(input_data, idxs, question_data)
evd_prob = evidence_answer[:evidence_count]
ans_prob = evidence_answer[-1]

print "Evidences:"
for i, e in enumerate(evd_prob):
    print e
    # Predicted evidence distribution (trailing comma keeps the label on
    # the same output line as hinton.plot's rendering).
    print "predicted",
    hinton.plot(e, max_arr=1)
    # One-hot vector marking the gold evidence sentence for this hop.
    correct = np.zeros((e.shape[0],))
    correct[ans_evd[i]] = 1
    print "correct ",
    hinton.plot(correct)

print "Answer:"
# Predicted answer distribution vs. one-hot gold answer.
print "predicted",
hinton.plot(ans_prob, max_arr=1)
correct = np.zeros((ans_prob.shape[0],))
correct[ans_w] = 1
print "correct ",
hinton.plot(correct, max_arr=1)
print "Compiling functions..." train, validate, sample = prepare_functions( input_size=train_X_data.shape[1], hidden_size=64, latent_size=16, step_count=10, batch_size=batch_size, train_X=train_X, valid_X=valid_X) batches = int(math.ceil(train_X_data.shape[0] / float(batch_size))) print "Starting training..." best_score = np.inf for epoch in xrange(epochs): vlb = validate() print vlb, if vlb < best_score: x, x_samples, max_component, pi_samples = sample() plot_samples(x, x_samples, max_component) best_score = vlb print "Saved." hinton.plot(pi_samples.T) print np.sum(pi_samples, axis=0) else: print np.random.shuffle(train_X_data) train_X.set_value(train_X_data) for i in xrange(batches): vals = train(i) print ' '.join(map(str, vals))