def visualize_graphs(self, monitored, out_dir):
    """Render every monitored graph to an SVG file inside `out_dir`.

    `monitored` maps a tag to a graph. The file name is the tag with '/'
    replaced by '.', prefixed by '<self.id>.' when `self.id` is truthy,
    and suffixed with '.svg'.
    """
    prefix = self.id + '.' if self.id else ''
    for name, graph in monitored.iteritems():
        file_name = prefix + name.replace('/', '.') + '.svg'
        target = os.path.join(out_dir, file_name)
        pydotprint(graph, outfile=target, format='svg',
                   var_with_name_simple=True)
def TestMLP():
    """Build a small sigmoid MLP, compile it, and dump its graph to 'mlptest.png'."""
    inputs = T.matrix('X')
    network, _weights, _biases = ConstructMLP(inputs, 10, [5, 4], T.nnet.sigmoid)
    # Compiling is kept for its side effect: it fails if the graph is invalid.
    function(inputs=[inputs], outputs=network)
    pydotprint(network, 'mlptest.png')
def print_function_png(o):
    """Render `o` with pydotprint and return the raw PNG file contents.

    The image is written to a temporary '.png' file, which is always
    removed before returning, even if rendering or reading fails.
    """
    handle, fn = tempfile.mkstemp(suffix='.png')
    try:
        # mkstemp returns an open descriptor; close it so pydotprint can
        # write to the path itself.
        os.close(handle)
        pydotprint(o, outfile=fn, format='png', print_output_file=False)
        # PNG is binary data: text mode translates newlines on Windows and
        # fails to decode on Python 3, so read the file as bytes.
        with open(fn, 'rb') as f:
            return f.read()
    finally:
        os.remove(fn)
def view_graph(self, width='100%', res=60):
    """Render `self.loss` to 'examples/mlp.png' and display it with matplotlib.

    Parameters:
        width: accepted for interface compatibility; not used by this method.
        res: side length passed to `plt.figure(figsize=(res, res))`.
    """
    path = 'examples'
    name = 'mlp.png'
    path_name = path + '/' + name
    if not os.path.exists(path):
        # The original called os.mkdirs, which does not exist and raised
        # AttributeError whenever the directory was missing.
        os.makedirs(path)
    pydotprint(self.loss, path_name)

    # Borderless figure so the graph image fills the whole canvas.
    plt.figure(figsize=(res, res), dpi=80)
    plt.subplots_adjust(left=0.0, right=1.0, bottom=0.0, top=1.0,
                        hspace=0.0, wspace=0.0)
    plt.axis('off')
    plt.imshow(np.array(Image.open(path_name)))
    plt.show()
def inference(self):
    """Build the symbolic graph that samples visible units from the model.

    Runs `self.recurrence` over `self.v` with `theano.scan` to obtain the
    time-dependent biases, then runs `self.k` steps of `self.gibbs_step`
    starting from `self.v_init`.

    Side effects: rebinds `self.v` and sets `self.bv_dynamic` and
    `self.bh_dynamic`; writes two graph dumps under '../Debug/'.

    Returns:
        (v_sample, mean_v, updates_inference): the last sample of the Gibbs
        chain, the first output of one more `gibbs_step` call on that
        sample, and the merged scan updates.
    """
    # A bit hacky
    # Re-initialize the visible unit (avoid copying useless dimshuffle
    # part of the graph computation of v)
    self.v = self.v_init
    # We have to dimshuffle so that time is the first dimension
    self.v = self.v.dimshuffle((1,0,2))
    # Write the recurrence to get the bias for the RBM
    (_, bv_t, bh_t), updates_inference = theano.scan(
        fn=self.recurrence,
        sequences=self.v,
        outputs_info=[self.u0, None, None])
    # Reshuffle the variables
    # (the same (1,0,2) permutation swaps the first two axes back)
    self.bv_dynamic = bv_t.dimshuffle((1,0,2))
    self.bh_dynamic = bh_t.dimshuffle((1,0,2))
    self.v = self.v.dimshuffle((1,0,2))
    v_loop = self.v_init
    # Train the RBMs by blocks
    # Perform k-step gibbs sampling
    # NOTE(review): gibbs_step is called with one argument here but with
    # three (v, bv_dynamic, bh_dynamic) further down -- confirm that the
    # dynamic biases are really meant to be omitted inside the chain.
    (v_chain, mean_chain), updates_rbm = theano.scan(
        fn=lambda v: self.gibbs_step(v),
        outputs_info=[v_loop, None],
        # non_sequences=[self.bv_dynamic, self.bh_dynamic],
        n_steps=self.k
    )
    # Add updates of the rbm
    updates_inference.update(updates_rbm)
    # Get last sample of the gibbs chain
    v_sample = v_chain[-1]
    # Debug dumps of the two graphs
    pydotprint(v_loop, '../Debug/v_loop.html')
    pydotprint(v_sample, '../Debug/v_sample.html')
    # NOTE(review): leftover debugger breakpoint -- this halts every
    # non-interactive run; presumably it should be removed once debugging
    # is finished.
    import pdb; pdb.set_trace()
    mean_v = self.gibbs_step(v_sample,self.bv_dynamic,self.bh_dynamic)[0]
    return v_sample, mean_v, updates_inference
# Sigmoid of the affine expression; `dot`, `b`, `x` and `W` are defined
# earlier in the script (not visible in this fragment).
out = T.nnet.sigmoid(dot + b)

# Text dump of the unoptimized symbolic graphs.
from theano.printing import debugprint
debugprint(dot)
debugprint(out)

# Four compiled functions over the same expressions, differing only in
# their inputs and in which outputs they return.
f = theano.function(inputs=[x, W], outputs=dot)
g = theano.function([x, W, b], out)
h = theano.function([x, W, b], [dot, out])
i = theano.function([x, W, b], [dot + b, out])

# Text dump of the compiled graphs.
debugprint(f)
debugprint(g)

from theano.printing import pydotprint
# NOTE(review): `pydot` is imported and then immediately rebound to
# `pydot_ng`; neither name is used in the visible code -- presumably these
# imports only check that the graphviz bindings are installed.
import pydot
import graphviz
import pydot_ng as pydot

# Write the compiled graph of `f` as a PNG image.
pydotprint(f, outfile='pydotprint_f.png')
# from IPython.display import Image
# Image('pydotprint_f.png', width=1000)
#
#
# pydotprint(g, outfile='pydotprint_g.png')
# Image('pydotprint_g.png', width=1000)
import theano.tensor as T from theano import function from theano.printing import pydotprint # binary cross entropy a1 = T.dmatrix('a1') a2 = T.dmatrix('a2') f_a = T.nnet.binary_crossentropy(a1, a2).mean() f_sigmoid = function([a1, a2], [f_a]) print "Binary Cross Entropy [[0.01,0.01,0.01]],[[0.99,0.99,0.01]]:", f_sigmoid( [[0.01, 0.01, 0.01]], [[0.99, 0.99, 0.01]]) pydotprint(f_sigmoid, outfile="s7-1.png", var_with_name_simple=True) # categorical cross entropy b1 = T.dmatrix('b1') b2 = T.dmatrix('b2') f_b = T.nnet.categorical_crossentropy(b1, b2) f_sigmoid = function([b1, b2], [f_b]) print "Categorical Cross Entropy [[0.01,0.01,0.01]],[[0.99,0.99,0.01]]:", f_sigmoid( [[0.01, 0.01, 0.01]], [[0.99, 0.99, 0.01]]) pydotprint(f_sigmoid, outfile="s7-2.png", var_with_name_simple=True) # squared error def squared_error(x, y): return (x - y)**2 c1 = T.dmatrix('b1') c2 = T.dmatrix('b2') f_c = squared_error(c1, c2)
def hinge_c(x, y): return T.switch(T.lt(1 - x * y, 0), 0 * x, 1 - x * y) x = T.dscalar('x') y = T.dscalar('y') z1 = hinge_a(x, y) z2 = hinge_b(x, y) z3 = hinge_b(x, y) f1 = theano.function([x, y], z1) f2 = theano.function([x, y], z2) f3 = theano.function([x, y], z3) pydotprint(f1, outfile="s13-1.png", var_with_name_simple=True) pydotprint(f2, outfile="s13-2.png", var_with_name_simple=True) pydotprint(f3, outfile="s13-3.png", var_with_name_simple=True) print "f(-2, 1) =", f1(-2, 1), f2(-2, 1), f3(-2, 1) print "f(-1,1 ) =", f1(-1, 1), f2(-1, 1), f3(-1, 1) print "f(0,1) =", f1(0, 1), f2(0, 1), f3(0, 1) print "f(1, 1) =", f1(1, 1), f2(1, 1), f3(1, 1) print "f(2, 1) =", f1(2, 1), f2(2, 1), f3(2, 1) # f(-2, 1) = 3.0 3.0 3.0 # f(-1,1 ) = 2.0 2.0 2.0 # f(0,1) = 1.0 1.0 1.0 # f(1, 1) = 0.0 0.0 0.0 # f(2, 1) = 0.0 0.0 0.0
# Momentum update for the shared variable `M`; `M`, `grad`, `loss_train`
# and `loss_valid` are defined earlier in the script (not visible here).
lr = 0.01
momentum = 0.999
# Velocity buffer: same shape as M, initialised to zeros.
m = theano.shared(M.get_value() * np.float32(0.))
v = momentum * m - lr * grad
updates = []
updates.append((m, v))
# The parameter step uses the freshly computed velocity `v`, not the
# stored `m`.
updates.append((M, M + momentum * v - lr * grad))

# -------------------------------------------------------------------------
# `train` applies the updates on every call; `valid` only evaluates the loss.
train = theano.function(inputs=[], outputs=[loss_train], updates=updates)
valid = theano.function(inputs=[], outputs=[loss_valid], updates=[])
pydotprint(train, outfile='train.png', compact=False)
pydotprint(valid, outfile='valid.png', compact=False)

epochs = 1000000

fname = 'train.log'
print 'Writing:', fname
# NOTE(review): the matching close() is not visible in this fragment --
# confirm that the file is closed in the (truncated) rest of the script.
f = open(fname, 'w')
print '%-10s %10s %20s %20s' % ('time', 'epoch', 'loss', 'val_loss')

try:
    t0 = time.time()
    for epoch in range(epochs):
        # Both compiled functions return a one-element list.
        loss_value_train = train()[0]
        loss_value_valid = valid()[0]
def main():
    """Compare three ways of computing gradients of a gated recurrence.

    Builds a recurrence with a reset gate `r` and an update gate `z`, then
    computes the gradients of the summed last hidden state w.r.t. the
    weight matrices U, V and W by:
      1. `TT.grad` through a scan that outputs only the hidden state,
      2. a hand-written backward scan (`grad_step`),
      3. `TT.grad` through a scan that also outputs r, z and new_h.
    The three functions are compiled and run, their results are printed
    when not running on a GPU, and each compiled graph is written with
    `TP.pydotprint` using the command-line `name` as file prefix.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("name", help="Codename of this run")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG, stream=sys.stdout,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    # Parameters from an actual machine translation run
    batch_size = 80
    seq_len = 50
    # NOTE(review): equals batch_size * seq_len; the literals must be kept
    # in sync by hand if either value above changes.
    n_words = 80 * 50
    dim = 1000

    # Weight matrices
    # (V and W are separate shared variables initialised with U's values)
    U = theano.shared(
        nr.normal(size=(dim, dim), scale=0.0001).astype("float32"))
    U.name = 'U'
    V = theano.shared(U.get_value())
    V.name = 'V'
    W = theano.shared(U.get_value())
    W.name = 'W'

    # Variables and their values
    # (the same random array is fed for x, ri and zi)
    x = TT.tensor3('x')
    x_value = nr.normal(size=(seq_len, batch_size, dim), scale=0.0001).astype("float32")

    ri = TT.tensor3('ri')
    ri_value = x_value

    zi = TT.tensor3('zi')
    zi_value = x_value

    # Initial hidden state: zeros of shape (batch_size, dim)
    init = TT.alloc(numpy.float32(0), batch_size, dim)

    # My simplified backward pass, that does not
    # compute gradients w.r.t. weight matrices.
    def grad_step(
            # sequences
            h, r, z, new_h,
            # outputs_info
            e_h_next):
        # Duplicate forward propagation
        # pre_r = ri + h.dot(U)
        # pre_z = zi + h.dot(V)
        # r = TT.nnet.sigmoid(pre_r) !
        # z = TT.nnet.sigmoid(pre_z) !
        # after_r = r * h
        # pre_h = x + after_r.dot(W)
        # new_h = TT.tanh(pre_h) !
        # h_next = z * new_h + (1 - z) * h

        # Push the gradient through the update gates
        e_h = (1 - z) * e_h_next
        e_new_h = z * e_h_next
        e_z = (new_h - h) * e_h_next

        # Push the gradient through tanh
        e_pre_h = e_new_h * (1 - new_h**2)

        # Push the gradient through the reset gates
        e_after_r = e_pre_h.dot(W.T)
        e_h += r * e_after_r
        e_r = h * e_after_r

        # Push the gate gradients
        e_pre_r = r * (1 - r) * e_r
        e_pre_z = z * (1 - z) * e_z
        e_h += e_pre_r.dot(U.T)
        e_h += e_pre_z.dot(V.T)

        return e_h, e_pre_r, e_pre_z, e_pre_h

    # Forward pass with no extra outputs
    def rnn_step1(
            # sequences
            x, ri, zi,
            # outputs_info
            h):
        pre_r = ri + h.dot(U)
        pre_z = zi + h.dot(V)
        r = TT.nnet.sigmoid(pre_r)
        z = TT.nnet.sigmoid(pre_z)

        after_r = r * h
        pre_h = x + after_r.dot(W)
        new_h = TT.tanh(pre_h)

        res_h = z * new_h + (1 - z) * h

        return res_h

    # Forward pass with extra outputs
    # (same computation as rnn_step1, additionally returning r, z, new_h)
    def rnn_step3(
            # sequences
            x, ri, zi,
            # outputs_info
            h):
        pre_r = ri + h.dot(U)
        pre_z = zi + h.dot(V)
        r = TT.nnet.sigmoid(pre_r)
        z = TT.nnet.sigmoid(pre_z)

        after_r = r * h
        pre_h = x + after_r.dot(W)
        new_h = TT.tanh(pre_h)

        res_h = z * new_h + (1 - z) * h

        return res_h, r, z, new_h

    # Gradient computation - method 1
    h, _ = theano.scan(rnn_step1,
                       sequences=[x, ri, zi],
                       n_steps=seq_len,
                       outputs_info=init,
                       name='fpass1')
    cost = h[-1].sum()
    grad1 = TT.grad(cost, [U, V, W])

    # Gradient computation - method 2
    res, _ = theano.scan(rnn_step3,
                         sequences=[x, ri, zi],
                         n_steps=seq_len,
                         outputs_info=[init, None, None, None],
                         name='fpass2')

    # Prepend a zero step and drop the last one, so that position t holds
    # the value from step t - 1.
    def shift_right(x):
        return TT.concatenate([TT.shape_padleft(TT.zeros_like(x[0])), x[:-1]])

    h, r, z, new_h = res
    h = shift_right(h)
    # Backward scan over the stored forward values, seeded with ones.
    (e_h, e_pre_r, e_pre_z, e_pre_h), _ = theano.scan(
        grad_step,
        sequences=[h, r, z, new_h],
        n_steps=seq_len,
        go_backwards=True,
        outputs_info=[TT.ones_like(h[0]), None, None, None],
        name='bpass2')

    # Move the last axis first and flatten the other two: (dim, n_words).
    def reshape(x):
        return x.dimshuffle(2, 0, 1).reshape((dim, n_words))

    # The backward-scan outputs are reversed to restore forward time order.
    (h, r, e_pre_r, e_pre_z, e_pre_h) = map(reshape, [h, r, e_pre_r[::-1], e_pre_z[::-1], e_pre_h[::-1]])
    eU = h.dot(e_pre_r.T)
    eV = h.dot(e_pre_z.T)
    eW = (h * r).dot(e_pre_h.T)
    grad2 = [eU, eV, eW]

    # Gradient computation - method 3
    res, _ = theano.scan(rnn_step3,
                         sequences=[x, ri, zi],
                         n_steps=seq_len,
                         outputs_info=[init, None, None, None],
                         name='fpass3')
    h = res[0]
    cost = h[-1].sum()
    grad3 = TT.grad(cost, [U, V, W])

    logger.info("Compile functions")
    func1 = theano.function(inputs=[x, ri, zi], outputs=grad1, name="grad1")
    func2 = theano.function(inputs=[x, ri, zi], outputs=grad2, name="grad2")
    func3 = theano.function(inputs=[x, ri, zi], outputs=grad3, name="grad3")

    logger.info("Run")
    on_gpu = theano.config.device == 'gpu'
    # Run 50 repetitions on the GPU, a single one otherwise.
    times = 1
    if on_gpu:
        times = 50
    for i in range(times):
        g1 = func1(x_value, ri_value, zi_value)
        g2 = func2(x_value, ri_value, zi_value)
        g3 = func3(x_value, ri_value, zi_value)

    # Off the GPU, print per-gradient sums and the absolute difference
    # between method 1 and method 2.
    if not on_gpu:
        for g in [g1, g2, g3]:
            print map(lambda x: x.sum(), g)
        for v1, v2 in zip(g1, g2):
            print numpy.sum(numpy.abs(v1 - v2))

    TP.pydotprint(func1, outfile=args.name + "1", scan_graphs=True)
    TP.pydotprint(func2, outfile=args.name + "2", scan_graphs=True)
    TP.pydotprint(func3, outfile=args.name + "3", scan_graphs=True)

    logger.info("Finished")
import theano.tensor as T
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
import numpy
from theano.printing import pydotprint

# Symbolic random stream with a fixed seed.
random = RandomStreams(seed=42)

# A (1, 3) draw from the stream's normal distribution.
a = random.normal((1, 3))
# NOTE(review): the Python variable is `b` but its symbolic name is 'a';
# that label is what shows up in the printed graph -- confirm it is intended.
b = T.dmatrix('a')

f1 = a * b

g1 = function([b], f1)
pydotprint(g1, outfile="s9.png", var_with_name_simple=True)

# The recorded output below shows a different draw on every call.
print "Invocation 1:", g1(numpy.ones((1, 3)))
print "Invocation 2:", g1(numpy.ones((1, 3)))
print "Invocation 3:", g1(numpy.ones((1, 3)))

# Invocation 1: [[ 1.25614218 -0.53793023 -0.10434045]]
# Invocation 2: [[ 0.66992188 -0.70813926 0.99601177]]
# Invocation 3: [[ 0.0724739 -0.66508406 0.93707751]]
import theano.tensor as T
from theano import function
from theano.printing import pydotprint

# Each section compiles one activation function, prints its value on
# [[-1, 0, 1]] and writes the compiled graph to a PNG file.

# sigmoid
a = T.dmatrix('a')
f_a = T.nnet.sigmoid(a)
f_sigmoid = function([a], [f_a])
print "sigmoid:", f_sigmoid([[-1, 0, 1]])
pydotprint(f_sigmoid, outfile="s4-1.png", var_with_name_simple=True)

# tanh
b = T.dmatrix('b')
f_b = T.tanh(b)
f_tanh = function([b], [f_b])
print "tanh:", f_tanh([[-1, 0, 1]])
pydotprint(f_tanh, outfile="s4-2.png", var_with_name_simple=True)

# fast sigmoid
c = T.dmatrix('c')
f_c = T.nnet.ultra_fast_sigmoid(c)
f_fast_sigmoid = function([c], [f_c])
print "fast sigmoid:", f_fast_sigmoid([[-1, 0, 1]])
pydotprint(f_fast_sigmoid, outfile="s4-3.png", var_with_name_simple=True)

# softplus
d = T.dmatrix('d')
f_d = T.nnet.softplus(d)
f_softplus = function([d], [f_d])
print "soft plus:", f_softplus([[-1, 0, 1]])
pydotprint(f_softplus, outfile="s4-4.png", var_with_name_simple=True)
def main(args): logging.basicConfig( level=logging.INFO, format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") state = eval(args.prototype)() timings = init_timings() if args.resume != "": logger.debug("Resuming %s" % args.resume) state_file = args.resume + '_state.pkl' timings_file = args.resume + '_timing.npz' if os.path.isfile(state_file) and os.path.isfile(timings_file): logger.debug("Loading previous state") state = cPickle.load(open(state_file, 'r')) timings = dict(numpy.load(open(timings_file, 'r'))) for x, y in timings.items(): timings[x] = list(y) else: raise Exception("Cannot resume, cannot find files!") logger.info("State:\n{}".format(pprint.pformat(state))) logger.info("Timings:\n{}".format(pprint.pformat(timings))) model = SessionEncoderDecoder(state) rng = model.rng if args.resume != "": filename = args.resume + '_model.npz' if os.path.isfile(filename): logger.info("Loading previous model") load(model, filename) else: raise Exception("Cannot resume, cannot find model file!") else: # assign new run_id key model.state['run_id'] = RUN_ID logger.info("Compile trainer") train_batch = model.build_train_function() logger.info("Visualizing") pydotprint(train_batch, 'visualize.png') logger.info("Compile eval") eval_batch = model.build_eval_function() random_sampler = search.RandomSampler(model) logger.info("Load data") train_data, valid_data = get_batch_iterator(rng, state) train_data.start() # Start looping through the dataset step = 0 patience = state['patience'] start_time = time.time() train_cost = 0 train_done = 0 ex_done = 0 while step < state['loop_iters'] and patience >= 0: # Sample stuff if step % 200 == 0: for param in model.params: print "%s = %.4f" % (param.name, numpy.sum(param.get_value()** 2)**0.5) samples, costs = random_sampler.sample([[]], n_samples=1, n_turns=3) print "Sampled : {}".format(samples[0]) # Training phase batch = train_data.next() # Train finished if not batch: # Restart training logger.debug("Got None...") break c = 
train_batch(batch['x'], batch['y'], batch['max_length'], batch['x_mask']) if numpy.isinf(c) or numpy.isnan(c): logger.warn("Got NaN cost .. skipping") continue train_cost += c train_done += batch['num_preds'] this_time = time.time() if step % state['train_freq'] == 0: elapsed = this_time - start_time h, m, s = ConvertTimedelta(this_time - start_time) print ".. %.2d:%.2d:%.2d %4d mb # %d bs %d maxl %d acc_cost = %.4f" % (h, m, s,\ state['time_stop'] - (time.time() - start_time)/60.,\ step, \ batch['x'].shape[1], \ batch['max_length'], \ float(train_cost/train_done)) if valid_data is not None and\ step % state['valid_freq'] == 0 and step > 1: valid_data.start() valid_cost = 0 valid_done = 0 logger.debug("[VALIDATION START]") while True: batch = valid_data.next() # Train finished if not batch: break if numpy.isinf(c) or numpy.isnan(c): continue c = eval_batch(batch['x'], batch['y'], batch['max_length'], batch['x_mask']) valid_cost += c valid_done += batch['num_preds'] logger.debug("[VALIDATION END]") valid_cost /= valid_done if len(timings["valid"]) == 0 or valid_cost < numpy.min( numpy.array(timings["valid"])): patience = state['patience'] # Saving model if decrease in validation cost save(model, timings) elif valid_cost >= timings["valid"][-1] * state['cost_threshold']: patience -= 1 print "** validation error = %.4f, patience = %d" % ( float(valid_cost), patience) timings["train"].append(train_cost / train_done) timings["valid"].append(valid_cost) # Reset train cost and train done train_cost = 0 train_done = 0 step += 1 logger.debug("All done, exiting...")
from theano.printing import pydotprint

# Four matrix inputs and five scalar inputs.
a = T.dmatrix('a')
b = T.dmatrix('b')
c = T.dmatrix('c')
d = T.dmatrix('d')

p = T.dscalar('p')
q = T.dscalar('q')
r = T.dscalar('r')
s = T.dscalar('s')
u = T.dscalar('u')

# One compound element-wise expression over all nine inputs.
e = (((a * p) + (b - q) - (c + r )) * d/s) * u

f = function([a,b,c,d,p,q,r,s,u], e)

a_data = numpy.array([[1,1],[1,1]])
b_data = numpy.array([[2,2],[2,2]])
c_data = numpy.array([[5,5],[5,5]])
d_data = numpy.array([[3,3],[3,3]])

# The same expression in plain numpy (p..u = 1..5) as a reference value.
print "Expected:", (((a_data * 1.0) + (b_data - 2.0) - (c_data + 3.0 )) * d_data/4.0) * 5.0
print "Via Theano:", f(a_data,b_data,c_data,d_data,1,2,3,4,5)
pydotprint(f, outfile="s3.png", var_with_name_simple=True)

# Expected: [[-26.25 -26.25]
# [-26.25 -26.25]]
# Via Theano: [[-26.25 -26.25]
# [-26.25 -26.25]]
def main():
    """Compare automatic and hand-written gradients of a tanh recurrence.

    Builds h_t = tanh(x_t + h_{t-1}.dot(W)) with `theano.scan`, then
    computes the gradient of the summed last hidden state w.r.t. W both
    with `TT.grad` and with a manual backward scan (`grad_step`). Both
    functions are compiled and run; when not on a GPU the mean absolute
    values and the mean absolute difference are printed. The graph of the
    first function is written with `TP.pydotprint` to the command-line
    `name`.
    """
    numpy.random.seed(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('name')
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout,
            format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    # Parameters from an actual machine translation run
    batch_size = 80
    seq_len = 50
    n_words = batch_size * seq_len
    dim = 1000

    # Weight matrices
    # (WT is a separate shared variable holding a transposed copy of W's
    # value, so TT.grad w.r.t. W does not see the manual backward pass)
    W = theano.shared(nr.normal(size=(dim, dim), scale=0.0001).astype("float32"))
    W.name = 'W'
    WT = theano.shared(W.get_value().T)
    WT.name = 'WT'

    # Variables and their values
    x = TT.tensor3('x')
    x_value = nr.normal(size=(seq_len, batch_size, dim), scale=0.0001).astype("float32")

    # Backward pass
    # (variables are named so that they are labelled in the printed graph)
    def grad_step(
            # sequences
            h, mult,
            # outputs_info
            e_h_next):
        h.name = 'h'
        mult.name = 'mul'
        e_h_next.name = 'e_h_next'
        # mult is 1 - tanh(.)**2, the derivative of tanh
        e_pre_h = e_h_next * mult
        e_pre_h.name = 'e_pre_h'
        e_h = e_pre_h.dot(WT)
        e_h.name = 'e_h'
        return e_h, e_pre_h

    # Forward pass
    def rnn_step(
            # sequences
            x,
            # outputs_info
            h):
        x.name = 'x'
        h.name = 'h'
        pre_h = x + h.dot(W)
        pre_h.name = 'pre_h'
        new_h = TT.tanh(pre_h)
        new_h.name = 'new_h'
        return new_h

    # Method 1: automatic differentiation through the scan
    h, _ = theano.scan(rnn_step,
            sequences=[x],
            n_steps=seq_len,
            outputs_info=[TT.zeros_like(x[0])],
            name='fpass')
    cost = h[-1].sum()
    grad1 = TT.grad(cost, [W])

    # Method 2: manual backward scan
    mult = 1 - h ** 2
    mult.name = 'mult'
    # Shift h right by one step (zeros first, last step dropped) so that
    # position t holds the hidden state from step t - 1.
    h = TT.concatenate([
        TT.shape_padleft(TT.zeros_like(h[0])),
        h[:-1]])
    h.name = 'h*'
    (_1, e_pre_h), _2 = theano.scan(grad_step,
            sequences=[h, mult],
            n_steps=seq_len,
            outputs_info=[TT.ones_like(x[0]), None],
            go_backwards=True,
            name='bpass')
    # Flatten to (dim, n_words); e_pre_h is reversed back to forward time
    # order and transposed so that the product has shape (dim, dim).
    h = h.dimshuffle(2, 0, 1).reshape((dim, n_words))
    h.name = 'h_shu'
    e_pre_h = e_pre_h[::-1].dimshuffle(2, 0, 1).reshape((dim, n_words)).T
    e_pre_h.name = 'e_pre_h_shu'
    eW = h.dot(e_pre_h)
    eW.name = 'eW'
    grad2 = [eW]

    logger.info("Compile a function")
    func1 = theano.function(inputs=[x], outputs=grad1, name="grad1")
    TP.pydotprint(func1, outfile=args.name, scan_graphs=True)
    func2 = theano.function(inputs=[x], outputs=grad2, name="grad2")

    logger.info("Run the function")
    on_gpu = theano.config.device == 'gpu'
    # Run 50 repetitions on the GPU, a single one otherwise.
    times = 1
    if on_gpu:
        times = 50
    for i in range(times):
        g1 = func1(x_value)[0]
        g2 = func2(x_value)[0]
    if not on_gpu:
        print numpy.abs(g1).mean(), numpy.abs(g2).mean(), numpy.abs(g1 - g2).mean()
    logger.info("Finished")
import theano
import theano.tensor as T
import numpy as np
from theano.printing import pydotprint

# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")

# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
# Binomial mask with the shape of one row of W.
d = trng.binomial(size=W[1].shape)

# For every row v of X: tanh(v.W + b_sym), multiplied by the random mask.
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
# The scan updates are passed on to the compiled function.
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results,
                                      updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)

print(x)
print(updates)
print(compute_with_bnoise(x, w, b))

# Graph of the symbolic result, then graph of the compiled function.
pydotprint(results, outfile="results.png", var_with_name_simple=True)
pydotprint(compute_with_bnoise, outfile="compute_with_bnoise.png", var_with_name_simple=True)
def TestNormalPrior():
    """Build a normal prior with unit variances and dump its graph to 'priortest.png'."""
    parameters = biases + weights
    # One variance of 1.0 per entry of the combined parameter collection.
    unit_variances = [1.0] * len(parameters)
    prior_graph = ConstructNormalPrior(parameters, unit_variances)
    pydotprint(prior_graph, 'priortest.png')
def TestMLP():
    """Build a small sigmoid MLP, compile it, and dump its graph to 'mlptest.png'."""
    inputs = T.matrix('X')
    network, _weights, _biases = ConstructMLP(inputs, 10, [5, 4], T.nnet.sigmoid)
    # Compiling is kept for its side effect: it fails if the graph is invalid.
    function(inputs=[inputs], outputs=network)
    pydotprint(network, 'mlptest.png')
import numpy as np
from pprint import pprint

# Problem sizes (ninputs is not used in the visible part of the script).
ninputs = 1000
nfeatures = 100
noutputs = 10
nhiddens = 50

# Fixed seed so that the initial weights are reproducible.
rng = np.random.RandomState(0)

x = T.dmatrix('x')
# Hidden layer: sigmoid(x.wh + bh)
wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)
bh = th.shared(np.zeros(nhiddens), borrow=True)
h = T.nnet.sigmoid(T.dot(x, wh) + bh)

# Output layer: softmax(h.wy + by)
# (wy is the only shared variable created without borrow=True)
wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))
by = th.shared(np.zeros(noutputs), borrow=True)
y = T.nnet.softmax(T.dot(h, wy) + by)

predict = th.function([x], y)
pprint(predict)

from theano.printing import pydotprint
import os

# Make sure the output directory exists before writing into it.
if not os.path.exists('examples'):
    os.makedirs('examples')

# Static image of the compiled graph.
pydotprint(predict, 'examples/mlp.png')

# Interactive HTML rendering of the same graph.
import theano.d3viz as d3v
d3v.d3viz(predict, 'examples/mlp.html')
x = T.dmatrix("x") y = T.dvector("y") w = theano.shared(numpy.random.randn(features), name="w") b = theano.shared(0., name="b") p = 1 / (1 + T.exp(-T.dot(x, w) - b)) error = T.nnet.binary_crossentropy(p, y) loss = error.mean() + 0.01 * l2(w) prediction = p > 0.5 gw, gb = T.grad(loss, [w, b]) train = theano.function(inputs=[x, y], outputs=[p, error], updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb))) predict = theano.function(inputs=[x], outputs=prediction) print "Accuracy before Training:", sklearn.metrics.accuracy_score( D[1], predict(D[0])) for i in range(training_steps): prediction, error = train(D[0], D[1]) print "Accuracy before Training:", sklearn.metrics.accuracy_score( D[1], predict(D[0])) pydotprint(predict, outfile="s10.png", var_with_name_simple=True) # Accuracy before Training: 0.481 # Accuracy before Training: 0.629
# theta = pm.Dirichlet('theta', data_exp[:, 0], shape=(1, 4)) # same shape as "beta" before # theta = np.reshape(E_exp, (1, 4)) # def joint(gamma, theta): # return gamma * theta # # L = pm.DensityDist('L', joint, observed={'gamma': gamma, 'theta': theta}) #%% model.check_test_point() #%% p_exp.tag.test_value #%% # graph = pm.graph.graph(model) pydotprint(model.logpt) plt.show() #%% # OLD MODEL # model = pm.Model() # with model: # # # Priors for unknown model parameters # hyper_mu = pm.Normal('hyper_mu', mu=0, sd=10) # hyper_sd = pm.Gamma('hyper_sd', alpha=0.01, beta=0.001) # c = pm.Normal('c', mu=hyper_mu, sd=hyper_sd, shape=(config.K, config.K)) # # p = config.sigmoid(c) #
import theano
import theano.tensor as T
import theano.printing
from theano.printing import pydotprint

k = T.iscalar("k")
a = T.dscalar("a")

# Compute a**k by repeated multiplication: the running product starts at
# `a` (outputs_info) and is multiplied by `a` for k - 1 steps.
result, updates = theano.scan(fn=lambda prior_result, a: prior_result * a,
                              outputs_info=a,
                              non_sequences=a,
                              n_steps=k - 1)

# scan returns every intermediate product; only the last one is needed.
final_result = result[-1]

a_pow_k = theano.function(inputs=[a, k], outputs=final_result, updates=updates)
pydotprint(a_pow_k, outfile="s14.png", var_with_name_simple=True)

# Compare against Python's own power operator (printed twice).
print a_pow_k(2, 5), 2**5
print a_pow_k(2, 5), 2**5
# 32.0 32
# Momentum update for the shared variable `M`; `M`, `grad`, `loss`,
# `momentum`, `learning_rate` and `save_path` are defined earlier in the
# script (not visible here).
print 'momentum:', momentum
updates = []
# Velocity buffer: same shape as M, initialised to zeros.
m = theano.shared(M.get_value() * np.float32(0.))
v = momentum * m - learning_rate * grad
updates.append((m, v))
# The parameter step uses the freshly computed velocity `v`, not the
# stored `m`.
updates.append((M, M + momentum * v - learning_rate * grad))

# -------------------------------------------------------------------------
# Run gradient descent for a given number of epochs
train = theano.function(inputs=[], outputs=[loss], updates=updates)
pydotprint(train, outfile=save_path + 'train.png', compact=False)

epochs = int(1e5)

# saves the loss function values to a *.log file
fname = save_path + 'train.log'
print 'Writing:', fname
# NOTE(review): the matching close() is not visible in this fragment --
# confirm that the file is closed in the (truncated) rest of the script.
f = open(fname, 'w')

# training can be interrupted at any time by pressing ^c
# all logs until that point and the current matrix obtained will be saved
try:
    for epoch in range(epochs):
        # Print the column header once, before the first epoch.
        if epoch == 0:
            s = '%-10s %10s %10s' % ('time', 'epoch', 'loss (kW/m^3)')
            print s
def main():
    """Compare three ways of computing gradients of a gated recurrence.

    Builds a recurrence with a reset gate `r` and an update gate `z`, then
    computes the gradients of the summed last hidden state w.r.t. the
    weight matrices U, V and W by:
      1. `TT.grad` through a scan that outputs only the hidden state,
      2. a hand-written backward scan (`grad_step`),
      3. `TT.grad` through a scan that also outputs r, z and new_h.
    The three functions are compiled and run, their results are printed
    when not running on a GPU, and each compiled graph is written with
    `TP.pydotprint` using the command-line `name` as file prefix.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("name", help="Codename of this run")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout,
            format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    # Parameters from an actual machine translation run
    batch_size = 80
    seq_len = 50
    # NOTE(review): equals batch_size * seq_len; the literals must be kept
    # in sync by hand if either value above changes.
    n_words = 80 * 50
    dim = 1000

    # Weight matrices
    # (V and W are separate shared variables initialised with U's values)
    U = theano.shared(nr.normal(size=(dim, dim), scale=0.0001).astype("float32"))
    U.name = 'U'
    V = theano.shared(U.get_value())
    V.name = 'V'
    W = theano.shared(U.get_value())
    W.name = 'W'

    # Variables and their values
    # (the same random array is fed for x, ri and zi)
    x = TT.tensor3('x')
    x_value = nr.normal(size=(seq_len, batch_size, dim), scale=0.0001).astype("float32")

    ri = TT.tensor3('ri')
    ri_value = x_value

    zi = TT.tensor3('zi')
    zi_value = x_value

    # Initial hidden state: zeros of shape (batch_size, dim)
    init = TT.alloc(numpy.float32(0), batch_size, dim)

    # My simplified backward pass, that does not
    # compute gradients w.r.t. weight matrices.
    def grad_step(
            # sequences
            h, r, z, new_h,
            # outputs_info
            e_h_next):
        # Duplicate forward propagation
        # pre_r = ri + h.dot(U)
        # pre_z = zi + h.dot(V)
        # r = TT.nnet.sigmoid(pre_r) !
        # z = TT.nnet.sigmoid(pre_z) !
        # after_r = r * h
        # pre_h = x + after_r.dot(W)
        # new_h = TT.tanh(pre_h) !
        # h_next = z * new_h + (1 - z) * h

        # Push the gradient through the update gates
        e_h = (1 - z) * e_h_next
        e_new_h = z * e_h_next
        e_z = (new_h - h) * e_h_next

        # Push the gradient through tanh
        e_pre_h = e_new_h * (1 - new_h ** 2)

        # Push the gradient through the reset gates
        e_after_r = e_pre_h.dot(W.T)
        e_h += r * e_after_r
        e_r = h * e_after_r

        # Push the gate gradients
        e_pre_r = r * (1 - r) * e_r
        e_pre_z = z * (1 - z) * e_z
        e_h += e_pre_r.dot(U.T)
        e_h += e_pre_z.dot(V.T)

        return e_h, e_pre_r, e_pre_z, e_pre_h

    # Forward pass with no extra outputs
    def rnn_step1(
            # sequences
            x, ri, zi,
            # outputs_info
            h):
        pre_r = ri + h.dot(U)
        pre_z = zi + h.dot(V)
        r = TT.nnet.sigmoid(pre_r)
        z = TT.nnet.sigmoid(pre_z)

        after_r = r * h
        pre_h = x + after_r.dot(W)
        new_h = TT.tanh(pre_h)

        res_h = z * new_h + (1 - z) * h

        return res_h

    # Forward pass with extra outputs
    # (same computation as rnn_step1, additionally returning r, z, new_h)
    def rnn_step3(
            # sequences
            x, ri, zi,
            # outputs_info
            h):
        pre_r = ri + h.dot(U)
        pre_z = zi + h.dot(V)
        r = TT.nnet.sigmoid(pre_r)
        z = TT.nnet.sigmoid(pre_z)

        after_r = r * h
        pre_h = x + after_r.dot(W)
        new_h = TT.tanh(pre_h)

        res_h = z * new_h + (1 - z) * h

        return res_h, r, z, new_h

    # Gradient computation - method 1
    h, _ = theano.scan(rnn_step1,
            sequences=[x, ri, zi],
            n_steps=seq_len,
            outputs_info=init,
            name='fpass1')
    cost = h[-1].sum()
    grad1 = TT.grad(cost, [U, V, W])

    # Gradient computation - method 2
    res, _ = theano.scan(rnn_step3,
            sequences=[x, ri, zi],
            n_steps=seq_len,
            outputs_info=[init, None, None, None],
            name='fpass2')

    # Prepend a zero step and drop the last one, so that position t holds
    # the value from step t - 1.
    def shift_right(x):
        return TT.concatenate([
            TT.shape_padleft(TT.zeros_like(x[0])),
            x[:-1]])

    h, r, z, new_h = res
    h = shift_right(h)
    # Backward scan over the stored forward values, seeded with ones.
    (e_h, e_pre_r, e_pre_z, e_pre_h), _ = theano.scan(grad_step,
            sequences=[h, r, z, new_h],
            n_steps=seq_len,
            go_backwards=True,
            outputs_info=[TT.ones_like(h[0]), None, None, None],
            name='bpass2')

    # Move the last axis first and flatten the other two: (dim, n_words).
    def reshape(x):
        return x.dimshuffle(2, 0, 1).reshape((dim, n_words))

    # The backward-scan outputs are reversed to restore forward time order.
    (h, r, e_pre_r, e_pre_z, e_pre_h) = map(reshape, [h, r, e_pre_r[::-1], e_pre_z[::-1], e_pre_h[::-1]])
    eU = h.dot(e_pre_r.T)
    eV = h.dot(e_pre_z.T)
    eW = (h * r).dot(e_pre_h.T)
    grad2 = [eU, eV, eW]

    # Gradient computation - method 3
    res, _ = theano.scan(rnn_step3,
            sequences=[x, ri, zi],
            n_steps=seq_len,
            outputs_info=[init, None, None, None],
            name='fpass3')
    h = res[0]
    cost = h[-1].sum()
    grad3 = TT.grad(cost, [U, V, W])

    logger.info("Compile functions")
    func1 = theano.function(inputs=[x, ri, zi], outputs=grad1, name="grad1")
    func2 = theano.function(inputs=[x, ri, zi], outputs=grad2, name="grad2")
    func3 = theano.function(inputs=[x, ri, zi], outputs=grad3, name="grad3")

    logger.info("Run")
    on_gpu = theano.config.device == 'gpu'
    # Run 50 repetitions on the GPU, a single one otherwise.
    times = 1
    if on_gpu:
        times = 50
    for i in range(times):
        g1 = func1(x_value, ri_value, zi_value)
        g2 = func2(x_value, ri_value, zi_value)
        g3 = func3(x_value, ri_value, zi_value)

    # Off the GPU, print per-gradient sums and the absolute difference
    # between method 1 and method 2.
    if not on_gpu:
        for g in [g1, g2, g3]:
            print map(lambda x : x.sum(), g)
        for v1, v2 in zip(g1, g2):
            print numpy.sum(numpy.abs(v1 - v2))

    TP.pydotprint(func1, outfile=args.name + "1", scan_graphs=True)
    TP.pydotprint(func2, outfile=args.name + "2", scan_graphs=True)
    TP.pydotprint(func3, outfile=args.name + "3", scan_graphs=True)

    logger.info("Finished")
import theano.tensor as T
from theano import function
from theano.printing import pydotprint

# L1 Regularization
def l1(x):
    """Return the sum of the absolute values of `x`."""
    return T.sum(abs(x))

# L2 Regularization
def l2(x):
    """Return the sum of the squared entries of `x`."""
    return T.sum(x**2)

# Compile each penalty, evaluate it on [[0, 1, 3]] and write its graph.
a = T.dmatrix('a')
f_a = l1(a)
f_l1 = function([a], f_a)
print "L1 Regularization:", f_l1([[0, 1, 3]])
pydotprint(f_l1, outfile="s8-1.png", var_with_name_simple=True)

b = T.dmatrix('b')
f_b = l2(b)
f_l2 = function([b], f_b)
print "L2 Regularization:", f_l2([[0, 1, 3]])
pydotprint(f_l2, outfile="s8-2.png", var_with_name_simple=True)

# L1 Regularization: 4.0
# L2 Regularization: 10.0