def test_mixed_dtype(self):
    x1 = tensor.vector('x1', dtype='int32')
    x2 = tensor.vector('x2', dtype=self.dtype)
    y1 = tensor.vector('y1', dtype='int32')
    y2 = tensor.vector('y2', dtype=self.dtype)
    c = tensor.iscalar('c')
    f = theano.function([c, x1, x2, y1, y2],
                        ifelse(c, (x1, x2), (y1, y2)),
                        mode=self.mode)
    self.assertFunctionContains1(f, self.get_ifelse(2))
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)
    vx1 = numpy.asarray(rng.uniform(size=(xlen,)) * 3, 'int32')
    vx2 = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy1 = numpy.asarray(rng.uniform(size=(ylen,)) * 3, 'int32')
    vy2 = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

    o1, o2 = f(1, vx1, vx2, vy1, vy2)
    assert numpy.allclose(vx1, o1)
    assert numpy.allclose(vx2, o2)

    o1, o2 = f(0, vx1, vx2, vy1, vy2)
    assert numpy.allclose(vy1, o1)
    assert numpy.allclose(vy2, o2)
def test_multiple_out(self):
    x1 = tensor.vector('x1', dtype=self.dtype)
    x2 = tensor.vector('x2', dtype=self.dtype)
    y1 = tensor.vector('y1', dtype=self.dtype)
    y2 = tensor.vector('y2', dtype=self.dtype)
    c = tensor.iscalar('c')
    z = ifelse(c, (x1, x2), (y1, y2))
    f = theano.function([c, x1, x2, y1, y2], z, mode=self.mode)
    self.assertFunctionContains1(f, self.get_ifelse(2))

    ifnode = [x for x in f.maker.fgraph.toposort()
              if isinstance(x.op, IfElse)][0]
    assert len(ifnode.outputs) == 2

    rng = numpy.random.RandomState(utt.fetch_seed())

    x1len = rng.randint(200)
    x2len = rng.randint(200)
    y1len = rng.randint(200)
    y2len = rng.randint(200)

    vx1 = numpy.asarray(rng.uniform(size=(x1len,)), self.dtype)
    vx2 = numpy.asarray(rng.uniform(size=(x2len,)), self.dtype)
    vy1 = numpy.asarray(rng.uniform(size=(y1len,)), self.dtype)
    vy2 = numpy.asarray(rng.uniform(size=(y2len,)), self.dtype)

    ovx1, ovx2 = f(1, vx1, vx2, vy1, vy2)
    ovy1, ovy2 = f(0, vx1, vx2, vy1, vy2)
    assert numpy.allclose(vx1, ovx1)
    assert numpy.allclose(vy1, ovy1)
    assert numpy.allclose(vx2, ovx2)
    assert numpy.allclose(vy2, ovy2)
def test_multiple_out_grad(self):
    # Tests that we can compute the gradients through lazy if
    x1 = tensor.vector('x1')
    x2 = tensor.vector('x2')
    y1 = tensor.vector('y1')
    y2 = tensor.vector('y2')
    c = tensor.iscalar('c')
    z = ifelse(c, (x1, x2), (y1, y2))
    grads = tensor.grad(z[0].sum() + z[1].sum(), [x1, x2, y1, y2])

    f = theano.function([c, x1, x2, y1, y2], grads)
    rng = numpy.random.RandomState(utt.fetch_seed())

    lens = [rng.randint(200) for i in range(4)]
    values = [numpy.asarray(rng.uniform(size=(l,)), theano.config.floatX)
              for l in lens]
    outs_1 = f(1, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
    assert numpy.all(outs_1[0] == 1.)
    assert numpy.all(outs_1[1] == 1.)
    assert numpy.all(outs_1[2] == 0.)
    assert numpy.all(outs_1[3] == 0.)

    outs_0 = f(0, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_0, lens)])
    assert numpy.all(outs_0[0] == 0.)
    assert numpy.all(outs_0[1] == 0.)
    assert numpy.all(outs_0[2] == 1.)
    assert numpy.all(outs_0[3] == 1.)
def calculate(w1, w2, data, display):
    x = T.vector('x')
    w = T.vector('w')
    s = 1 / (1 + T.exp(-T.dot(x, w)))
    logistic = theano.function([x, w], s)

    if display:
        print("With: w1 = %f and w2 = %f" % (w1, w2))

    sum_error = 0
    sum_error_square = 0

    # When given a filename (or nothing), read the rows into a fresh list.
    if isinstance(data, str) or not len(data):
        if not len(data):
            data = 'Data.txt'
        rows = []
        with open('dataFiles/' + data) as fp:
            reader = csv.reader(fp, delimiter=',')
            for line in reader:
                rows.append([int(line[0]), float(line[1]), float(line[2])])
        data = rows

    if display:
        print('y\t\tf(x)\t\tE\t\tE^2')

    for i in range(0, len(data)):
        x1 = data[i][1]
        x2 = data[i][2]
        f = logistic([x1, x2], [w1, w2])
        e = data[i][0] - f
        e2 = e ** 2
        sum_error += e
        sum_error_square += e2
        if display:
            print('%f\t%f\t%f\t%f' % (data[i][0], f, e, e2))

    if display:
        print("\nSum:\t\t\t\t%f\t%f" % (sum_error, sum_error_square))

    return sum_error_square
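# Hedged usage sketch for calculate() above: the weights and the inline
# dataset are illustrative assumptions, and the csv/theano imports are
# presumed to be in scope. Each row is [y, x1, x2]; passing a non-empty
# list skips the CSV-loading branch.
if __name__ == '__main__':
    sample_data = [[1, 0.5, -1.2], [0, -0.3, 0.8]]
    sse = calculate(0.1, -0.2, sample_data, display=True)
    print('sum of squared errors: %f' % sse)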
def test_grad_lazy_if(self):
    # Tests that we can compute the gradients through lazy if
    x = tensor.vector('x', dtype=self.dtype)
    y = tensor.vector('y', dtype=self.dtype)
    c = tensor.iscalar('c')
    z = ifelse(c, x, y)
    gx, gy = tensor.grad(z.sum(), [x, y])

    f = theano.function([c, x, y],
                        [self.cast_output(gx), self.cast_output(gy)],
                        mode=self.mode)
    # Only 2 of the 3 ifelse nodes are moved to the GPU; the one that
    # stays on the CPU is for the shape.
    self.assertFunctionContains(f, self.get_ifelse(1), min=2, max=3)
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)
    vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

    gx0, gy0 = f(1, vx, vy)
    assert numpy.allclose(gx0.shape, vx.shape)
    assert numpy.allclose(gy0.shape, vy.shape)
    assert numpy.all(numpy.asarray(gx0) == 1.)
    assert numpy.all(numpy.asarray(gy0) == 0.)

    gx0, gy0 = f(0, vx, vy)
    assert numpy.allclose(gx0.shape, vx.shape)
    assert numpy.allclose(gy0.shape, vy.shape)
    assert numpy.all(numpy.asarray(gx0) == 0.)
    assert numpy.all(numpy.asarray(gy0) == 1.)
def test_bincountOp(self):
    w = T.vector('w')
    # uint64 always fails; int64 and uint32 also fail if Python ints
    # are 32-bit.
    int_bitwidth = theano.gof.python_int_bitwidth()
    if int_bitwidth == 64:
        numpy_unsupported_dtypes = ('uint64',)
    elif int_bitwidth == 32:
        numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')

    for dtype in ('int8', 'int16', 'int32', 'int64',
                  'uint8', 'uint16', 'uint32', 'uint64'):
        x = T.vector('x', dtype=dtype)

        if dtype in numpy_unsupported_dtypes:
            self.assertRaises(TypeError, bincount, x)
        else:
            a = np.random.random_integers(50, size=(25)).astype(dtype)
            weights = np.random.random((25,)).astype(config.floatX)

            f1 = theano.function([x], bincount(x))
            f2 = theano.function([x, w], bincount(x, weights=w))

            assert (np.bincount(a) == f1(a)).all()
            assert np.allclose(np.bincount(a, weights=weights),
                               f2(a, weights))
            if not numpy_16:
                continue
            f3 = theano.function([x], bincount(x, minlength=23))
            f4 = theano.function([x], bincount(x, minlength=5))
            assert (np.bincount(a, minlength=23) == f3(a)).all()
            assert (np.bincount(a, minlength=5) == f4(a)).all()
def test_softmax_optimizations_w_bias2(self):
    x = tensor.matrix('x')
    b = tensor.vector('b')
    c = tensor.vector('c')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot

    env = gof.Env(
        [x, b, c, one_of_n],
        [op(softmax(T.add(x, b, c)), one_of_n)])
    assert env.outputs[0].owner.op == op

    print 'BEFORE'
    for node in env.toposort():
        print node.op
    print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(env)

    print 'AFTER'
    for node in env.toposort():
        print node.op
    print '===='

    assert len(env.toposort()) == 3
    assert str(env.outputs[0].owner.op) == 'OutputGuard'
    assert (env.outputs[0].owner.inputs[0].owner.op ==
            crossentropy_softmax_argmax_1hot_with_bias)
def __init__(self, vocab_size, dim, lr=0.5):
    W = np.asarray(np.random.rand(vocab_size, dim),
                   dtype=theano.config.floatX) / float(dim)
    W1 = np.asarray(np.random.rand(vocab_size, dim),
                    dtype=theano.config.floatX) / float(dim)
    self.W = theano.shared(W, name='W', borrow=True)
    self.W1 = theano.shared(W1, name='W1', borrow=True)

    gW = np.asarray(np.ones((vocab_size, dim)), dtype=theano.config.floatX)
    gW1 = np.asarray(np.ones((vocab_size, dim)), dtype=theano.config.floatX)
    self.gW = theano.shared(gW, name='gW', borrow=True)
    self.gW1 = theano.shared(gW1, name='gW1', borrow=True)

    X = T.vector()
    fX = T.vector()
    ind_W = T.ivector()
    ind_W1 = T.ivector()

    w = self.W[ind_W, :]
    w1 = self.W1[ind_W1, :]

    cost = T.sum(fX * ((T.sum(w * w1, axis=1) - X) ** 2))
    # T.grad returns a list, so clip each gradient individually.
    grad = [T.clip(g, -5.0, 5.0) for g in T.grad(cost, [w, w1])]

    updates1 = [(self.gW, T.inc_subtensor(self.gW[ind_W, :],
                                          grad[0] ** 2))]
    updates2 = [(self.gW1, T.inc_subtensor(self.gW1[ind_W1, :],
                                           grad[1] ** 2))]
    updates3 = [(self.W, T.inc_subtensor(
        self.W[ind_W, :],
        -(lr / T.sqrt(self.gW[ind_W, :])) * grad[0]))]
    updates4 = [(self.W1, T.inc_subtensor(
        self.W1[ind_W1, :],
        -(lr / T.sqrt(self.gW1[ind_W1, :])) * grad[1]))]
    updates = updates1 + updates2 + updates3 + updates4

    self.cost_fn = theano.function(
        inputs=[ind_W, ind_W1, X, fX],
        outputs=cost,
        updates=updates)
def _compile_func():
    beta = T.vector('beta')
    b = T.scalar('b')
    X = T.matrix('X')
    y = T.vector('y')
    C = T.scalar('C')
    params = [beta, b, X, y, C]
    cost = 0.5 * (T.dot(beta, beta) + b * b) + C * T.sum(
        T.nnet.softplus(
            -T.dot(T.diag(y), T.dot(X, beta) + b)
        )
    )
    # Function computing in one go the cost, its gradient
    # with regard to beta and with regard to the bias.
    cost_grad = theano.function(params, [
        cost,
        T.grad(cost, beta),
        T.grad(cost, b)
    ])

    # Function for computing element-wise sigmoid, used for
    # prediction.
    log_predict = theano.function(
        [beta, b, X],
        T.nnet.sigmoid(b + T.dot(X, beta)),
        on_unused_input='warn'
    )

    return (cost_grad, log_predict)
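# A minimal sketch of how the compiled pair from _compile_func() might be
# used; the data, dimensions, and {-1, +1} label convention are assumptions
# for illustration, with numpy/theano imports presumed in scope.
cost_grad, log_predict = _compile_func()
X_data = np.random.randn(20, 5).astype(theano.config.floatX)
y_data = np.where(np.random.rand(20) > 0.5, 1., -1.).astype(theano.config.floatX)
beta0 = np.zeros(5, dtype=theano.config.floatX)
# Argument order follows params = [beta, b, X, y, C] above.
cost_val, g_beta, g_b = cost_grad(beta0, 0., X_data, y_data, 1.0)
probs = log_predict(beta0, 0., X_data)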
def test_wrong_coefficient_matrix(self):
    x = tensor.vector()
    y = tensor.vector()
    z = tensor.scalar()
    b = theano.tensor.nlinalg.lstsq()(x, y, z)
    f = function([x, y, z], b)
    self.assertRaises(np.linalg.linalg.LinAlgError, f, [2, 1], [2, 1], 1)
def test_wrong_rcond_dimension(self):
    x = tensor.vector()
    y = tensor.vector()
    z = tensor.vector()
    b = theano.tensor.nlinalg.lstsq()(x, y, z)
    f = function([x, y, z], b)
    self.assertRaises(np.linalg.LinAlgError, f, [2, 1], [2, 1], [2, 1])
def _compile_bp(self):
    '''
    Compile backpropagation for each of the DQNs.
    '''
    self.bprop_by_goal = {}
    for (goal, dqn) in self.dqn_by_goal.items():
        states = dqn.states
        action_values = dqn.action_values
        params = dqn.params
        targets = T.vector('target')
        shared_values = T.vector('shared_values')
        last_actions = T.lvector('action')

        # loss function.
        mse = layers.MSE(action_values[T.arange(action_values.shape[0]),
                                       last_actions], targets) \
            + T.mean(abs(action_values[T.arange(action_values.shape[0]),
                                       last_actions] - shared_values))
        # l2 penalty.
        l2_penalty = 0.
        for param in params:
            l2_penalty += (param ** 2).sum()

        cost = mse + self.l2_reg * l2_penalty

        # back propagation.
        updates = optimizers.Adam(cost, params, alpha=self.lr)

        td_errors = T.sqrt(mse)
        self.bprop_by_goal[goal] = theano.function(
            inputs=[states, last_actions, targets, shared_values],
            outputs=td_errors,
            updates=updates)
def test_tagging():
    brick = TestBrick(0)
    x = tensor.vector('x')
    y = tensor.vector('y')
    z = tensor.vector('z')

    def check_output_variable(o):
        assert get_application_call(o).application.brick is brick
        assert (get_application_call(o.owner.inputs[0]).application.brick
                is brick)

    # Case 1: both positional arguments are provided.
    u, v = brick.apply(x, y)
    for o in [u, v]:
        check_output_variable(o)

    # Case 2: `b` is given as a keyword argument.
    u, v = brick.apply(x, y=y)
    for o in [u, v]:
        check_output_variable(o)

    # Case 3: two positional and one keyword argument.
    u, v, w = brick.apply(x, y, z=z)
    for o in [u, v, w]:
        check_output_variable(o)

    # Case 4: one positional argument.
    u, v = brick.apply(x)
    check_output_variable(u)
    assert v == 1

    # Case 5: variable was wrapped in a list. We can not handle that.
    u, v = brick.apply([x])
    assert_raises(AttributeError, check_output_variable, u)
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
         FinishAfter(after_n_batches=num_batches),
         DataStreamMonitoring(
             [cost], get_data_stream(range(100, 200)),
             prefix="test"),
         TrainingDataMonitoring([cost], after_epoch=True),
         Dump(save_to),
         Printing()])
    main_loop.run()
    return main_loop
def test_infer_shape(self):
    for ndim in [1, 3]:
        x = T.TensorType(config.floatX, [False] * ndim)()
        shp = (np.arange(ndim) + 1) * 3
        a = np.random.random(shp).astype(config.floatX)

        for axis in self._possible_axis(ndim):
            for dtype in ["int8", "uint8", "uint64"]:
                r_var = T.scalar(dtype=dtype)
                r = np.asarray(3, dtype=dtype)
                if dtype in self.numpy_unsupported_dtypes:
                    r_var = T.vector(dtype=dtype)
                    self.assertRaises(TypeError, repeat, x, r_var)
                else:
                    self._compile_and_check([x, r_var],
                                            [RepeatOp(axis=axis)(x, r_var)],
                                            [a, r],
                                            self.op_class)

                    r_var = T.vector(dtype=dtype)
                    if axis is None:
                        r = np.random.randint(
                            1, 6, size=a.size).astype(dtype)
                    elif a.size > 0:
                        r = np.random.randint(
                            1, 6, size=a.shape[axis]).astype(dtype)
                    else:
                        r = np.random.randint(
                            1, 6, size=(10,)).astype(dtype)

                    self._compile_and_check(
                        [x, r_var],
                        [RepeatOp(axis=axis)(x, r_var)],
                        [a, r],
                        self.op_class)
def test_check_theano_variable():
    check_theano_variable(None, 3, 'float')
    check_theano_variable([[1, 2]], 2, 'int')
    assert_raises(ValueError, check_theano_variable,
                  tensor.vector(), 2, 'float')
    assert_raises(ValueError, check_theano_variable,
                  tensor.vector(), 1, 'int')
def test_softmax_optimizations_w_bias_vector(self):
    x = tensor.vector('x')
    b = tensor.vector('b')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    fgraph = gof.FunctionGraph(
        [x, b, one_of_n],
        [op(softmax(x + b), one_of_n)])
    assert fgraph.outputs[0].owner.op == op
    #print 'BEFORE'
    #for node in fgraph.toposort():
    #    print node.op
    #    print printing.pprint(node.outputs[0])
    #print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
    #print 'AFTER'
    #for node in fgraph.toposort():
    #    print node.op
    #print '===='
    assert len(fgraph.toposort()) == 3
    assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
    assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
            crossentropy_softmax_argmax_1hot_with_bias)
def test_swap_SharedVariable_with_given(self):
    # A special testcase for logistic_sgd.py in the Deep Learning Tutorial.
    # This test asserts that SharedVariables in different functions share
    # the same storage.
    train_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))
    test_x = theano.shared(value=np.random.rand(10, 10).astype(config.floatX))

    train_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))
    test_y = theano.shared(value=np.random.rand(10, 1).astype(config.floatX))

    i = T.iscalar('index')
    x = T.vector('x')
    y = T.vector('y')

    # This formula is meaningless; it exists only for the test.
    out = (T.sum(x) - y) ** 2

    train = theano.function([i], out,
                            givens={x: train_x[i], y: train_y[i]},
                            updates={train_x: train_x + 0.1})

    test_def = theano.function([i], out,
                               givens={x: test_x[i], y: test_y[i]})
    test_cpy = train.copy(swap={train_x: test_x, train_y: test_y},
                          delete_updates=True)

    for in1, in2 in zip(test_def.maker.inputs, test_cpy.maker.inputs):
        assert in1.value is in2.value
def test_multiple_outputs(self):
    m = tensor.matrix('m')
    v = tensor.vector('v')
    m_ = tensor.matrix('m_')
    v_ = tensor.vector('v_')

    mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
    vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
    m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
    v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX)

    rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
    assert isinstance(rop_out1, list)
    assert len(rop_out1) == 3
    rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
    assert isinstance(rop_out2, tuple)
    assert len(rop_out2) == 3

    lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
    assert isinstance(lop_out1, tuple)
    assert len(lop_out1) == 2
    lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
    assert isinstance(lop_out2, list)
    assert len(lop_out2) == 2

    all_outs = []
    for o in rop_out1, rop_out2, lop_out1, lop_out2:
        all_outs.extend(o)
    f = theano.function([m, v, m_, v_], all_outs)
    f(mval, vval, m_val, v_val)
def test_normal_vector(self):
    rng_R = random_state_type()
    avg = tensor.vector()
    std = tensor.vector()
    post_r, out = normal(rng_R, avg=avg, std=std)
    assert out.ndim == 1
    f = compile.function([rng_R, avg, std], [post_r, out],
                         accept_inplace=True)

    def as_floatX(thing):
        return numpy.asarray(thing, dtype=theano.config.floatX)

    avg_val = [1, 2, 3]
    std_val = as_floatX([0.1, 0.2, 0.3])
    rng = numpy.random.RandomState(utt.fetch_seed())
    numpy_rng = numpy.random.RandomState(utt.fetch_seed())

    # Arguments of size (3,)
    rng0, val0 = f(rng, avg_val, std_val)
    numpy_val0 = as_floatX(numpy_rng.normal(loc=as_floatX(avg_val),
                                            scale=as_floatX(std_val)))
    assert numpy.all(val0 == numpy_val0)

    # arguments of size (2,)
    rng1, val1 = f(rng0, avg_val[:-1], std_val[:-1])
    numpy_val1 = numpy.asarray(numpy_rng.normal(loc=avg_val[:-1],
                                                scale=std_val[:-1]),
                               dtype=theano.config.floatX)
    assert numpy.all(val1 == numpy_val1)

    # Specifying the size explicitly
    g = compile.function([rng_R, avg, std],
                         normal(rng_R, avg=avg, std=std, size=(3,)),
                         accept_inplace=True)
    rng2, val2 = g(rng1, avg_val, std_val)
    numpy_val2 = numpy.asarray(numpy_rng.normal(loc=avg_val, scale=std_val,
                                                size=(3,)),
                               dtype=theano.config.floatX)
    assert numpy.all(val2 == numpy_val2)
    self.assertRaises(ValueError, g, rng2, avg_val[:-1], std_val[:-1])
def test_theano():
    import theano
    import theano.tensor as tt
    import matplotlib.pyplot as plt

    lif_params = dict(tau_rc=0.02, tau_ref=0.002, gain=1, bias=1,
                      amp=1. / 63.04)
    softlif_params = dict(lif_params)
    softlif_params['sigma'] = 0.01

    x = np.linspace(-1, 1)

    lif = get_theano_fn('lif', lif_params)
    sx = tt.vector()
    lif = theano.function([sx], lif(sx))

    softlif = get_theano_fn('softlif', softlif_params)
    sx = tt.vector()
    softlif = theano.function([sx], softlif(sx))

    y_lif = lif(x)
    y_softlif = softlif(x)

    plt.figure()
    plt.plot(x, y_lif)
    plt.plot(x, y_softlif)
    plt.show()
def build_model(reg_constant=0.1, var1_name='var1', var2_name='var2'):
    """
    Build MF model in theano

    :param reg_constant: Regularization constant
    :param var1_name: Name of first variable (e.g. users)
    :param var2_name: Name of second variable (e.g. items)
    :return: theano function implementing MF model
    """
    ratings = T.vector('ratings')
    var1_vector = T.vector('{}_vector'.format(var1_name))
    var2_matrix = T.matrix('{}_matrix'.format(var2_name))

    predictions = (T.dot(var2_matrix[:, 1:], var1_vector[1:]) +
                   var2_matrix[:, 0] + var1_vector[0])
    prediction_error = ((ratings - predictions) ** 2).sum()
    l2_penalty = (var1_vector ** 2).sum() + (var2_matrix ** 2).sum().sum()
    cost = prediction_error + reg_constant * l2_penalty

    var1_grad, var2_grad = T.grad(cost, [var1_vector, var2_matrix])
    var1_grad /= var2_matrix.shape[0]

    f = theano.function(inputs=[ratings, var1_vector, var2_matrix],
                        outputs=[cost, var1_grad, var2_grad])
    return f
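# Illustrative call of build_model(); the latent dimension and the random
# data are assumptions for this sketch (numpy/theano imports presumed in
# scope). Index 0 of the vector and column 0 of the matrix hold the bias
# terms, matching the slicing above.
mf = build_model(reg_constant=0.1)
n_items, k = 4, 3
ratings = np.random.rand(n_items).astype(theano.config.floatX)
user_vec = np.random.rand(k + 1).astype(theano.config.floatX)
item_mat = np.random.rand(n_items, k + 1).astype(theano.config.floatX)
cost, user_grad, item_grad = mf(ratings, user_vec, item_mat)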
def test_grad_lazy_if(self):
    # Tests that we can compute the gradients through lazy if
    x = tensor.vector('x')
    y = tensor.vector('y')
    c = tensor.iscalar('c')
    z = ifelse(c, x, y)
    gx, gy = tensor.grad(z.sum(), [x, y])

    f = theano.function([c, x, y], [gx, gy])
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)
    vx = numpy.asarray(rng.uniform(size=(xlen,)), theano.config.floatX)
    vy = numpy.asarray(rng.uniform(size=(ylen,)), theano.config.floatX)

    gx0, gy0 = f(1, vx, vy)
    assert numpy.allclose(gx0.shape, vx.shape)
    assert numpy.allclose(gy0.shape, vy.shape)
    assert numpy.all(gx0 == 1.)
    assert numpy.all(gy0 == 0.)

    gx0, gy0 = f(0, vx, vy)
    assert numpy.allclose(gx0.shape, vx.shape)
    assert numpy.allclose(gy0.shape, vy.shape)
    assert numpy.all(gx0 == 0.)
    assert numpy.all(gy0 == 1.)
def __init__(self, dic_size, window, unit_id, tag_num, net_size,
             weight_decay, word_dim=50, learning_rate=0.1):
    def f_softplus(x):
        return T.log(T.exp(x) + 1)  # - np.log(2)

    def f_rectlin(x):
        return x * (x > 0)

    def f_rectlin2(x):
        return x * (x > 0) + 0.01 * x

    nonlinear = {'tanh': T.tanh, 'sigmoid': T.nnet.sigmoid,
                 'softplus': f_softplus, 'rectlin': f_rectlin,
                 'rectlin2': f_rectlin2}
    self.non_unit = nonlinear[unit_id]
    self.weight_decay = weight_decay
    self.tag_num = tag_num
    self.window_size = window
    self.learning_rate = learning_rate
    self.worddim = word_dim
    self.w, self.b, self.A = self.init_w(net_size, tag_num)
    self.w2vtable = self.init_wtable(word_dim, dic_size)  # table of word vectors

    x = T.vector('x')
    w = []
    b = []
    for i in range(len(self.w)):
        w.append(T.matrix())
        b.append(T.vector())
    output = self.network(x, w, b)

    og = []
    for j in range(self.tag_num):
        og.extend(T.grad(output[j], w + b + [x]))

    self.outfunction = theano.function([x] + w + b, output)
    self.goutfunction = theano.function([x] + w + b, [output] + og)
def test_logpy():
    x = tensor.vector()
    y = tensor.vector()
    z = tensor.inc_subtensor(x[1:3], y)
    node = z.owner

    # otherwise theano chokes on var attributes when nose tries to print
    # a traceback
    # XXX this should be un-monkey-patched after the test runs by e.g. a
    # context manager decorator
    theano.gof.Apply.__repr__ = object.__repr__
    theano.gof.Apply.__str__ = object.__str__

    w = dict((name, var(name)) for name in [
        'start', 'stop', 'step', 'set_instead_of_inc', 'inputs', 'outputs',
        'inplace', 'whole_op', 'dta',
    ])
    pattern = raw_init(theano.Apply,
                       op=raw_init(theano.tensor.IncSubtensor,
                                   idx_list=[slice(w['start'], w['stop'],
                                                   w['step'])],
                                   inplace=w['inplace'],
                                   set_instead_of_inc=w['set_instead_of_inc'],
                                   destroyhandler_tolerate_aliased=w['dta']),
                       inputs=w['inputs'],
                       outputs=w['outputs'])
    match, = run(0, w, (eq, node, pattern))

    assert match['stop'] == 3
    assert match['inputs'] == [x, y]
def test_rmsprop_0():
    # input
    x = TT.vector(name='x')
    B = theano.shared(floatX(np.ones((3, 5))), name='B')
    c = theano.shared(floatX(np.ones(3)), name='c')
    params = [B, c]
    # output
    y_pred = TT.nnet.softmax(TT.dot(B, x.T).T + c)
    y_gold = TT.vector(name="y_gold")
    # cost and grads
    cost = TT.sum((y_pred - y_gold) ** 2)
    grads = TT.grad(cost, wrt=params)
    # funcs
    cost_func, update_func, rms_params = rmsprop(params, grads,
                                                 [x], y_gold, cost)
    # check return values
    assert len(rms_params) == 4
    assert isinstance(rms_params[0][0], TT.sharedvar.TensorSharedVariable)
    assert not np.any(rms_params[0][0].get_value())
    # check convergence
    X = [floatX(np.random.rand(5)) for _ in xrange(N)]
    Y = [floatX(np.random.rand(3)) for _ in xrange(N)]
    icost = init_cost = end_cost = 0.
    for i in xrange(MAX_I):
        icost = 0.
        for x, y in zip(X, Y):
            icost += cost_func(x, y)
        update_func()
        if i == 0:
            init_cost = icost
        elif i == MAX_I - 1:
            end_cost = icost
    assert end_cost < init_cost
def __init__(self, C, D):
    self.W = theano.shared(np.ones((C, D), dtype='float32'))

    t_M = T.matrix('M', dtype='float32')
    t_vM = T.vector('M', dtype='float32')
    t_Y = T.vector('Y', dtype='float32')
    t_I = T.vector('I', dtype='float32')
    t_s = T.vector('s', dtype='float32')
    t_eps = T.scalar('epsilon', dtype='float32')

    self.input_integration = theano.function(
        [t_Y],
        T.dot(T.log(self.W), t_Y),
        allow_input_downcast=True)
    self.M_summation = theano.function(
        [t_M],
        T.sum(t_M, axis=0),
        allow_input_downcast=True)
    self.recurrent_softmax = theano.function(
        [t_I, t_vM],
        t_vM * T.exp(t_I) / T.sum(t_vM * T.exp(t_I)),
        allow_input_downcast=True)
    self.weight_update = theano.function(
        [t_Y, t_s, t_eps],
        self.W,
        updates={self.W: self.W + t_eps * (T.outer(t_s, t_Y) -
                                           t_s[:, np.newaxis] * self.W)},
        allow_input_downcast=True)

    self.epsilon = None
    self._Y = None
    self._s = None
def test_lop_override(self, cls_ofg):
    x = T.vector()
    y = 1. / (1. + T.exp(-x))

    def lop_ov(inps, outs, grads):
        y_, = outs
        dedy_, = grads
        return [2. * y_ * (1. - y_) * dedy_]

    y_, dedy = T.vector(), T.vector()
    op_lop_ov = cls_ofg([x, y_, dedy], [2. * y_ * (1. - y_) * dedy])

    xx = T.vector()
    yy1 = T.sum(T.nnet.sigmoid(xx))
    gyy1 = 2. * T.grad(yy1, xx)

    for ov in [lop_ov, op_lop_ov]:
        op = cls_ofg([x], [y], lop_overrides=ov)
        yy2 = T.sum(op(xx))
        gyy2 = T.grad(yy2, xx)
        fn = function([xx], [gyy1, gyy2])

        xval = np.random.rand(32).astype(config.floatX)
        y1val, y2val = fn(xval)
        assert np.allclose(y1val, y2val)
def setup_decoder_step(self):
    """Advance the decoder by one step. Used at test time."""
    y_t = T.lscalar('y_t_for_dec')
    c_prev = T.vector('c_prev_for_dec')
    h_prev = T.vector('h_prev_for_dec')
    h_t = self.spec.f_dec(y_t, c_prev, h_prev)
    self._decoder_step = theano.function(
        inputs=[y_t, c_prev, h_prev], outputs=h_t)
def test_uniform_vector(self):
    rng_R = random_state_type()
    low = tensor.vector()
    high = tensor.vector()
    post_r, out = uniform(rng_R, low=low, high=high)
    assert out.ndim == 1
    f = compile.function([rng_R, low, high], [post_r, out],
                         accept_inplace=True)

    def as_floatX(thing):
        return numpy.asarray(thing, dtype=theano.config.floatX)

    low_val = as_floatX([0.1, 0.2, 0.3])
    high_val = as_floatX([1.1, 2.2, 3.3])
    rng = numpy.random.RandomState(utt.fetch_seed())
    numpy_rng = numpy.random.RandomState(utt.fetch_seed())

    # Arguments of size (3,)
    rng0, val0 = f(rng, low_val, high_val)
    numpy_val0 = as_floatX(numpy_rng.uniform(low=low_val, high=high_val))
    assert numpy.all(val0 == numpy_val0)

    # arguments of size (2,)
    rng1, val1 = f(rng0, low_val[:-1], high_val[:-1])
    numpy_val1 = as_floatX(numpy_rng.uniform(low=low_val[:-1],
                                             high=high_val[:-1]))
    assert numpy.all(val1 == numpy_val1)

    # Specifying the size explicitly
    g = compile.function([rng_R, low, high],
                         uniform(rng_R, low=low, high=high, size=(3,)),
                         accept_inplace=True)
    rng2, val2 = g(rng1, low_val, high_val)
    numpy_val2 = as_floatX(numpy_rng.uniform(low=low_val, high=high_val,
                                             size=(3,)))
    assert numpy.all(val2 == numpy_val2)
    self.assertRaises(ValueError, g, rng2, low_val[:-1], high_val[:-1])
def test_gru_init_val_error():
    # check that an error is raised when hid_init is a non-matrix
    # TensorVariable
    vector = T.vector()
    with pytest.raises(ValueError):
        l_rec = GRULayer(InputLayer((2, 2, 3)), 5, hid_init=vector)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import theano
import theano.tensor as T

# Define shared variables
W = theano.shared(np.array([[1, 2, 3], [4, 5, 6]],
                           dtype=theano.config.floatX),
                  name='W', borrow=True)
b = theano.shared(np.array([1, 1], dtype=theano.config.floatX),
                  name='b', borrow=True)

# Read back the shared variables
print W.get_value()
print b.get_value()

# Create a symbolic variable
x = T.vector('x')

# Combine the symbol and the shared variables into an expression
y = T.dot(W, x) + b
print type(y)

# Define and compile a function
f = theano.function(inputs=[x], outputs=y)
print f([1, 1, 1])
def __init__(self, rng, n_in, n_out, n_h, dropout=0, sigma_g=sigmoid,
             sigma_c=hyperbolic_tangent, sigma_h=hyperbolic_tangent,
             sigma_y=softmax, dropout_rate=0, obj='c'):
    '''
    :param rng: Numpy RandomState
    :param n_in: Input dimension (int)
    :param n_out: Output dimension (int)
    :param n_h: Hidden dimension (int)
    :param sigma_g, sigma_c, sigma_h, sigma_y: activation functions
    :param dropout_rate: dropout rate (float)
    :param obj: objective type, 'c' for classification with cross entropy
        loss, 'r' for regression with MSE loss. (['c','r'])
    '''
    Wf_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                      np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
    Uf_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                      np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
    bf_ = np.zeros(n_h)
    Wi_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                      np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
    Ui_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                      np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
    bi_ = np.zeros(n_h)
    Wo_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                      np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
    Uo_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                      np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
    bo_ = np.zeros(n_h)
    Wc_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                      np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
    Uc_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                      np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
    bc_ = np.zeros(n_h)
    Wy_ = rng.uniform(-np.sqrt(6. / (n_out + n_h)),
                      np.sqrt(6. / (n_out + n_h)), (n_out, n_h))
    by_ = np.zeros(n_out)
    h0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)),
                      np.sqrt(3. / (2. * n_h)), n_h)
    c0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)),
                      np.sqrt(3. / (2. * n_h)), n_h)

    # Theano: create shared variables
    Wf = theano.shared(name='Wf', value=Wf_.astype(theano.config.floatX))
    Uf = theano.shared(name='Uf', value=Uf_.astype(theano.config.floatX))
    bf = theano.shared(name='bf', value=bf_.astype(theano.config.floatX))
    Wi = theano.shared(name='Wi', value=Wi_.astype(theano.config.floatX))
    Ui = theano.shared(name='Ui', value=Ui_.astype(theano.config.floatX))
    bi = theano.shared(name='bi', value=bi_.astype(theano.config.floatX))
    Wo = theano.shared(name='Wo', value=Wo_.astype(theano.config.floatX))
    Uo = theano.shared(name='Uo', value=Uo_.astype(theano.config.floatX))
    bo = theano.shared(name='bo', value=bo_.astype(theano.config.floatX))
    Wc = theano.shared(name='Wc', value=Wc_.astype(theano.config.floatX))
    Uc = theano.shared(name='Uc', value=Uc_.astype(theano.config.floatX))
    bc = theano.shared(name='bc', value=bc_.astype(theano.config.floatX))
    Wy = theano.shared(name='Wy', value=Wy_.astype(theano.config.floatX))
    by = theano.shared(name='by', value=by_.astype(theano.config.floatX))
    h0 = theano.shared(name='h0', value=h0_.astype(theano.config.floatX))
    c0 = theano.shared(name='c0', value=c0_.astype(theano.config.floatX))

    self.p = [Wf, Uf, bf, Wi, Ui, bi, Wo, Uo, bo, Wc, Uc, bc, Wy, by,
              c0, h0]

    seq_len = T.iscalar('seq_len')
    self.seq_len = seq_len
    self.x = T.vector()
    x_scan = T.reshape(self.x, [seq_len, n_in], ndim=2)

    if dropout_rate > 0:
        np.random.seed(int(time.time()))

        # for training
        def masked_forward_prop_step(x_t, h_t_prev, c_t_prev):
            f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
            i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
            o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
            c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
            c_t += c_t_prev * f_t
            h_t = o_t * sigma_h(c_t)
            y_t = Wy.dot(h_t) + by
            mask = np.random.binomial(np.ones(n_h, dtype=int),
                                      1.0 - dropout_rate)
            masked_h_t = h_t * T.cast(mask, theano.config.floatX)
            return [y_t, masked_h_t, c_t]

        # for testing
        def forward_prop_step(x_t, h_t_prev, c_t_prev):
            f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
            i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
            o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
            c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
            c_t += c_t_prev * f_t
            h_t = o_t * sigma_h(c_t)
            h_t = (1.0 - dropout_rate) * h_t
            y_t = Wy.dot(h_t) + by
            return [y_t, h_t, c_t]

        [o_train, _, _], _ = theano.scan(masked_forward_prop_step,
                                         sequences=[x_scan],
                                         outputs_info=[None, h0, c0],
                                         n_steps=seq_len)
        [o_test, _, _], _ = theano.scan(forward_prop_step,
                                        sequences=[x_scan],
                                        outputs_info=[None, h0, c0],
                                        n_steps=seq_len)
    else:
        def forward_prop_step(x_t, h_t_prev, c_t_prev):
            f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
            i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
            o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
            c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
            c_t += c_t_prev * f_t
            h_t = o_t * sigma_h(c_t)
            y_t = Wy.dot(h_t) + by
            return [y_t, h_t, c_t]

        [o_train, _, _], _ = theano.scan(forward_prop_step,
                                         sequences=[x_scan],
                                         outputs_info=[None, h0, c0],
                                         n_steps=seq_len)
        o_test = o_train

    if obj == 'c':
        # classification task
        self.y = T.bscalar('y')
        self.o_train = sigma_y(o_train[-1])
        self.o_test = sigma_y(o_test[-1])
        # objective function to compute grad; uses dropout
        self.cost = T.nnet.categorical_crossentropy(
            self.o_train, T.eye(n_out)[self.y])
        # compute accuracy using the averaged (dropout-scaled) outputs
        self.accuracy = T.switch(T.eq(T.argmax(self.o_test), self.y),
                                 1., 0.)
        self.prediction = np.argmax(self.o_test)
    elif obj == 'r':
        # regression task
        self.y = T.dscalar('y')
        self.o_train = o_train[-1]
        self.o_test = o_test[-1]
        # objective function to compute grad; uses dropout
        self.cost = (self.o_train[0] - self.y) ** 2
        # compute accuracy using the averaged (dropout-scaled) outputs
        self.accuracy = (self.o_test[0] - self.y) ** 2
        self.prediction = self.o_test[0]

    self.optimiser = sgd_optimizer(self, 'LSTM')
import theano
import theano.tensor as tt

rng = np.random

N = 400
feats = 784
D = (
    rng.randn(N, feats).astype(theano.config.floatX),
    rng.randint(size=N, low=0, high=2).astype(theano.config.floatX),
)
training_steps = 10000

# Declare Theano symbolic variables
x = tt.matrix("x")
y = tt.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(np.asarray(0.0, dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
# print "Initial model:"
# print w.get_value(), b.get_value()

# Construct Theano expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))  # Probability of having a one
prediction = p_1 > 0.5  # The prediction that is done: 0 or 1
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)  # Cross-entropy
cost = xent.mean() + 0.01 * (w ** 2).sum()  # The cost to optimize
gw, gb = tt.grad(cost, [w, b])
def init_opt(self):
    is_recurrent = int(self.policy.recurrent)

    # Init dual param values
    self.param_eta = 15.
    # Adjust for linear feature vector.
    self.param_v = np.random.rand(self.env.observation_space.flat_dim * 2 + 4)

    # Theano vars
    obs_var = self.env.observation_space.new_tensor_variable(
        'obs',
        extra_dims=1 + is_recurrent,
    )
    action_var = self.env.action_space.new_tensor_variable(
        'action',
        extra_dims=1 + is_recurrent,
    )
    rewards = ext.new_tensor(
        'rewards',
        ndim=1 + is_recurrent,
        dtype=theano.config.floatX,
    )
    # Feature difference variable representing the difference in feature
    # value of the next observation and the current observation
    # \phi(s') - \phi(s).
    feat_diff = ext.new_tensor('feat_diff',
                               ndim=2 + is_recurrent,
                               dtype=theano.config.floatX)
    param_v = TT.vector('param_v')
    param_eta = TT.scalar('eta')

    valid_var = TT.matrix('valid')

    state_info_vars = {
        k: ext.new_tensor(k, ndim=2 + is_recurrent,
                          dtype=theano.config.floatX)
        for k in self.policy.state_info_keys
    }
    state_info_vars_list = [
        state_info_vars[k] for k in self.policy.state_info_keys
    ]

    # Policy-related symbolics
    dist_info_vars = self.policy.dist_info_sym(obs_var, state_info_vars)
    dist = self.policy.distribution
    # log of the policy dist
    logli = dist.log_likelihood_sym(action_var, dist_info_vars)

    # Symbolic sample Bellman error
    delta_v = rewards + TT.dot(feat_diff, param_v)

    # Policy loss (negative because we minimize)
    if is_recurrent:
        loss = -TT.sum(logli * TT.exp(delta_v / param_eta -
                                      TT.max(delta_v / param_eta)) *
                       valid_var) / TT.sum(valid_var)
    else:
        loss = -TT.mean(logli * TT.exp(delta_v / param_eta -
                                       TT.max(delta_v / param_eta)))

    # Add regularization to loss.
    reg_params = self.policy.get_params(regularizable=True)
    loss += self.L2_reg_loss * TT.sum(
        [TT.mean(TT.square(param)) for param in reg_params]
    ) / len(reg_params)

    # Policy loss gradient.
    loss_grad = TT.grad(loss, self.policy.get_params(trainable=True))

    if is_recurrent:
        recurrent_vars = [valid_var]
    else:
        recurrent_vars = []

    input = [rewards, obs_var, feat_diff, action_var] + \
        state_info_vars_list + recurrent_vars + [param_eta, param_v]
    # if is_recurrent:
    #     input +=
    f_loss = ext.compile_function(
        inputs=input,
        outputs=loss,
    )
    f_loss_grad = ext.compile_function(
        inputs=input,
        outputs=loss_grad,
    )

    # Debug prints
    old_dist_info_vars = {
        k: ext.new_tensor('old_%s' % k, ndim=2 + is_recurrent,
                          dtype=theano.config.floatX)
        for k in dist.dist_info_keys
    }
    old_dist_info_vars_list = [
        old_dist_info_vars[k] for k in dist.dist_info_keys
    ]

    if is_recurrent:
        mean_kl = TT.sum(
            dist.kl_sym(old_dist_info_vars, dist_info_vars) *
            valid_var) / TT.sum(valid_var)
    else:
        mean_kl = TT.mean(dist.kl_sym(old_dist_info_vars, dist_info_vars))

    f_kl = ext.compile_function(
        inputs=[obs_var, action_var] + state_info_vars_list +
        old_dist_info_vars_list + recurrent_vars,
        outputs=mean_kl,
    )

    # Dual-related symbolics
    # Symbolic dual
    if is_recurrent:
        dual = param_eta * self.epsilon + \
            param_eta * TT.log(
                TT.sum(
                    TT.exp(
                        delta_v / param_eta - TT.max(delta_v / param_eta)
                    ) * valid_var
                ) / TT.sum(valid_var)
            ) + param_eta * TT.max(delta_v / param_eta)
    else:
        dual = param_eta * self.epsilon + \
            param_eta * TT.log(
                TT.mean(
                    TT.exp(
                        delta_v / param_eta - TT.max(delta_v / param_eta)
                    )
                )
            ) + param_eta * TT.max(delta_v / param_eta)
    # Add L2 regularization.
    dual += self.L2_reg_dual * \
        (TT.square(param_eta) + TT.square(1 / param_eta))

    # Symbolic dual gradient
    dual_grad = TT.grad(cost=dual, wrt=[param_eta, param_v])

    # Eval functions.
    f_dual = ext.compile_function(
        inputs=[rewards, feat_diff] + state_info_vars_list +
        recurrent_vars + [param_eta, param_v],
        outputs=dual)
    f_dual_grad = ext.compile_function(
        inputs=[rewards, feat_diff] + state_info_vars_list +
        recurrent_vars + [param_eta, param_v],
        outputs=dual_grad)

    self.opt_info = dict(
        f_loss_grad=f_loss_grad,
        f_loss=f_loss,
        f_dual=f_dual,
        f_dual_grad=f_dual_grad,
        f_kl=f_kl,
    )
import numpy
import theano
import theano.tensor as T

rng = numpy.random

N = 400      # training sample size
feats = 784  # number of input variables

# generate a dataset: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare Theano symbolic variables
x = T.matrix("x")
y = T.vector("y")

# initialize the weight vector w randomly
#
# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)
w = theano.shared(rng.randn(feats), name="w")

# initialize the bias term
b = theano.shared(0., name="b")

print("Initial model:")
print(w.get_value())
print(b.get_value())
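# The script above stops after printing the initial model. A sketch of the
# usual continuation, in the spirit of the classic Theano logistic-regression
# tutorial (an assumption here, not part of the original snippet):
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))            # probability that y = 1
prediction = p_1 > 0.5                             # thresholded class
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)  # cross-entropy
cost = xent.mean() + 0.01 * (w ** 2).sum()         # loss with L2 penalty
gw, gb = T.grad(cost, [w, b])

train = theano.function(inputs=[x, y],
                        outputs=[prediction, xent],
                        updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)

for i in range(training_steps):
    pred, err = train(D[0], D[1])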
def test_recurrent_init_val_error():
    # check that an error is raised when hid_init is a non-matrix tensor
    hid_init = T.vector()
    with pytest.raises(ValueError):
        l_rec = RecurrentLayer(InputLayer((2, 2, 3)), 5, hid_init=hid_init)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             initial_momentum=0.5, datasets="datasets", train_batch_size=20,
             n_hidden=[500, 200, 100], p=0.5, dropout=False, input_p=None,
             drug_name=None, OUT_FOLDER="OUT_FOLDER"):
    # Demonstrate stochastic gradient descent optimization for a multilayer
    # perceptron
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    #erlo_x, erlo_y = datasets[3]

    # MODIFIED
    valid_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    test_batch_size = test_set_x.get_value(borrow=True).shape[0]
    N_IN = valid_set_x.get_value(borrow=True).shape[1]
    train_samples = train_set_x.get_value(borrow=True).shape[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / train_batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / valid_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar("i")  # index to a [mini]batch
    vector = T.vector("v", dtype='int32')
    x = T.matrix('x')
    y = T.vector('y')
    # pseudo boolean for switching between training and prediction
    is_train = T.iscalar('is_train')

    rng = np.random.RandomState(1234)

    # construct the MLP class
    N_HIDDEN = ".".join([str(NN) for NN in n_hidden])
    classifier = MLP(
        rng=rng,
        is_train=is_train,
        input=x,
        n_in=N_IN,  # FIXED !!!!!!
        n_hidden=n_hidden,
        n_out=2,
        p=p,
        dropout=dropout,
        input_p=input_p
        #, batch_size=batch_size
    )

    #classifier.negative_log_likelihood(y)
    cost = (
        classifier.errors(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),  # negative_log_likelihood(y)
        givens={
            x: valid_set_x[index * valid_batch_size:(index + 1) * valid_batch_size],
            y: valid_set_y[index * valid_batch_size:(index + 1) * valid_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    test_cor = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    test_nrmse = theano.function(
        inputs=[index],
        outputs=classifier.NRMSE(y),
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    test_pred = theano.function(
        inputs=[index],
        outputs=classifier.pred(y),
        givens={
            x: test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y: test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    ###################################
    # learning rate to shared
    learning_rate = theano.shared(np.cast[theano.config.floatX](learning_rate))

    # momentum implementation stolen from
    # http://nbviewer.ipython.org/github/craffel/theano-tutorial/blob/master/Theano%20Tutorial.ipynb
    assert initial_momentum >= 0. and initial_momentum < 1.

    momentum = theano.shared(np.cast[theano.config.floatX](initial_momentum),
                             name='momentum', borrow=True)

    # List of update steps for each parameter
    updates = []
    # Just gradient descent on cost
    for param in classifier.params:
        # For each parameter, we'll create a param_update shared variable.
        # This variable will keep track of the parameter's update step
        # across iterations. We initialize it to 0.
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable,
                                     borrow=True)
        # Each parameter is updated by taking a step in the direction of
        # the gradient. However, we also "mix in" the previous step
        # according to the given momentum value. Note that when updating
        # param_update, we are using its old value and also the new
        # gradient step.
        updates.append((param, param - learning_rate * param_update))
        # Note that we don't need to derive backpropagation to compute
        # updates - just use T.grad!
        updates.append((param_update,
                        momentum * param_update +
                        (1. - momentum) * T.grad(cost, param) /
                        (2 * train_batch_size)))

    """
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
    """

    train_model = theano.function(
        inputs=[vector],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[vector, ],
            y: train_set_y[vector, ],
            is_train: np.cast['int32'](1)
        },
        on_unused_input='warn',
    )

    train_error = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * train_batch_size:(index + 1) * train_batch_size],
            y: train_set_y[index * train_batch_size:(index + 1) * train_batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='warn',
    )

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 18000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant (default = 0.995)
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    test_loss = 1
    test_pear = 0
    LR_COUNT = 1

    # STORE_FILE="_LR"+str(learning_rate)+"_EPOCHS"+str(n_epochs) + "_BATCH_SIZE"+str(train_batch_size) + \
    #     "_N_HIDDEN"+str(N_HIDDEN)+"_DROPOUT"+str(dropout)+"_P"+str(p)+"_IP"+str(input_p)
    #
    # STORE_RESULTS=open(OUT_FOLDER +"/"+ drug_name + STORE_FILE, "w")
    # STORE_RESULTS.write("LR"+"\t"+"EPOCHS"+"\t"+"BATCH_SIZE"+"\t"+
    #     "L1"+"\t"+"L2"+"\t"+"N_HIDDEN"+"\t"+"P_HIDDEN"+"\t"+"DROPOUT"+"\t"+"INPUT_DROPOUT"+"\t"+
    #     "EPOCH_N"+"\t"+"BATCH_TYPE"+"\t"+"LOSS")

    FILE_OUT = open(OUT_FOLDER + "/combined_D." + drug_name + ".txt", "w")
    FILE_OUT.write("EPOCH" + "\t" + "TRAIN" + "\t" + "VALID.ERROR" + "\t" +
                   "TEST.COR" + "\t" + "TEST.NRMSE")
    FILE_OUT.close()

    FILE_OUT_val = open(OUT_FOLDER + "/combined_D_values." + drug_name + ".txt", "w")
    FILE_OUT_val.write("EPOCH" + "\t" + "ACTUAL" + "\t" + "PREDICTED")
    FILE_OUT_val.close()

    with open(OUT_FOLDER + "/log." + drug_name + ".txt", "w") as logfile:
        logfile.write("")

    EPOCH_SIZE = n_train_batches

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # print "momentum: ", momentum.get_value()
        # print "learning rate: ", learning_rate.get_value()
        log = ("momentum: " + str(momentum.get_value()) +
               "; learning_rate: " + str(learning_rate.get_value()))
        with open(OUT_FOLDER + "/log." + drug_name + ".txt", "a") as logfile:
            logfile.write(log + "\n")

        # if LR_COUNT==1000:
        #     new_learning_rate = learning_rate.get_value() * 0.2
        #     print new_learning_rate
        #     learning_rate.set_value(np.cast[theano.config.floatX](new_learning_rate))

        #for minibatch_index in xrange(n_train_batches):
        for minibatch_index in xrange(EPOCH_SIZE):
            ran_index = list(np.random.randint(low=0,
                                               high=train_samples - 1,
                                               size=train_batch_size))
            minibatch_avg_cost = train_model(ran_index)
            rescale_weights(classifier.param_to_scale, 15.)

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            #if (iter + 1) % validation_frequency == 0:
            if (minibatch_index + 1) % EPOCH_SIZE == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                this_train_error = [train_error(i)
                                    for i in xrange(n_train_batches)]
                this_train_error = np.mean(this_train_error)

                log = ('epoch %i, minibatch %i/%i, train error %f, '
                       'validation error %f %%' %
                       (epoch, minibatch_index + 1, EPOCH_SIZE,
                        this_train_error, this_validation_loss))
                # print(log)
                with open(OUT_FOLDER + "/log." + drug_name + ".txt", "a") as logfile:
                    logfile.write(log + "\n")

                with open(OUT_FOLDER + "/combined_D." + drug_name + ".txt", "a") as FILE_OUT:
                    FILE_OUT.write("\n" + str(epoch) + "\t" +
                                   str(this_train_error) + "\t" +
                                   str(this_validation_loss) + "\t" +
                                   str(test_pear) + "\t" + str(test_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    LR_COUNT = 0

                    # improve patience if loss improvement is good enough
                    # if (
                    #     this_validation_loss < best_validation_loss *
                    #     improvement_threshold
                    # ):
                    #     patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_nrmse(i)
                                   for i in xrange(n_test_batches)]
                    test_loss = np.mean(test_losses)

                    test_pears = [test_cor(i)
                                  for i in xrange(n_test_batches)]
                    test_pear = np.mean(test_pears)

                    log = ((' epoch %i, minibatch %i/%i, test error of '
                            'best nrmse and pear %f,%f %%') %
                           (epoch, minibatch_index + 1, EPOCH_SIZE,
                            test_loss, test_pear))
                    # print(log)
                    with open(OUT_FOLDER + "/log." + drug_name + ".txt", "a") as logfile:
                        logfile.write(log + "\n")

                    # ONLY SAVE MODEL if validation improves
                    MODEL = [classifier.linearRegressionLayer]
                    for e in xrange(len(n_hidden)):
                        MODEL = MODEL + [getattr(classifier, "layer_" + str(e))]
                    MODEL = MODEL + [rng]
                    with open(OUT_FOLDER + "/" + drug_name + ".pkl", "wb") as f:
                        cPickle.dump(MODEL, f)

                    # Only write if validation improvement
                    ACTUAL = test_set_y.get_value()
                    PREDICTED = [test_pred(i)
                                 for i in xrange(n_test_batches)][0]
                    with open(OUT_FOLDER + "/combined_D_values." + drug_name + ".txt", "a") as FILE_OUT_val:
                        for l in xrange(len(ACTUAL)):
                            FILE_OUT_val.write("\n" + str(epoch) + "\t" +
                                               str(ACTUAL[l]) + "\t" +
                                               str(PREDICTED[l]))
                else:
                    LR_COUNT = LR_COUNT + 1

            # if patience <= iter:
            #     done_looping = True
            #     break
            # if LR_COUNT==100:
            #     done_looping = True
            #     break

        # adaption of momentum
        if momentum.get_value() < 0.99:
            new_momentum = 1. - (1. - momentum.get_value()) * 0.999
            momentum.set_value(np.cast[theano.config.floatX](new_momentum))

        # adaption of learning rate
        new_learning_rate = learning_rate.get_value() * 0.998
        learning_rate.set_value(np.cast[theano.config.floatX](new_learning_rate))

        # if epoch%500 == 0:
        #     new_learning_rate = learning_rate.get_value() * 0.1
        #     learning_rate.set_value(np.cast[theano.config.floatX](new_learning_rate))

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss, best_iter, test_pear))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split("__file__")[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
import theano
import theano.tensor as T
import numpy as np
import random

x = T.vector()
w1 = theano.shared(np.array([1., 1.]))
w2 = theano.shared(np.array([1., 1.]))
b1 = theano.shared(1.)
b2 = theano.shared(1.)

z1 = T.dot(w1, x) + b1
z2 = T.dot(w2, x) + b2
a1 = 1 / (1 + T.exp(-z1))
a2 = 1 / (1 + T.exp(-z2))

w3 = theano.shared(np.array([1., 1.]))
b3 = theano.shared(1.)
z3 = T.dot(w3, [a1, a2]) + b3
y = 1 / (1 + T.exp(-z3))

y_hat = T.scalar()

neuron = theano.function(inputs=[x], outputs=y)

cost = T.sum((y - y_hat) ** 2)
cost_function = theano.function(inputs=[y, y_hat], outputs=cost)

dw1, db1, dw2, db2, dw3, db3 = T.grad(cost, [w1, b1, w2, b2, w3, b3])

gradient = theano.function(
    inputs=[x, y_hat],
    updates=[(w1, w1 - 1 * dw1), (b1, b1 - 1 * db1),
             (w2, w2 - 1 * dw2), (b2, b2 - 1 * db2),
             (w3, w3 - 1 * dw3), (b3, b3 - 1 * db3)])
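# Hedged training sketch for the two-input network above; the XOR-style
# dataset and the iteration budget are illustrative assumptions, and
# convergence is not guaranteed with this learning rate.
data = [([0., 0.], 0.), ([0., 1.], 1.), ([1., 0.], 1.), ([1., 1.], 0.)]
for _ in range(10000):
    xs, t = random.choice(data)
    gradient(xs, t)        # one SGD step with learning rate 1
for xs, t in data:
    print(xs, neuron(xs))  # network output after training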
import numpy as np
import theano.tensor as T
import theano as th

th.config.openmp_elemwise_minsize = 1000
th.config.openmp = True

x = T.vector('x')
y = T.vector('y')

hit_test = x ** 2 + y ** 2 < 1
hits = hit_test.sum()
total = x.shape[0]  # total number of sample points
pi_est = 4 * hits / total

calculate_pi = th.function([x, y], pi_est)

x_val = np.random.uniform(-1, 1, 30000)
y_val = np.random.uniform(-1, 1, 30000)

import timeit
res = timeit.timeit("calculate_pi(x_val, y_val)",
                    "from __main__ import x_val, y_val, calculate_pi",
                    number=100000)
print(res)
mnist = MNIST()

# create the basic layer
layer1 = Dense(inputs=((None, 28 * 28), matrix("x")),
               outputs=1000,
               activation='linear')
layer1_act = Activation(inputs=((None, 1000), layer1.get_outputs()),
                        activation='relu')
# create the softmax classifier
layer2 = Softmax(inputs=((None, 1000), layer1_act.get_outputs()),
                 outputs=10,
                 out_as_probs=True)
# create the mlp from the two layers
mlp = Prototype(layers=[layer1, layer1_act, layer2])
# define the loss function
loss = Neg_LL(inputs=mlp.get_outputs(),
              targets=vector("y", dtype="int64"),
              one_hot=False)

# plot the loss
if BOKEH_AVAILABLE:
    plot = Plot("mlp_mnist",
                monitor_channels=Monitor("loss", loss.get_loss()),
                open_browser=True)
else:
    plot = None

# make an optimizer to train it (AdaDelta is a good default)
# optimizer = AdaDelta(model=mlp, dataset=mnist, n_epoch=20)
optimizer = AdaDelta(dataset=mnist, loss=loss, epochs=20)
# perform training!
# optimizer.train()
from sklearn.datasets import make_classification
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report

rng = np.random.RandomState(1999)
X, y = make_classification(n_samples=400, n_features=25, n_informative=10,
                           n_classes=2, n_clusters_per_class=2,
                           random_state=1999)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.8)
n_samples, n_features = X_train.shape

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(rng.randn(n_features), name='w')
b = theano.shared(0., name='b')
print("Initial model")
print(w.get_value(), b.get_value())

learning_rate = 0.01
reg = .1
n_iter = 10000

prob = 1 / (1 + T.exp(-T.dot(x, w) - b))
pred = prob > 0.5
loss = -y * T.log(prob) - (1 - y) * T.log(1 - prob)
# l2
# penalty = reg * (w ** 2).sum()
def convert_variable(x):
    if x.ndim == 1:
        return T.vector(x.name, dtype=x.dtype)
    else:
        return T.matrix(x.name, dtype=x.dtype)
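# Quick illustration of convert_variable(): it builds a fresh symbolic
# variable with the same name and dtype as its argument, mapping 1-d inputs
# to vectors and everything else to matrices. The inputs below are
# assumptions for the sketch.
v = convert_variable(T.dvector('v0'))  # -> new T.vector named 'v0'
m = convert_variable(T.dmatrix('m0'))  # -> new T.matrix named 'm0'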
def __init__(self, D, K, hidden_layer_sizes, gamma,
             max_experiences=10000, min_experiences=100, batch_sz=32):
    self.K = K
    lr = 10e-3
    mu = 0.
    decay = 0.99

    # create the graph
    self.layers = []
    M1 = D
    for M2 in hidden_layer_sizes:
        layer = HiddenLayer(M1, M2)
        self.layers.append(layer)
        M1 = M2

    # final layer
    layer = HiddenLayer(M1, K, lambda x: x)
    self.layers.append(layer)

    # collect params for copy
    self.params = []
    for layer in self.layers:
        self.params += layer.params
    caches = [theano.shared(np.ones_like(p.get_value()) * 0.1)
              for p in self.params]
    velocities = [theano.shared(p.get_value() * 0) for p in self.params]

    # inputs and targets
    X = T.matrix('X')
    G = T.vector('G')
    actions = T.ivector('actions')

    # calculate output and cost
    Z = X
    for layer in self.layers:
        Z = layer.forward(Z)
    Y_hat = Z

    selected_action_values = Y_hat[T.arange(actions.shape[0]), actions]
    cost = T.sum((G - selected_action_values) ** 2)

    # create train function
    grads = T.grad(cost, self.params)
    g_update = [(p, p + v)
                for p, v, g in zip(self.params, velocities, grads)]
    c_update = [(c, decay * c + (1 - decay) * g * g)
                for c, g in zip(caches, grads)]
    v_update = [(v, mu * v - lr * g / T.sqrt(c))
                for v, c, g in zip(velocities, caches, grads)]
    # v_update = [(v, mu*v - lr*g) for v, g in zip(velocities, grads)]
    # c_update = []
    updates = c_update + g_update + v_update

    # compile functions
    self.train_op = theano.function(
        inputs=[X, G, actions],
        updates=updates,
        allow_input_downcast=True
    )
    self.predict_op = theano.function(
        inputs=[X],
        outputs=Y_hat,
        allow_input_downcast=True
    )

    # create replay memory
    self.experience = {'s': [], 'a': [], 'r': [], 's2': [], 'done': []}
    self.max_experiences = max_experiences
    self.min_experiences = min_experiences
    self.batch_sz = batch_sz
    self.gamma = gamma
def augment_system(ode_func, n_states, n_theta):
    """
    Function to create augmented system.

    Takes a function which specifies a set of differential equations and
    returns a compiled function which allows for computation of gradients
    of the differential equation's solution with respect to the parameters.

    Uses float64 even if floatX=float32, because the scipy integrator
    always uses float64.

    Parameters
    ----------
    ode_func : function
        Differential equation. Returns array-like.
    n_states : int
        Number of rows of the sensitivity matrix. (n_states)
    n_theta : int
        Number of ODE parameters

    Returns
    -------
    system : function
        Augmented system of differential equations.
    """
    # Present state of the system
    t_y = tt.vector("y", dtype='float64')
    t_y.tag.test_value = np.ones((n_states,), dtype='float64')

    # Parameter(s). Should be a vector to allow for generalization to
    # multiparameter systems of ODEs. Is m-dimensional because it includes
    # all initial conditions as well as ODE parameters.
    t_p = tt.vector("p", dtype='float64')
    t_p.tag.test_value = np.ones((n_states + n_theta,), dtype='float64')

    # Time. Allows non-autonomous systems of ODEs to be analyzed.
    t_t = tt.scalar("t", dtype='float64')
    t_t.tag.test_value = 2.459

    # Present state of the gradients: will always be 0 unless the parameter
    # is the initial condition. Entry i,j is the partial of y[i] wrt p[j].
    dydp_vec = tt.vector("dydp", dtype='float64')
    dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, 'float64')
    dydp = dydp_vec.reshape((n_states, n_states + n_theta))

    # Get symbolic representation of the ODEs by passing tensors for y, t
    # and theta
    yhat = ode_func(t_y, t_t, t_p[n_states:])
    # Stack the results of the ode_func into a single tensor variable
    if not isinstance(yhat, (list, tuple)):
        yhat = (yhat,)
    t_yhat = tt.stack(yhat, axis=0)

    # Now compute gradients
    J = tt.jacobian(t_yhat, t_y)

    Jdfdy = tt.dot(J, dydp)

    grad_f = tt.jacobian(t_yhat, t_p)

    # This is the time derivative of dydp
    ddt_dydp = (Jdfdy + grad_f).flatten()

    system = theano.function(
        inputs=[t_y, t_t, t_p, dydp_vec],
        outputs=[t_yhat, ddt_dydp],
        on_unused_input="ignore")

    return system
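# Hedged usage sketch for augment_system() with a one-state, one-parameter
# exponential-decay ODE; the decay function and the values below are
# illustrative, and make_sens_ic is the helper already referenced above.
def decay(y, t, p):
    return -p[0] * y[0]

system = augment_system(decay, n_states=1, n_theta=1)
y0 = np.array([1.0])
p_full = np.array([1.0, 0.5])  # [initial condition, theta]
dydp0 = make_sens_ic(1, 1, 'float64')
yhat, ddt_dydp = system(y0, 0.0, p_full, dydp0)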
def build(self):
    E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c
    x = T.ivector('x')
    y = T.ivector('y')
    # negy is the negative sampling for blackout, shape (len(y), k)
    negy = T.imatrix('negy')
    q_w = T.vector('q_w')

    def _recurrence(x_t, y_t, neg_y_t, s_t1_prev, s_t2_prev, q_w):
        # Word embedding layer; E is word_dim x vocab_dim
        x_e = E[:, x_t]

        # GRU Layer 1
        z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
        r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
        c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
        s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev

        # GRU Layer 2
        z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
        r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
        c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
        s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev

        # probability of output o_t
        # o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]

        # blackout version output probability
        # correct word probability (1,1)
        c_o_t = T.exp(V[y_t].dot(s_t2) + c[y_t])
        # negative word probability (k,1)
        n_o_t = T.exp(V[neg_y_t].dot(s_t2) + c[neg_y_t])
        # sample set probability
        t_o = (q_w[y_t] * c_o_t) + T.sum(q_w[neg_y_t] * n_o_t)
        # positive probability
        c_o_p = q_w[y_t] * c_o_t / t_o
        # negative probability (k,1)
        n_o_p = q_w[neg_y_t] * n_o_t / t_o

        # cost for each y in blackout: the blackout version of the
        # discriminative objective function
        J_dis = -(T.log(c_o_p) +
                  T.sum(T.log(T.ones_like(n_o_p) - n_o_p)))

        return [J_dis, s_t1, s_t2]

    [J, _, _], updates = theano.scan(
        fn=_recurrence,
        sequences=[x, y, negy],
        truncate_gradient=self.bptt_truncate,
        outputs_info=[None,
                      dict(initial=T.zeros(self.hidden_dim)),
                      dict(initial=T.zeros(self.hidden_dim))],
        non_sequences=q_w)

    cost = T.sum(J)

    lr = T.scalar("lr")
    gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
    updates = sgd(self.params, gparams, lr)

    self.train = theano.function(inputs=[x, y, negy, q_w, lr],
                                 outputs=cost,
                                 updates=updates)
def build_model(self, tparams, optionsInp):
    trng = RandomStreams(1234)
    options = copy(optionsInp)
    if 'en_aux_inp' in options:
        options.pop('en_aux_inp')

    # Used for dropout.
    self.use_noise = theano.shared(numpy_floatX(0.))

    xW = T.matrix('xW', dtype='int64')
    mask = T.vector('mask', dtype='int64')
    n_Rwords = xW.shape[0]
    n_samples = xW.shape[1]

    embW = tparams['Wemb'][xW.flatten()].reshape(
        [n_Rwords, n_samples, options['word_encoding_size']])
    xI = T.matrix('xI', dtype=config.floatX)

    if options.get('multimodal_lstm', 0) == 1:
        embImg = T.dot(xI, tparams['WIemb']) + tparams['b_Img']
        embImg = T.shape_padleft(
            T.extra_ops.repeat(embImg, n_samples, axis=0), n_ones=1)
        emb = T.concatenate([embImg, embW], axis=0)
    else:
        emb = embW

    # This is the implementation of input dropout!
    if options['use_dropout']:
        emb = dropout_layer(emb, self.use_noise, trng,
                            options['drop_prob_encoder'], shp=emb.shape)

    # This implements the core lstm
    rval, updatesLSTM = basic_lstm_layer(tparams, emb, [], self.use_noise,
                                         options, prefix='lstm')

    if options['use_dropout']:
        p = dropout_layer(
            sliceT(rval[0][mask + options.get('multimodal_lstm', 0),
                           T.arange(mask.shape[0]), :],
                   options.get('hidden_depth', 1) - 1,
                   options['hidden_size']),
            self.use_noise, trng, options['drop_prob_decoder'],
            (n_samples, options['hidden_size']))
    else:
        p = sliceT(rval[0][mask + options.get('multimodal_lstm', 0),
                           T.arange(mask.shape[0]), :],
                   options.get('hidden_depth', 1) - 1,
                   options['hidden_size'])

    if options.get('multimodal_lstm', 0) == 0:
        sent_emb = (T.dot(p, tparams['Wd']) + tparams['bd'])
        probMatch, sim_score = multimodal_cosine_sim_softmax(
            xI, sent_emb, tparams, options.get('sim_smooth_factor', 1.0))
    else:
        sent_emb = T.sum(p, axis=1).T  # (T.dot(p,tparams['Wd'])).T
        sim_score = sent_emb  # T.maximum(0.0, sent_emb)  # T.tanh(sent_emb)
        smooth_factor = T.as_tensor_variable(
            numpy_floatX(options.get('sim_smooth_factor', 1.0)),
            name='sm_f')
        probMatch = T.nnet.softmax(sim_score * smooth_factor)

    inp_list = [xW, mask, xI]

    if options.get('mode', 'batchtrain') == 'batchtrain':
        # In train mode we compare a batch of images against each other's
        # captions.
        batch_size = options['batch_size']
        cost = -(T.log(probMatch.diagonal()).sum()) / batch_size
    else:
        # In predict mode we compare multiple captions against a single
        # image
        posSamp = T.ivector('posSamp')
        batch_size = posSamp.shape[0]
        cost = -(T.log(probMatch[0, posSamp]).sum()) / batch_size
        inp_list.append(posSamp)

    f_pred_sim_prob = theano.function(inp_list[:3], probMatch,
                                      name='f_pred_sim_prob')
    f_pred_sim_scr = theano.function(inp_list[:3], sim_score,
                                     name='f_pred_sim_scr')
    if options.get('multimodal_lstm', 0) == 1:
        f_sent_emb = theano.function([inp_list[0], inp_list[2]],
                                     [rval[0], emb], name='f_sent_emb')
    else:
        f_sent_emb = theano.function([inp_list[0]], [rval[0], emb],
                                     name='f_sent_emb')

    return self.use_noise, inp_list, [f_pred_sim_prob, f_pred_sim_scr,
                                      f_sent_emb, updatesLSTM], cost, \
        sim_score, tparams
latent_size = 20
nhidden = 512
lr = 0.001
num_epochs = 20  # 50
model_filename_read = "mnist_ae"
classifier_filename_read = "mnist_classifier"
nonlin = lasagne.nonlinearities.rectify
np.random.seed(1234)  # reproducibility

# SYMBOLIC VARS
sym_x = T.matrix()
sym_lr = T.scalar('lr')
sym_z = T.matrix()
sym_y = T.matrix()
sym_target = T.vector()

### LOAD DATA
print("Using MNIST dataset")

# load adversarial examples
'''
adv_train_x = []
orig_train_x = []
adv_img_num_train = 5000
for img_num in range(0, adv_img_num_train):
    fadv = os.path.join('dataset/train/adversarial_images',
                        "img_" + str(img_num) + ".png")
    forig = os.path.join('dataset/train/original_images',
                         "img_" + str(img_num) + ".png")
    adv_train = Image.open(fadv)
    orig_train = Image.open(forig)
    adv_train.load()
import theano
import theano.tensor as T
import numpy

coefficients = T.vector("coefficients")
x = T.scalar("x")
max_coefficients_supported = 10000

result, updates = theano.scan(
    # The lambda's argument order is: sequences, prior results,
    # non-sequences; any of these that is a collection is unpacked.
    fn=lambda coefficient, power, prior_sum, free_variable: [
        coefficient * (free_variable ** power),
        prior_sum + coefficient * (free_variable ** power)
    ],
    # The first output is not accumulated, so it needs no feedback into scan;
    # the second output accumulates, hence the zero initial state.
    outputs_info=[None, T.zeros_like(x)],
    # Scan runs for the shortest sequence given here and truncates the rest.
    sequences=[coefficients, T.arange(max_coefficients_supported)],
    non_sequences=x)

final_result = [result[0].sum(), result[1][-1]]

# Compile a function
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                       outputs=final_result,
                                       updates=updates)

# Test
test_coefficients = numpy.array([1, 0, 2])
test_value = 3
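# The snippet stops right after defining the test inputs; a short
# continuation showing what evaluating the compiled function yields
# (1 + 0*3 + 2*3**2 = 19, and both outputs agree by construction):
term_sum, accumulated = calculate_polynomial(test_coefficients, test_value)
print(term_sum)     # 19.0: sum of the per-term outputs
print(accumulated)  # 19.0: final value of the running accumulation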
def OptimalNode(x_train, y_train, Regression=True, Classification=False,
                bias=False, n_iter=5, alpha=0.01, minibatch=False):
    '''
    inputs
        x_train: training features
        y_train: response variable
        n_iter: # of iterations for SGD
        alpha: strength of L2 penalty (default penalty for now)
    outputs
        Node: dictionary with Node parameters and a predict method
    '''
    rng = numpy.random
    feats = len(x_train[0, :])
    D = [x_train, y_train]
    training_steps = n_iter
    #print "training steps: ", training_steps
    #print "penalty strength: ", alpha
    #print "Uses bias: ", bias

    # Declare Theano symbolic variables
    x = T.matrix("x")
    y = T.vector("y")
    w = theano.shared(rng.uniform(low=-0.25, high=0.25, size=feats), name="w")
    b = theano.shared(rng.randn(1)[0], name="b")
    a = theano.shared(abs(rng.randn(1)[0]), name="a")
    #print "Initialize node as:"
    #print w.get_value(), b.get_value(), a.get_value()

    # Construct Theano expression graph
    if bias:
        p_1 = -0.5 + a / (1 + T.exp(-T.dot(x, w) - b))
    else:
        p_1 = a / (1 + T.exp(-T.dot(x, w)))
    prediction = p_1 > 0.5
    if Classification:
        xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)  # Cross-entropy loss
    elif Regression:
        xent = 0.5 * (y - p_1) ** 2
    if alpha == 0:
        cost = xent.mean()  # The cost to minimize
    else:
        cost = xent.mean() + alpha * ((w ** 2).sum())
    # Compute the gradient of the cost
    if bias:
        gw, gb, ga = T.grad(cost, [w, b, a])
    else:
        gw, ga = T.grad(cost, [w, a])

    # Compile
    Node = {}
    Node['Path'] = {}
    NodePath = Node['Path']
    if bias:
        train = theano.function(
            inputs=[x, y],
            outputs=[prediction, xent],
            updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb),
                     (a, a - 0.1 * ga)))
    else:
        train = theano.function(
            inputs=[x, y],
            outputs=[prediction, xent],
            updates=((w, w - 0.1 * gw), (a, a - 0.1 * ga)))
    predict = theano.function(inputs=[x], outputs=p_1)

    # Train
    for i in range(training_steps):
        if minibatch:
            batch_split = train_test_split(x_train, y_train, test_size=0.2)
            _, D[0], _, D[1] = batch_split
            pred, err = train(D[0], D[1])
        elif not minibatch:
            pred, err = train(D[0], D[1])
        NodePath[str(i)] = {}
        NodePath[str(i)]['w'] = w.get_value()
        NodePath[str(i)]['b'] = b.get_value()
        NodePath[str(i)]['a'] = a.get_value()

    Node['w'] = w.get_value()
    Node['b'] = b.get_value()
    Node['a'] = a.get_value()
    Node['predict'] = predict
    return Node
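# A hedged usage sketch for OptimalNode on toy data; shapes are assumed from
# the function body (a 2-D feature matrix and a 1-D response), and the
# weights in the synthetic target are illustrative only:
import numpy
toy_rng = numpy.random.RandomState(0)
X_toy = toy_rng.randn(400, 3)  # 400 samples, 3 features
y_toy = 1.0 / (1.0 + numpy.exp(-X_toy.dot([0.5, -1.0, 2.0])))  # smooth target
node = OptimalNode(X_toy, y_toy, Regression=True, n_iter=50, alpha=0.01)
preds = node['predict'](X_toy)  # scaled-sigmoid node output per sample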
# http://lazyprogrammer.me
# theano scan example - low pass filter
import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T

X = 2 * np.random.randn(300) + np.sin(np.linspace(0, 3 * np.pi, 300))
plt.plot(X)
plt.title("original")
plt.show()

decay = T.scalar('decay')
sequence = T.vector('sequence')

def recurrence(x, last, decay):
    return (1 - decay) * x + decay * last

outputs, _ = theano.scan(
    fn=recurrence,
    sequences=sequence,
    n_steps=sequence.shape[0],
    outputs_info=[np.float64(0)],
    non_sequences=[decay]
)

lpf = theano.function(
    inputs=[sequence, decay],
    outputs=outputs,
)
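# The snippet cuts off at the compiled function; an obvious continuation,
# applying the filter and plotting the result (the decay value here is
# chosen for illustration):
Y = lpf(X, 0.99)
plt.plot(Y)
plt.title("filtered")
plt.show()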
#!/usr/bin/env python
# Theano tutorial
# Solution to Exercise in section 'Loop'
import numpy as np
import theano
import theano.tensor as tt
from six.moves import xrange

# 1. First example
theano.config.warn.subtensor_merge_bug = False

k = tt.iscalar("k")
A = tt.vector("A")

def inner_fct(prior_result, A):
    return prior_result * A

# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
                              outputs_info=tt.ones_like(A),
                              non_sequences=A, n_steps=k)

# Scan has provided us with A ** 1 through A ** k.  Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]

power = theano.function(inputs=[A, k], outputs=final_result,
                        updates=updates)
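# The compiled `power` function computes elementwise powers; a quick check
# (input cast to floatX since A is a plain vector):
print(power(np.arange(10, dtype=theano.config.floatX), 2))
# [  0.   1.   4.   9.  16.  25.  36.  49.  64.  81.]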
learning_rate = 0.00005
# perc estimate
perc_est = 0.6
# tot trajectories
s_tot = 10000
partition = 3
porz = np.int(perc_est * N)

observations_var = env.observation_space.new_tensor_variable(
    'observations',
    # It should have 1 extra dimension since we want to represent a list of
    # observations
    extra_dims=1)
actions_var = env.action_space.new_tensor_variable('actions', extra_dims=1)
d_rewards_var = TT.vector('d_rewards')
importance_weights_var = TT.vector('importance_weight')

# policy.dist_info_sym returns a dictionary whose values are symbolic
# expressions for quantities related to the distribution of the actions.
# For a Gaussian policy, it contains the mean and (log) standard deviation.
dist_info_vars = policy.dist_info_sym(observations_var)
snap_dist_info_vars = snap_policy.dist_info_sym(observations_var)

surr = TT.sum(
    -dist.log_likelihood_sym_1traj_GPOMDP(actions_var, dist_info_vars) *
    d_rewards_var)

params = policy.get_params(trainable=True)
snap_params = snap_policy.get_params(trainable=True)

importance_weights = dist.likelihood_ratio_sym_1traj_GPOMDP(
def main():
    args = build_parser().parse_args()
    assert args.num_individuals >= 1, (
        'Must have at least one member in ensemble')
    assert args.max_epochs >= 1, ('Must have at least 1 epoch.')
    assert args.base_power >= 0, ('Cannot have fractional filters!')

    np.random.seed(args.seed)
    import lasagne
    lasagne.random.set_rng(np.random.RandomState(args.seed))

    experiment_timestamp = str(time.time()).replace('.', '-')
    experiment_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'experiments',
        experiment_timestamp)
    if os.path.exists(experiment_path):
        print('Experiment directory exists!')
        sys.exit(1)
    else:
        os.makedirs(experiment_path)

    # Save the commit hash used for these experiments.
    commit_hash = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']),
                      'utf-8')
    commit_file_path = os.path.join(experiment_path, 'exp_commit.txt')
    with open(commit_file_path, 'w') as fd:
        fd.write(commit_hash)

    args_file_path = os.path.join(experiment_path, 'provided_args.json')
    with open(args_file_path, 'w') as fd:
        json.dump(vars(args), fd, indent=4)

    # Initial dataset setup
    dataset_mean = load_mean(args.mean_path)
    X, y = load_data(args.dataset_directory, dataset_mean,
                     mean_normalise=True, four_dim=True)
    train_X, train_y, val_X, val_y = train_val_split(X, y)
    print('Train X shape: {}\ttrain y shape: {}\n'
          'Test X shape: {}\tTest y shape: {}'
          ''.format(*(mat.shape for mat in (train_X, train_y, val_X, val_y))))

    # Network setup
    input_var = T.tensor4('input', dtype=theano.config.floatX)
    target = T.vector('target', dtype='int32')
    network_kwargs = {'input_var': input_var, 'base_power': args.base_power}
    model = MiniVGG(**network_kwargs)
    model.pretty_print_network()
    network = model.network
    prediction = get_output(network['output'])
    loss = categorical_crossentropy(prediction, target).mean()
    accuracy = np.array(100., dtype=theano.config.floatX) * (
        categorical_accuracy(prediction, target).mean())
    params = get_all_params(network['output'], trainable=True)
    updates = adam(loss, params)

    print('Starting theano function compilation')
    train_function = theano.function([input_var, target], loss,
                                     updates=updates)
    loss_function = theano.function([input_var, target], loss)
    accuracy_function = theano.function([input_var, target], accuracy)
    pred_function = theano.function([input_var], prediction)
    print('Finished theano function compilation')

    ensemble_prediction = make_ens_predictor(network, pred_function, val_X,
                                             val_y)
    train_network = make_training_function(train_function, loss_function,
                                           accuracy_function, network, val_X,
                                           val_y, args.max_epochs,
                                           args.early_stopping_epochs)

    # Setup bootstraps
    initialisations = get_k_network_initialisations(
        args.num_individuals, input_var=input_var, base_power=args.base_power)
    bootstraps = [
        get_bootstrap(train_X, train_y) for _ in range(args.num_individuals)
    ]
    ensembles = zip(initialisations, bootstraps)

    # Train models
    trained_parameters = []
    for index, (initialisation, bootstrap) in enumerate(ensembles):
        (best_params, training_losses, validation_losses,
         validation_accuracies) = train_network(*bootstrap, initialisation,
                                                True, False)
        trained_parameters.append(best_params)
        max_accuracy = validation_accuracies[np.argmin(validation_losses)]
        ensemble_accuracy = ensemble_prediction(trained_parameters)
        print('New member at {:.2f}% validation accuracy'.format(max_accuracy))
        print('Ensemble at {:.2f}% with {} members'
              ''.format(ensemble_accuracy, len(trained_parameters)))
        print()
        sys.stdout.flush()

        member_path = os.path.join(experiment_path, 'model_{}'.format(index))
        os.makedirs(member_path)
        stats = {
            'training_losses': training_losses,
            'validation_losses': validation_losses,
            'validation_accuracies': validation_accuracies
        }
        with open(os.path.join(member_path, 'train_stats.json'), 'w') as fd:
            json.dump(stats, fd, indent=4)
        model_save_path = os.path.join(member_path, 'model.npz')
        np.savez(model_save_path, *get_all_param_values(model.final_layer))
        model_hash = md5(model_save_path)
        model_hash_path = os.path.join(member_path, 'model_hash.txt')
        with open(model_hash_path, 'w') as fd:
            fd.write(model_hash + '\n')

    ensemble_accuracies = {}
    for num_models in range(1, args.num_individuals + 1):
        parameter_combinations = combinations(trained_parameters, num_models)
        validation_accuracies = [
            ensemble_prediction(parameter_combination)
            for parameter_combination in parameter_combinations
        ]
        ensemble_accuracies[num_models] = {
            'mean': np.mean(validation_accuracies),
            'std': np.std(validation_accuracies),
            'raw': validation_accuracies
        }

    results_path = os.path.join(experiment_path, 'results.json')
    with open(results_path, 'w') as fd:
        json.dump(ensemble_accuracies, fd, indent=4)
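# `get_bootstrap` is not defined in this excerpt; a minimal sketch of the
# standard bootstrap resample it presumably performs (name and signature
# taken from the call site above):
def get_bootstrap(X, y):
    # Sample N indices with replacement so that each ensemble member trains
    # on a different resampled view of the training set.
    indices = np.random.choice(len(X), size=len(X), replace=True)
    return X[indices], y[indices]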
def build_model(tparams, options):
    trng = RandomStreams(SEED)

    # x0, x1 and mask0 were commented out in the original, but the graph and
    # the compiled functions below require them, so they are declared here.
    x0 = tensor.matrix('x0', dtype='int32')
    x1 = tensor.matrix('x1', dtype='int32')
    mask0 = tensor.matrix('mask0', dtype=config.floatX)
    mask1 = tensor.matrix('mask1', dtype=config.floatX)
    y0 = tensor.vector('y0', dtype='int32')

    # sent level
    xs0 = tensor.matrix('xs0', dtype='int32')
    xs1 = tensor.matrix('xs1', dtype='int32')
    mask_xs0 = tensor.matrix('mask_xs0', dtype=config.floatX)
    mask_xs1 = tensor.matrix('mask_xs1', dtype=config.floatX)
    ys0 = tensor.vector('ys0', dtype='int32')
    #dropout_ratio = tensor.scalar(name='dropout_ratio')
    #dropout_decay_ratio = tensor.scalar(name='dropout_decay_ratio')

    #####################################
    p_0 = lstm_layer_0(tparams, input_state=tparams['Wemb'][x0], mask=mask0,
                       options=options)
    p_1 = lstm_layer_1(tparams, input_state=tparams['Wemb'][x1], mask=mask1,
                       options=options)
    #p_0 = tensor.max(p_0, axis=0)
    #p_1 = tensor.max(p_1, axis=0)
    p_0 = attention_layer_2D_0(tparams, input_state=p_0, options=options)
    p_1 = attention_layer_2D_1(tparams, input_state=p_1, options=options)
    proj_0 = tensor.concatenate((p_0, p_1), axis=0)
    #proj_0 = proj_0 * dropout_mask_1D(proj_0, 1, dropout_ratio, trng) * dropout_decay_ratio
    pred_0 = tensor.nnet.softmax(
        tensor.dot(proj_0, tparams['Ws']) + tparams['bs'])
    pred_0 = pred_0.flatten()

    f_pred_prob = theano.function(inputs=[x0, x1, mask0, mask1],
                                  outputs=pred_0.max(axis=0),
                                  name='f_pred_prob')
    f_pred = theano.function(inputs=[x0, x1, mask0, mask1],
                             outputs=pred_0.argmax(axis=0),
                             name='f_pred')

    #off = 1e-6
    d_cost = -tensor.mean(tensor.log(pred_0[y0[1]] + 1e-6))

    #####
    p_s0 = lstm_layer_0(tparams, input_state=tparams['Wemb'][xs0],
                        mask=mask_xs0, options=options)
    p_s1 = lstm_layer_0(tparams, input_state=tparams['Wemb'][xs1],
                        mask=mask_xs1, options=options)
    proj_s0 = tensor.concatenate((p_s0, p_s1), axis=1)
    pred_s0 = tensor.nnet.softmax(
        tensor.dot(proj_s0, tparams['Ws']) + tparams['bs'])

    f_s_pred_prob = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                                    outputs=pred_s0.max(axis=1),
                                    name='f_s_pred_prob')
    f_s_pred = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                               outputs=pred_s0.argmax(axis=1),
                               name='f_s_pred')

    s_cost = -tensor.mean(
        tensor.log(pred_s0[tensor.arange(ys0.shape[0]), ys0] + 1e-6))

    #####################################
    adv_p_0 = lstm_layer_0(tparams, input_state=tparams['p_Wemb'][x0],
                           mask=mask0, options=options)
    adv_p_1 = lstm_layer_1(tparams, input_state=tparams['p_Wemb'][x1],
                           mask=mask1, options=options)
    adv_p_0 = attention_layer_2D_0(tparams, input_state=adv_p_0,
                                   options=options)
    adv_p_1 = attention_layer_2D_1(tparams, input_state=adv_p_1,
                                   options=options)
    adv_proj_0 = tensor.concatenate((adv_p_0, adv_p_1), axis=0)
    adv_pred_0 = tensor.nnet.softmax(
        tensor.dot(adv_proj_0, tparams['Ws']) + tparams['bs']).flatten()

    f_adv_pred_prob = theano.function(inputs=[x0, x1, mask0, mask1],
                                      outputs=adv_pred_0.max(axis=0),
                                      name='f_adv_pred_prob')
    f_adv_pred = theano.function(inputs=[x0, x1, mask0, mask1],
                                 outputs=adv_pred_0.argmax(axis=0),
                                 name='f_adv_pred')

    adv_d_cost = -tensor.mean(tensor.log(adv_pred_0[0] + 1e-6))
    d_cost_2 = 0.2 * d_cost + 0.8 * adv_d_cost

    ###
    adv_p_s0 = lstm_layer_0(tparams, input_state=tparams['p_Wemb'][xs0],
                            mask=mask_xs0, options=options)
    adv_p_s1 = lstm_layer_0(tparams, input_state=tparams['p_Wemb'][xs1],
                            mask=mask_xs1, options=options)
    proj_adv_s0 = tensor.concatenate((adv_p_s0, adv_p_s1), axis=1)
    # note: the original passed proj_s0 here; proj_adv_s0 matches the
    # adversarial branch being built
    pred_adv_s0 = tensor.nnet.softmax(
        tensor.dot(proj_adv_s0, tparams['Ws']) + tparams['bs'])

    f_s_adv_pred_prob = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                                        outputs=pred_adv_s0.max(axis=1),
                                        name='f_s_adv_pred_prob')
    f_s_adv_pred = theano.function(inputs=[xs0, xs1, mask_xs0, mask_xs1],
                                   outputs=pred_adv_s0.argmax(axis=1),
                                   name='f_s_adv_pred')

    adv_s_cost = -tensor.mean(
        tensor.log(pred_adv_s0[tensor.arange(ys0.shape[0]), ys0] + 1e-6))
    s_cost_2 = 0.2 * s_cost + 0.8 * adv_s_cost

    #####################################
    _e = 0.6
    mycost = _e * d_cost_2 + (1 - _e) * s_cost_2

    return [x0, x1, xs0, xs1], [mask0, mask1, mask_xs0, mask_xs1], [y0, ys0], \
        f_pred_prob, f_pred, f_s_pred_prob, f_s_pred, \
        f_adv_pred_prob, f_adv_pred, f_s_adv_pred_prob, f_s_adv_pred, \
        d_cost, s_cost, adv_d_cost, adv_s_cost, \
        d_cost_2, s_cost_2, mycost
def compile(self, options):
    '''Configure the learning process.'''
    # input of model
    self.X = T.tensor3(name='input_frames', dtype='float32')
    self.H = T.matrix(name='H', dtype='float32')
    self.idx = T.vector(name='idx', dtype='int32')

    netlu = self.nets[0]
    netru = self.nets[1]
    netrv = self.nets[2]
    netrm = self.nets[3]
    net_hiera = self.nets[4]
    net_ru_high = self.nets[5]
    net_rv_high = self.nets[6]
    net_rm_high = self.nets[7]

    self.init_state()
    netlu.set_input([self.init_h, self.init_m])
    net_hiera.set_input([self.init_h_high, self.init_m_high])
    netru.set_input([self.init_h_ru, self.init_m_ru])
    netrv.set_input([self.init_h_rv, self.init_m_rv])
    netrm.set_input([self.init_h_rm, self.init_m_rm])

    # set the image feature as input
    idx = 0
    for l in netlu.layers:
        if hasattr(l, 'has_input_frame'):
            if l.has_input_frame:
                l.input_frame = self.X[:, idx, :]
                idx += 1
    assert idx == options['v_length']

    print "start loading SS matrix..."
    time1 = time.time()
    print options['SS_path']
    SS = pkl.load(open(options['SS_path'])).astype(np.int8)
    print 'SS.shape: ', SS.shape
    # for debug
    # SS = np.zeros((train_data.data_size_, train_data.data_size_)).astype(np.float32)
    # SS[:, 1] = np.ones((1, train_data.data_size_))
    SS_shared = theano.shared(value=SS, name='SS_shared')
    time2 = time.time()
    print "load SS matrix costs: ", time2 - time1
    # H_shared = theano.shared(value=T.zeros(shape=[]))

    def comp_(train):
        netlu.set_out(train=train)
        # net_hiera.set_out(train=train)
        idx_hie = 0
        for i in net_hiera.layers:
            if hasattr(i, 'has_input_frame'):
                if i.has_input_frame:
                    i.input_frame = netlu.layers[
                        (idx_hie + 1) * options['hiera_step'] - 1].get_output(
                            train=train)[0]
                    idx_hie += 1
        net_hiera.set_out(train=train)

        net_ru_high.set_out(train=train)
        idx_ru = 0
        for i in netru.layers:
            if hasattr(i, 'has_input_frame'):
                if i.has_input_frame:
                    if netru.layers.index(i) % options['hiera_step'] == 0:
                        i.input_frame = net_ru_high.layers[idx_ru].get_output(
                            train=train)[0]
                        idx_ru += 1
                    else:
                        i.input_frame = net_ru_high.layers[
                            idx_ru - 1].get_output(train=train)[0]

        net_rv_high.set_out(train=train)
        idx_rv = 0
        for i in netrv.layers:
            if hasattr(i, 'has_input_frame'):
                if i.has_input_frame:
                    if netrv.layers.index(i) % options['hiera_step'] == 0:
                        i.input_frame = net_rv_high.layers[idx_rv].get_output(
                            train=train)[0]
                        idx_rv += 1
                    else:
                        # was net_ru_high in the original; net_rv_high matches
                        # the branch above
                        i.input_frame = net_rv_high.layers[
                            idx_rv - 1].get_output(train=train)[0]

        net_rm_high.set_out(train=train)
        netrm.layers[0].input_frame = net_rm_high.layers[0].get_output(
            train=train)[0]

        if not train:
            [my_H, my_M] = net_hiera.get_out_idx(-2)
            print 'compile encoder...'
            self._encoder = theano.function(
                [self.X, self.init_h, self.init_m, self.init_h_high,
                 self.init_m_high], my_H)

        # construct pairwise loss
        lamb = options['lamb']
        [my_H, my_M] = net_hiera.get_out_idx(-2)
        # get binary code from network
        my_B = T.sgn(my_H)
        # self.H_: batch_size * nbits
        # add the hidden state into H
        self.H_ = T.set_subtensor(self.H[self.idx, :],
                                  my_H[:self.idx.shape[0], :])
        # self.SS_ = self.SS[self.idx]
        if self.idx.shape[0] == options['batch_size']:
            # SS_: batch_size * train_size
            self.SS_ = SS_shared[self.idx]
        else:
            # SS_: batch_size * train_size
            self.SS_ = T.set_subtensor(
                T.zeros((options['batch_size'],
                         SS_shared.shape[1]))[:self.idx.shape[0]],
                SS_shared[self.idx])
        loss_pairwise = T.sum(
            T.square(T.dot(my_H, self.H_.transpose()) / options['dim_proj']
                     - self.SS_))
        loss_pairwise += lamb * (T.sum(T.square(my_H - my_B)))

        self.y_pred = netru.get_out(train=train)
        assert len(self.y_pred) == options['v_length']
        loss_backward = T.sum(T.sqr(self.X[:, -1, :] - self.y_pred[0]))
        for i in xrange(1, options['v_length']):
            loss_backward += T.sum(
                T.sqr(self.X[:, -1 - i, :] - self.y_pred[i]))

        self.y_pred2 = netrv.get_out(train=train)
        assert len(self.y_pred2) == options['v_length']
        loss_forward = T.sum(T.sqr(self.X[:, 0, :] - self.y_pred2[0]))
        for i in xrange(1, options['v_length']):
            loss_forward += T.sum(T.sqr(self.X[:, i, :] - self.y_pred2[i]))

        self.y_mean = netrm.get_out(train=train)
        assert len(self.y_mean) == 1
        loss_mean = options['v_length'] * T.sum(
            T.sqr(T.mean(self.X, axis=1) - self.y_mean[0]))

        whts = options['weights']
        reconstruction_loss = (whts[0] * loss_backward +
                               whts[1] * loss_forward +
                               whts[2] * loss_mean)
        # add pairwise loss
        loss = (loss_pairwise * options['pairwise_weight'] +
                reconstruction_loss * (1 - options['pairwise_weight']))
        # loss = loss_pairwise + reconstruction_loss
        for r in self.regularizers:
            loss = r(loss)

        if train:
            self.optimizer = eval('optimizer.' + options['optimizer'])(
                self.params, lr=options['lrate'])
            updates = self.optimizer.get_updates(self.params, loss)
            updates += self.updates
            print 'compile train...'
            start_time = time.time()
            self._train = theano.function(
                [self.X, self.idx, self.H, self.init_h, self.init_m,
                 self.init_h_high, self.init_m_high, self.init_h_ru,
                 self.init_m_ru, self.init_h_rv, self.init_m_rv,
                 self.init_h_rm, self.init_m_rm],
                [self.H_, loss, loss_pairwise, reconstruction_loss],
                updates=updates)
            end_time = time.time()
            print 'spent %f seconds' % (end_time - start_time)
        else:
            print 'compile test...'
            start_time = time.time()
            self._test = theano.function(
                [self.X, self.idx, self.H, self.init_h, self.init_m,
                 self.init_h_high, self.init_m_high, self.init_h_ru,
                 self.init_m_ru, self.init_h_rv, self.init_m_rv,
                 self.init_h_rm, self.init_m_rm], loss)
            end_time = time.time()
            print 'spent %f seconds' % (end_time - start_time)

    comp_(train=True)
    comp_(train=False)
    print "Compile Done!"
def train(self, data1, data2, similarities, miniBatchSize=20, epochs=200):
    self.miniBatchSize = miniBatchSize
    nrMiniBatches = len(data1) / miniBatchSize
    miniBatchIndex = T.lscalar()
    momentum = T.fscalar()
    learningRate = T.fscalar()
    learningRateMiniBatch = np.float32(self.learningRate / miniBatchSize)
    print "learningRateMiniBatch in similarity net"
    print learningRateMiniBatch

    net = self._trainRBM(data1, data2)

    data1 = theano.shared(np.asarray(data1, dtype=theanoFloat))
    data2 = theano.shared(np.asarray(data2, dtype=theanoFloat))
    similarities = theano.shared(np.asarray(similarities, dtype=theanoFloat))

    # The mini-batch data is a matrix
    x = T.matrix('x', dtype=theanoFloat)
    y = T.matrix('y', dtype=theanoFloat)
    self.x = x
    self.y = y
    z = T.vector('z', dtype=theanoFloat)

    trainer = Trainer(x, y, net)
    self.trainer = trainer

    # error = T.sum(T.sqr(trainer.output - z))
    error = T.sum(T.nnet.binary_crossentropy(trainer.output, z))
    updates = self.buildUpdates(trainer, error, learningRate, momentum)

    # Now you have to define the theano function
    discriminativeTraining = theano.function(
        inputs=[miniBatchIndex, learningRate, momentum],
        outputs=[trainer.output, trainer.cos, error],
        updates=updates,
        givens={
            x: data1[miniBatchIndex * miniBatchSize:
                     (miniBatchIndex + 1) * miniBatchSize],
            y: data2[miniBatchIndex * miniBatchSize:
                     (miniBatchIndex + 1) * miniBatchSize],
            z: similarities[miniBatchIndex * miniBatchSize:
                            (miniBatchIndex + 1) * miniBatchSize],
        })

    try:
        for epoch in xrange(epochs):
            print "epoch", epoch
            momentum = np.float32(min(
                np.float32(0.5) + epoch * np.float32(0.05),
                np.float32(self.maxMomentum)))
            for miniBatch in xrange(nrMiniBatches):
                output, cos, error = discriminativeTraining(
                    miniBatch, learningRateMiniBatch, momentum)
            print error / self.miniBatchSize
    except KeyboardInterrupt:
        print "you have decided to interrupt training"
        print "we continue testing"

    print trainer.w.get_value()
    print trainer.b.get_value()
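# The momentum ramp above follows the common recipe of growing momentum
# linearly from 0.5 by 0.05 per epoch until a cap; a quick check of the
# schedule (maxMomentum value chosen for illustration, the class supplies
# self.maxMomentum):
maxMomentum = 0.95
schedule = [min(0.5 + e * 0.05, maxMomentum) for e in range(12)]
# [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.95, 0.95]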
def train_conv_net(datasets, U, word_idx_map, img_w=300, filter_hs=[3, 4, 5],
                   hidden_units=[100, 2], dropout_rate=[0.5],
                   shuffle_batch=True, n_epochs=11, batch_size=50,
                   lr_decay=0.95, conv_non_linear="relu", activations=[Iden],
                   sqr_norm_lim=9, non_static=True, pi_params=[1., 0], C=1.0,
                   patience=20):
    """
    Train a convnet through iterative distillation
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x, y]; x is the number of feature maps (per filter
        window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper [Kim, 2014]
    lr_decay = adadelta decay parameter
    pi_params = update strategy of the imitation parameter \pi
    C = regularization strength
    patience = number of iterations without performance improvement before
        stopping
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units),
                  ("dropout", dropout_rate), ("batch_size", batch_size),
                  ("non_static", non_static), ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch),
                  ("pi_params", pi_params), ("C", C)]
    print parameters

    # define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function(
        [zero_vec_tensor],
        updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))],
        allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(
            rng, input=layer0_input,
            image_shape=(batch_size, 1, img_h, img_w),
            filter_shape=filter_shape, poolsize=pool_size,
            non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng, input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    # build the feature of the BUT-rule
    f_but = T.fmatrix('f_but')
    f_but_ind = T.fmatrix('f_ind')  # indicators
    f_but_layer0_input = Words[T.cast(f_but.flatten(),
                                      dtype="int32")].reshape(
        (f_but.shape[0], 1, f_but.shape[1], Words.shape[1]))
    f_but_pred_layers = []
    for conv_layer in conv_layers:
        f_but_layer0_output = conv_layer.predict(f_but_layer0_input,
                                                 batch_size)
        f_but_pred_layers.append(f_but_layer0_output.flatten(2))
    f_but_layer1_input = T.concatenate(f_but_pred_layers, 1)
    f_but_y_pred_p = classifier.predict_p(f_but_layer1_input)
    # batch_size x 1 + batch_size x K
    f_but_full = T.concatenate([f_but_ind, f_but_y_pred_p], axis=1)
    f_but_full = theano.gradient.disconnected_grad(f_but_full)

    # add logic layer
    nclasses = 2
    rules = [FOL_But(nclasses, x, f_but_full)]
    rule_lambda = [1]
    new_pi = get_pi(cur_iter=0, params=pi_params)
    logic_nn = LogicNN(rng, input=x, network=classifier, rules=rules,
                       rule_lambda=rule_lambda, pi=new_pi, C=C)

    # define parameters of the model and update functions using adadelta
    params_p = logic_nn.params_p
    for conv_layer in conv_layers:
        params_p += conv_layer.params
    if non_static:
        # if word vectors are allowed to change, add them as model parameters
        params_p += [Words]
    cost_p = logic_nn.negative_log_likelihood(y)
    dropout_cost_p = logic_nn.dropout_negative_log_likelihood(y)
    grad_updates_p = sgd_updates_adadelta(params_p, dropout_cost_p, lr_decay,
                                          1e-6, sqr_norm_lim)

    # shuffle dataset and assign to mini batches. if dataset size is not a
    # multiple of mini batches, replicate extra data (at random)
    np.random.seed(3435)
    # training data
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        # shuffle both train data and features
        permutation_order = np.random.permutation(datasets[0].shape[0])
        train_set = datasets[0][permutation_order]
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
        new_fea = {}
        train_fea = datasets[3]
        for k in train_fea.keys():
            train_fea_k = train_fea[k][permutation_order]
            extra_fea = train_fea_k[:extra_data_num]
            new_fea[k] = np.append(train_fea[k], extra_fea, axis=0)
        train_text = datasets[6][permutation_order]
        extra_text = train_text[:extra_data_num]
        new_text = np.append(datasets[6], extra_text, axis=0)
    else:
        new_data = datasets[0]
        new_fea = datasets[3]
        new_text = datasets[6]
    # shuffle both training data and features
    permutation_order = np.random.permutation(new_data.shape[0])
    new_data = new_data[permutation_order]
    for k in new_fea.keys():
        new_fea[k] = new_fea[k][permutation_order]
    new_text = new_text[permutation_order]
    n_batches = new_data.shape[0] / batch_size
    n_train_batches = n_batches
    train_set = new_data
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    train_fea = new_fea
    train_fea_but_ind = train_fea['but_ind'].reshape(
        [train_fea['but_ind'].shape[0], 1])
    train_fea_but_ind = shared_fea(train_fea_but_ind)
    for k in new_fea.keys():
        if k != 'but_text':
            train_fea[k] = shared_fea(new_fea[k])

    # val data
    if datasets[1].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[1].shape[0] % batch_size
        # shuffle both val data and features
        permutation_order = np.random.permutation(datasets[1].shape[0])
        val_set = datasets[1][permutation_order]
        extra_data = val_set[:extra_data_num]
        new_val_data = np.append(datasets[1], extra_data, axis=0)
        new_val_fea = {}
        val_fea = datasets[4]
        for k in val_fea.keys():
            val_fea_k = val_fea[k][permutation_order]
            extra_fea = val_fea_k[:extra_data_num]
            new_val_fea[k] = np.append(val_fea[k], extra_fea, axis=0)
        val_text = datasets[7][permutation_order]
        extra_text = val_text[:extra_data_num]
        new_val_text = np.append(datasets[7], extra_text, axis=0)
    else:
        new_val_data = datasets[1]
        new_val_fea = datasets[4]
        new_val_text = datasets[7]
    val_set = new_val_data
    val_set_x, val_set_y = shared_dataset(
        (val_set[:, :img_h], val_set[:, -1]))
    n_batches = new_val_data.shape[0] / batch_size
    n_val_batches = n_batches
    val_fea = new_val_fea
    val_fea_but_ind = val_fea['but_ind'].reshape(
        [val_fea['but_ind'].shape[0], 1])
    val_fea_but_ind = shared_fea(val_fea_but_ind)
    for k in val_fea.keys():
        if k != 'but_text':
            val_fea[k] = shared_fea(val_fea[k])

    # test data
    test_set_x = datasets[2][:, :img_h]
    test_set_y = np.asarray(datasets[2][:, -1], "int32")
    test_fea = datasets[5]
    test_fea_but_ind = test_fea['but_ind']
    test_fea_but_ind = test_fea_but_ind.reshape(
        [test_fea_but_ind.shape[0], 1])
    test_text = datasets[8]

    ### compile theano functions to get train/val/test errors
    val_model = theano.function(
        [index], logic_nn.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size],
            f_but: val_fea['but'][index * batch_size:
                                  (index + 1) * batch_size],
            f_but_ind: val_fea_but_ind[index * batch_size:
                                       (index + 1) * batch_size, :]
        },
        allow_input_downcast=True, on_unused_input='warn')
    # despite its name, this function evaluates errors on the training set
    test_model = theano.function(
        [index], logic_nn.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            f_but: train_fea['but'][index * batch_size:
                                    (index + 1) * batch_size],
            f_but_ind: train_fea_but_ind[index * batch_size:
                                         (index + 1) * batch_size, :]
        },
        allow_input_downcast=True, on_unused_input='warn')
    train_model = theano.function(
        [index], cost_p, updates=grad_updates_p,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            f_but: train_fea['but'][index * batch_size:
                                    (index + 1) * batch_size],
            f_but_ind: train_fea_but_ind[index * batch_size:
                                         (index + 1) * batch_size, :]
        },
        allow_input_downcast=True, on_unused_input='warn')

    ### setup testing
    test_size = test_set_x.shape[0]
    print 'test size ', test_size
    test_pred_layers = []
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    f_but_test_pred_layers = []
    f_but_test_layer0_input = Words[T.cast(f_but.flatten(),
                                           dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
        f_but_test_layer0_output = conv_layer.predict(
            f_but_test_layer0_input, test_size)
        f_but_test_pred_layers.append(f_but_test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    f_but_test_layer1_input = T.concatenate(f_but_test_pred_layers, 1)
    f_but_test_y_pred_p = classifier.predict_p(f_but_test_layer1_input)
    # Ns x 1 + Ns x K
    f_but_test_full = T.concatenate([f_but_ind, f_but_test_y_pred_p], axis=1)

    # transform to shared variables
    test_set_x_shr, test_set_y_shr = shared_dataset((test_set_x, test_set_y))

    test_q_y_pred, test_p_y_pred = logic_nn.predict(test_layer1_input,
                                                    test_set_x_shr,
                                                    [f_but_test_full])
    test_q_error = T.mean(T.neq(test_q_y_pred, y))
    test_p_error = T.mean(T.neq(test_p_y_pred, y))
    test_model_all = theano.function([x, y, f_but, f_but_ind],
                                     [test_q_error, test_p_error],
                                     allow_input_downcast=True,
                                     on_unused_input='warn')

    ### start training over mini-batches
    print '... training'
    epoch = 0
    batch = 0
    best_val_q_perf = 0
    val_p_perf = 0
    val_q_perf = 0
    cost_epoch = 0
    stop_count = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        # train
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                batch = batch + 1
                new_pi = get_pi(cur_iter=batch * 1. / n_train_batches,
                                params=pi_params)
                logic_nn.set_pi(new_pi)
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                batch = batch + 1
                new_pi = get_pi(cur_iter=batch * 1. / n_train_batches,
                                params=pi_params)
                logic_nn.set_pi(new_pi)
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        # eval
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_losses = np.array(train_losses)
        train_q_perf = 1 - np.mean(train_losses[:, 0])
        train_p_perf = 1 - np.mean(train_losses[:, 1])
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_losses = np.array(val_losses)
        val_q_perf = 1 - np.mean(val_losses[:, 0])
        val_p_perf = 1 - np.mean(val_losses[:, 1])
        print('epoch: %i, training time: %.2f secs; (q): train perf: %.4f %%, '
              'val perf: %.4f %%; (p): train perf: %.4f %%, val perf: %.4f %%'
              % (epoch, time.time() - start_time, train_q_perf * 100.,
                 val_q_perf * 100., train_p_perf * 100., val_p_perf * 100.))
        test_loss = test_model_all(test_set_x, test_set_y, test_fea['but'],
                                   test_fea_but_ind)
        test_loss = np.array(test_loss)
        test_perf = 1 - test_loss
        print 'test perf: q %.4f %%, p %.4f %%' % (test_perf[0] * 100.,
                                                   test_perf[1] * 100.)
        if val_q_perf > best_val_q_perf:
            best_val_q_perf = val_q_perf
            ret_test_perf = test_perf
            stop_count = 0
        else:
            stop_count += 1
        if stop_count == patience:
            break
    return ret_test_perf
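# `get_pi` is not part of this excerpt. In the iterative-distillation scheme
# the imitation parameter is commonly decayed exponentially, pi_t = 1 -
# max(k^t, lb) with pi_params = [k, lb]; a sketch under that assumption (the
# exact schedule used here may differ):
def get_pi(cur_iter, params):
    # k controls the decay speed, lb is a lower bound on the retained weight;
    # the student relies more on the rule-regularized teacher as t grows.
    k, lb = params
    return 1. - max(k ** cur_iter, lb)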
'''
Theano basics. For the class Data Science: Practical Deep Learning Concepts
in Theano and TensorFlow
https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
'''
import numpy as np
import theano.tensor as T
import theano

# just some different types of variables
c = T.scalar('c')
v = T.vector('v')
A = T.matrix('A')

# we can define a matrix multiplication
w = A.dot(v)

# how do these variables actually take on values?
matrix_times_vector = theano.function(inputs=[A, v], outputs=w)

# we can create real arrays
A_val = np.array([[1, 3], [3, 4]])
v_val = np.array([5, 6])

w_val = matrix_times_vector(A_val, v_val)
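# A quick check of the compiled function's output; the values follow from
# the arrays above:
print(w_val)  # [23. 39.]: [1*5 + 3*6, 3*5 + 4*6]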
train_word_pos_vec3D = train_word_pos_vec3D[indices]
train_sen_length = train_sen_length[indices]
train_label_1hot = train_label_1hot[indices]

""" new model """
model = Network()

# Prepare Theano variables for inputs and targets
input_var = T.tensor3('inputs')
target_var = T.imatrix('targets')
mask_var = T.imatrix('mask_layer')

# Pi model variables:
if model.network_type == "pi":
    input_b_var = T.tensor3('inputs_b')
    mask_train = T.vector('mask_train')
    unsup_weight_var = T.scalar('unsup_weight')
# tempens model variables:
elif model.network_type == "tempens":
    z_target_var = T.matrix('z_targets')
    mask_train = T.vector('mask_train')
    unsup_weight_var = T.scalar('unsup_weight')

learning_rate_var = T.scalar('learning_rate')
adam_beta1_var = T.scalar('adam_beta1')

# # Left sdp length
# left_sdp_length = T.imatrix('left_sdp_length')
# # Sentences length
# sen_length = T.imatrix('sen_length')
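# The `unsup_weight_var` above is typically driven by the Gaussian ramp-up
# used with the pi/temporal-ensembling models (Laine & Aila, 2017); a sketch,
# with `rampup_length` and `w_max` as assumed hyperparameters:
import numpy as np

def rampup(epoch, rampup_length=80):
    # Eases the unsupervised loss term in over the first `rampup_length`
    # epochs: exp(-5 * (1 - t/T)^2), reaching 1.0 at epoch T.
    if epoch >= rampup_length:
        return 1.0
    p = 1.0 - float(epoch) / rampup_length
    return float(np.exp(-5.0 * p * p))

# fed into unsup_weight_var once per epoch, e.g.:
# unsup_weight = w_max * rampup(epoch)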