def early_stop(self, x_validate, y_validate):
    '''
    Evaluates the Node's path on the given validation set and
    chooses the optimal w in the Node's path based on validation loss.
    '''
    x = T.matrix("x")
    y = T.vector("y")
    w = T.vector("w")
    b = T.dscalar("b")
    a = T.dscalar("a")
    p_1 = -0.5 + a / (1 + T.exp(-T.dot(x, w) - b))
    xent = 0.5 * (y - p_1) ** 2
    cost = xent.mean()
    loss = theano.function(inputs=[x, y, w, b, a], outputs=cost)

    path = sorted(map(int, self.path.keys()))

    best_node = {}
    best_node_ind = 0
    best_loss = numpy.mean(y_validate ** 2)
    losses = []
    for ind in path:
        node = self.path[str(ind)]
        l = loss(x_validate, y_validate, node['w'], node['b'], node['a'])
        losses.append(l)
        if l < best_loss:
            best_node = node
            best_node_ind = ind
            best_loss = l
    self.w = best_node['w']
    self.b = best_node['b']
    self.a = best_node['a']
def __init__(self, retina=None, config=None, name=None, input_variable=None):
    self.retina = retina
    self.config = config
    self.state = None
    if name is None:
        name = str(uuid.uuid4())
    self.name = self.config.get('name', name)

    # 3d version
    self._I = T.dtensor3(self.name + "_I")
    self._preceding_V = T.dmatrix(self.name + "_preceding_V")  # initial condition for the scan sequence
    self._b_0 = T.dscalar(self.name + "_b_0")
    self._a_0 = T.dscalar(self.name + "_a_0")
    self._a_1 = T.dscalar(self.name + "_a_1")
    self._k = T.iscalar(self.name + "_k_bip")  # number of iteration steps

    def bipolar_step(input_image, preceding_V, b_0, a_0, a_1):
        V = (input_image * b_0 - preceding_V * a_1) / a_0
        return V

    # The order of arguments in theano.scan has to match the order of arguments in bipolar_step.
    self._result, self._updates = theano.scan(fn=bipolar_step,
                                              outputs_info=[self._preceding_V],
                                              sequences=[self._I],
                                              non_sequences=[self._b_0, self._a_0, self._a_1],
                                              n_steps=self._k)
    self.output_variable = self._result[0]
    # The order of arguments presented here is arbitrary (it will be inferred
    # from the symbols provided), but calls to compute_V have to match this order!
    self.compute_V = theano.function(inputs=[self._I, self._preceding_V,
                                             self._b_0, self._a_0, self._a_1, self._k],
                                     outputs=self._result,
                                     updates=self._updates)
def add_scalars():
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = x + y
    f = function([x, y], z)
    print(f(2, 4))
    print(f(5, 4))
def sample_gradient():
    print "differentiation"
    x, y = T.dscalars("x", "y")
    z = (x + 2 * y) ** 2

    # dz/dx
    gx = T.grad(z, x)
    fgx = theano.function([x, y], gx)
    print fgx(1.0, 1.0)

    # dz/dy
    gy = T.grad(z, y)
    fgy = theano.function([x, y], gy)
    print fgy(1.0, 1.0)

    # d{sigmoid(x)}/dx
    x = T.dscalar("x")
    sig = sigmoid(x)
    dsig = T.grad(sig, x)
    f = theano.function([x], dsig)
    print f(0.0)
    print f(1.0)

    # d{sigmoid(<x,w>)}/dx
    w = T.dscalar("w")
    sig = sigmoid(T.dot(x, w))
    dsig = T.grad(sig, x)
    f = theano.function([x, w], dsig)
    print f(1.0, 2.0)
    print f(3.0, 4.0)
    print
def make_minimizer(Model):
    L, y = T.ivector('L'), T.dvector('y')
    mu, eps = T.dscalar('mu'), T.dscalar('eps')
    R, eta = T.dtensor3('R'), T.dvector('eta')

    model = Model(L, y, mu, R, eta, eps)
    return theano.function([L, y, mu, R, eta, eps], model.minimize())
def leapfrog1_dE(H, q, profile):
    """Computes a theano function that computes one leapfrog step and the
    energy difference between the beginning and end of the trajectory.

    Parameters
    ----------
    H : Hamiltonian
    q : theano.tensor
    profile : Boolean

    Returns
    -------
    theano function which returns
    q_new, p_new, dE
    """
    p = tt.dvector('p')
    p.tag.test_value = q.tag.test_value

    e = tt.dscalar('e')
    e.tag.test_value = 1

    q1, p1 = leapfrog(H, q, p, 1, e)
    E = energy(H, q1, p1)

    E0 = tt.dscalar('E0')
    E0.tag.test_value = 1

    dE = E - E0

    f = theano.function([q, p, e, E0], [q1, p1, dE], profile=profile)
    f.trust_input = True
    return f
def LQLEP_wBarrier(LQLEP=Th.dscalar(), ldet=Th.dscalar(), v1=Th.dvector(),
                   N_spike=Th.dscalar(), ImM=Th.dmatrix(), U=Th.dmatrix(),
                   V2=Th.dvector(), u=Th.dvector(), C=Th.dmatrix(), **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood,
    as a function of theta and M, with a barrier on the log-det term
    and a prior.
    '''
    sq_nonlinearity = V2**2. * Th.sum(Th.dot(U, C) * U, axis=[1])    # Th.sum(U**2,axis=[1])
    nonlinearity = V2 * Th.sqrt(Th.sum(Th.dot(U, C) * U, axis=[1]))  # Th.sum(U**2,axis=[1])
    if 'uc' in other:
        LQLEP_wPrior = (LQLEP + 0.5 * N_spike * (1. / (ldet + 250.)**2.
                        - 0.000001 * Th.sum(Th.log(1. - 4 * sq_nonlinearity)))
                        + 10. * Th.sum((u[2:] + u[:-2] - 2 * u[1:-1])**2.)
                        + 10. * Th.sum((other['uc'][2:] + other['uc'][:-2] - 2 * other['uc'][1:-1])**2.)
                        + 0.000000001 * Th.sum(v1**2.))
        # + 100. * Th.sum( v1 )
        # + 0.0001 * Th.sum( V2**2 )
    else:
        LQLEP_wPrior = (LQLEP + 0.5 * N_spike * (1. / (ldet + 250.)**2.
                        - 0.000001 * Th.sum(Th.log(1. - 4 * sq_nonlinearity)))
                        + 10. * Th.sum((u[2:] + u[:-2] - 2 * u[1:-1])**2.)
                        + 0.000000001 * Th.sum(v1**2.))
        # + 100. * Th.sum( v1 )
        # + 0.0001 * Th.sum( V2**2 )
    eigsImM, barrier = eig(ImM)
    barrier = 1 - (Th.sum(Th.log(eigsImM)) > -250) * \
                  (Th.min(eigsImM) > 0) * (Th.max(4 * sq_nonlinearity) < 1)
    other.update(locals())
    return named(**other)
def init_output_delta_function(self):
    y = T.dscalar('example_value')
    a = T.dscalar('actual_value')
    # Derivative of the activation function evaluated at the unit's activation.
    # This assumes self.activation is a symbolic elementwise function; the
    # original `T.grad(self.activation)` is not valid, since T.grad needs both
    # a cost expression and a variable to differentiate with respect to.
    dg = T.grad(self.activation(a), a)
    delta = dg * (y - a)
    f = theano.function([a, y], delta)
    return f
def neural_net(
        x=T.dmatrix(),         # our points, one point per row
        y=T.dmatrix(),         # our targets
        w=T.dmatrix(),         # first layer weights
        b=T.dvector(),         # first layer bias
        v=T.dmatrix(),         # second layer weights
        c=T.dvector(),         # second layer bias
        step=T.dscalar(),      # step size for gradient descent
        l2_coef=T.dscalar()):  # l2 regularization amount
    """Idea A:"""
    hid = T.tanh(T.dot(x, w) + b)
    pred = T.dot(hid, v) + c
    sse = T.sum((pred - y) * (pred - y))
    w_l2 = T.sum(T.sum(w * w))
    v_l2 = T.sum(T.sum(v * v))
    loss = sse + l2_coef * (w_l2 + v_l2)

    def symbolic_params(cls):
        return [cls.w, cls.b, cls.v, cls.c]

    def update(cls, x, y, **kwargs):
        params = cls.symbolic_params()
        gp = T.grad(cls.loss, params)
        return [], [In(p, update=p - cls.step * g) for p, g in zip(params, gp)]

    def predict(cls, x, **kwargs):
        return cls.pred, []

    return locals()
def create_function():
    import theano.tensor as T
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = x + y
    z.eval({x: 16.3, y: 12.1})
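# Note: `eval` is a shortcut that compiles and calls a theano function behind
# the scenes; the compiled function is cached on the variable, so repeated
# `eval` calls with the same inputs are fast after the first one. An explicit
# equivalent of the call above would be:
#   f = theano.function([x, y], z)
#   f(16.3, 12.1)  # -> array(28.4)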
def test_default_dtype(self):
    random = RandomStreams(utt.fetch_seed())
    low = tensor.dscalar()
    high = tensor.dscalar()

    # Should not silently downcast from low and high
    out0 = random.uniform(low=low, high=high, size=(42,))
    assert out0.dtype == 'float64'
    f0 = function([low, high], out0)
    val0 = f0(-2.1, 3.1)
    assert val0.dtype == 'float64'

    # Should downcast, since asked explicitly
    out1 = random.uniform(low=low, high=high, size=(42,), dtype='float32')
    assert out1.dtype == 'float32'
    f1 = function([low, high], out1)
    val1 = f1(-1.1, 1.1)
    assert val1.dtype == 'float32'

    # Should use floatX
    lowf = tensor.fscalar()
    highf = tensor.fscalar()
    outf = random.uniform(low=lowf, high=highf, size=(42,))
    assert outf.dtype == config.floatX
    ff = function([lowf, highf], outf)
    valf = ff(numpy.float32(-0.1), numpy.float32(0.3))
    assert valf.dtype == config.floatX
def train_minibatch_fn(self, evaluate=False):
    """
    Initialize this Theano function once
    """
    X = T.lmatrix('X_train')
    L_x = T.lvector('L_X_train')
    Y = T.lmatrix('Y_train')
    L_y = T.lvector('L_y_train')
    learning_rate = T.dscalar('learning_rate')
    momentum = T.dscalar('momentum')
    weight_decay = T.dscalar('weight_decay')

    loss, accuracy = self.loss(X, L_x, Y, L_y, weight_decay)
    updates = self.get_sgd_updates(loss, learning_rate, momentum)

    outputs = [loss, accuracy]
    if evaluate:
        precision, recall = self.evaluate(X, L_x, Y, L_y)
        outputs = outputs + [precision, recall]

    return theano.function(
        inputs=[X, L_x, Y, L_y, learning_rate, momentum, weight_decay],
        outputs=outputs,
        updates=updates
    )
def test_divide_floats(self):
    a = T.dscalar('a')
    b = T.dscalar('b')
    c = theano.function([a, b], b / a)   # true division
    d = theano.function([a, b], b // a)  # floor division
    assert c(6, 3) == 0.5
    assert d(6, 3) == 0.0
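# Note the argument order above: the compiled functions take (a, b), so
# c(6, 3) computes b / a == 3 / 6 == 0.5 and d(6, 3) computes 3 // 6 == 0.0.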
def dtw(array1, array2):
    """
    Accepts: two one-dimensional arrays
    Returns: (float) DTW distance between them.
    """
    s = np.zeros((array1.size + 1, array2.size + 1))
    s[:, 0] = 1e6
    s[0, :] = 1e6
    s[0, 0] = 0.0

    # Set up symbolic variables
    square = T.dmatrix('square')
    vec1 = T.dvector('vec1')
    vec2 = T.dvector('vec2')
    vec1_length = T.dscalar('vec1_length')
    vec2_length = T.dscalar('vec2_length')
    outer_loop = T.arange(vec1_length, dtype='int64')
    inner_loop = T.arange(vec2_length, dtype='int64')

    # Run the outer loop (`outer`, the per-row update function, is assumed
    # to be defined elsewhere in this module)
    path, _ = scan(fn=outer,
                   outputs_info=[dict(initial=square, taps=[-1])],
                   non_sequences=[inner_loop, vec1, vec2],
                   sequences=outer_loop)

    # Compile the function
    theano_square = function([vec1, vec2, square, vec1_length, vec2_length],
                             path, on_unused_input='warn')

    # Call the compiled function and return the actual distance
    return theano_square(array1, array2, s,
                         array1.size, array2.size)[-1][array1.size, array2.size]
def theano_setup(self):
    # The matrices Wb and Wc were originally tied.
    # Because of that, I decided to keep Wb and Wc with
    # the same shape (instead of being transposed) to
    # avoid disturbing the code as much as possible.
    Wb = T.dmatrix('Wb')
    Wc = T.dmatrix('Wc')
    b = T.dvector('b')
    c = T.dvector('c')
    scale_s = T.dscalar('scale_s')
    scale_plus_x = T.dscalar('scale_plus_x')
    x = T.dmatrix('x')

    h_act = T.dot(x, Wc) + c
    if self.act_func[0] == 'tanh':
        h = T.tanh(h_act)
    elif self.act_func[0] == 'sigmoid':
        h = T.nnet.sigmoid(h_act)
    elif self.act_func[0] == 'id':
        # bad idea
        h = h_act
    else:
        raise ValueError("Invalid act_func[0]")

    r_act = T.dot(h, Wb.T) + b
    if self.act_func[1] == 'tanh':
        r = scale_s * T.tanh(r_act)
    elif self.act_func[1] == 'sigmoid':
        r = scale_s * T.nnet.sigmoid(r_act)
    elif self.act_func[1] == 'id':
        r = scale_s * r_act
    else:
        raise ValueError("Invalid act_func[1]")

    if self.want_plus_x:
        r = r + scale_plus_x * x

    # Another variable to be able to call a function
    # with a noisy x and compare it to a reference x.
    y = T.dmatrix('y')
    loss = ((r - y)**2)
    sum_loss = T.sum(loss)

    # theano_encode_decode : vectorial function in argument X.
    # theano_loss : vectorial function in argument X.
    # theano_gradients : returns a tuple of gradients, each of which
    #                    involves all the data X summed, so it's not a
    #                    "vectorial" function.
    self.theano_encode_decode = function([Wb, Wc, b, c, scale_s, scale_plus_x, x], r)
    self.theano_loss = function([Wb, Wc, b, c, scale_s, scale_plus_x, x, y], loss)
    self.theano_gradients = function([Wb, Wc, b, c, scale_s, scale_plus_x, x, y],
                                     [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
                                      T.grad(sum_loss, b), T.grad(sum_loss, c),
                                      T.grad(sum_loss, scale_s),
                                      T.grad(sum_loss, scale_plus_x)])
def theg1():
    w = T.dscalar('w')
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = w * x + y
    f = theano.function([w, x, y], z)
    ppth(z, graph=True)
    return f
def theg1():
    w = T.dscalar("w")
    x = T.dscalar("x")
    y = T.dscalar("y")
    z = w * x + y
    f = theano.function([w, x, y], z)
    # ppth(z, graph=True)
    return f
def test_shared_method(self):
    """Test that under a variety of tricky conditions, the shared-ness of
    Variables and Methods is respected.

    Fred: the test creates different methods even if they are shared.
    What do we want?
    """
    m1 = Module()
    m1.x = T.dscalar()
    x = T.dscalar()
    fy = Method(x, x * 2)
    fz = Method([], m1.x * 2)
    m1.y = fy
    m1.z = fz
    m1.ly = [fy]
    m1.lz = [fz]
    m1.lly = [[fy]]
    m1.llz = [[fz]]
    m1.ty = (fy,)
    m1.tz = (fz,)
    m1.tty = ((fy,),)
    m1.ttz = ((fz,),)
    m1.dy = {'y': fy}
    m1.dz = {'z': fz}
    inst = m1.make()
    inst.x = 1
    assert inst.y(2) == 4
    assert inst.z() == 2
    assert inst.ly[0](2) == 4
    assert inst.lz[0]() == 2
    assert inst.ty[0](2) == 4
    assert inst.tz[0]() == 2
    assert inst.dy['y'](2) == 4
    assert inst.dz['z']() == 2
    assert inst.lly[0][0](2) == 4
    assert inst.llz[0][0]() == 2
    assert inst.tty[0][0](2) == 4
    assert inst.ttz[0][0]() == 2
    for f in (inst.z, inst.lz[0], inst.llz[0][0], inst.tz[0],
              inst.dz['z'], inst.ttz[0][0],
              inst.y, inst.ly[0], inst.lly[0][0], inst.ty[0],
              inst.dy['y'], inst.tty[0][0]):
        assert isinstance(f, theano.compile.function_module.Function)
    assert m1.y is m1.ly[0]
    assert inst.y is inst.ly[0]
    assert inst.y is inst.lly[0][0]
    assert inst.y is inst.ty[0]
    assert inst.y is inst.tty[0][0]
    assert inst.y is inst.dy['y']
def test_method_in_list_or_dict(self):
    """Test that a Method which is only included via a list or dictionary
    is still treated as if it were a toplevel attribute.

    Fred: why don't we do this for direct functions of variables?
    """
    m1 = Module()
    x = T.dscalar()
    m1.x = T.dscalar()
    m1.y = Method(x, x * 2)
    m1.z = Method([], m1.x * 2)
    m1.ly = [Method(x, x * 2)]
    m1.lz = [Method([], m1.x * 2)]
    m1.ty = (Method(x, x * 2),)
    m1.tz = (Method([], m1.x * 2),)
    m1.dy = {'y': Method(x, x * 2)}
    m1.dz = {'z': Method([], m1.x * 2)}
    m1.lly = [[Method(x, x * 2)]]
    m1.llz = [[Method([], m1.x * 2)]]
    m1.lty = [(Method(x, x * 2),)]
    m1.ltz = [(Method([], m1.x * 2),)]
    m1.ldy = [{'y': Method(x, x * 2)}]
    m1.ldz = [{'z': Method([], m1.x * 2)}]
    m1.tly = ([Method(x, x * 2)],)
    m1.tlz = ([Method([], m1.x * 2)],)
    m1.tty = ((Method(x, x * 2),),)
    m1.ttz = ((Method([], m1.x * 2),),)
    m1.tdy = ({'y': Method(x, x * 2)},)
    m1.tdz = ({'z': Method([], m1.x * 2)},)
    m1.dly = {'y': [Method(x, x * 2)]}
    m1.dlz = {'z': [Method([], m1.x * 2)]}
    m1.dty = {'y': (Method(x, x * 2),)}
    m1.dtz = {'z': (Method([], m1.x * 2),)}
    m1.ddy = {'y': {'y': Method(x, x * 2)}}
    m1.ddz = {'z': {'z': Method([], m1.x * 2)}}
    inst = m1.make()
    inst.x = 1
    assert inst.y(2) == 4
    assert inst.z() == 2
    assert inst.ly[0](2) == 4
    assert inst.lz[0]() == 2
    assert inst.ty[0](2) == 4
    assert inst.tz[0]() == 2
    assert inst.dy['y'](2) == 4
    assert inst.dz['z']() == 2
    for f in (inst.lly[0][0], inst.lty[0][0], inst.ldy[0]['y'],
              inst.tly[0][0], inst.tty[0][0], inst.tdy[0]['y'],
              inst.dly['y'][0], inst.dty['y'][0], inst.ddy['y']['y']):
        assert f(2) == 4
    for f in (inst.llz[0][0], inst.ltz[0][0], inst.ldz[0]['z'],
              inst.tlz[0][0], inst.ttz[0][0], inst.tdz[0]['z'],
              inst.dlz['z'][0], inst.dtz['z'][0], inst.ddz['z']['z']):
        assert f() == 2
    assert isinstance(inst.z, theano.compile.function_module.Function)
    assert isinstance(inst.y, theano.compile.function_module.Function)
    for f in inst.ly, inst.lz, inst.ty, inst.tz:
        assert isinstance(f[0], theano.compile.function_module.Function)
    for f in (inst.lly, inst.llz, inst.lty, inst.ltz,
              inst.tly, inst.tlz, inst.tty, inst.ttz):
        assert isinstance(f[0][0], theano.compile.function_module.Function)
    for f in (inst.dly['y'][0], inst.dty['y'][0],
              inst.dlz['z'][0], inst.dtz['z'][0],
              inst.ddy['y']['y'], inst.ddz['z']['z']):
        assert isinstance(f, theano.compile.function_module.Function)
def test_adding_1(self):
    import theano.tensor as T
    from theano import function
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = x + y
    f = function([x, y], z)
    assert f(2, 3) == numpy.array(5.0)
    assert f(16.3, 12.1) == numpy.array(28.4)
def build_iterative_function(self):
    def states_dot(lambda_x):
        rho_x = rho(self.x)
        rho_h = rho(self.h)
        x_pressure = rho_prime(self.x) * (T.dot(rho_h, self.W1.T) + self.bx)
        x_pressure_final = lambda_x * self.x_data + (1 - lambda_x) * x_pressure
        x_dot = x_pressure_final - self.x
        h_pressure = rho_prime(self.h) * (T.dot(rho_x, self.W1) + self.bh)
        h_dot = h_pressure - self.h
        return [x_dot, h_dot]

    def params_delta(x_delta, h_delta):
        rho_x = rho(self.x)
        rho_h = rho(self.h)
        bx_delta = T.mean(x_delta, axis=0)
        W1_delta = (T.dot(x_delta.T, rho_h) + T.dot(rho_x.T, h_delta)) / self.x.shape[0]
        bh_delta = T.mean(h_delta, axis=0)
        return [bx_delta, W1_delta, bh_delta]

    lambda_x = T.dscalar('lambda_x')
    epsilon_x = T.dscalar('epsilon_x')
    epsilon_h = T.dscalar('epsilon_h')
    epsilon_W1 = T.dscalar('epsilon_W1')

    [x_dot, h_dot] = states_dot(lambda_x)

    x_delta = epsilon_x * x_dot
    h_delta = epsilon_h * h_dot

    [bx_delta, W1_delta, bh_delta] = params_delta(x_delta, h_delta)

    x_new = self.x + x_delta
    h_new = self.h + h_delta
    bx_new = self.bx + epsilon_W1 * bx_delta
    W1_new = self.W1 + epsilon_W1 * W1_delta
    bh_new = self.bh + epsilon_W1 * bh_delta

    updates = [(self.x, x_new), (self.h, h_new),
               (self.bx, bx_new), (self.W1, W1_new), (self.bh, bh_new)]

    norm_grad_x = T.sqrt((x_dot ** 2).mean(axis=0).sum())
    norm_grad_h = T.sqrt((h_dot ** 2).mean(axis=0).sum())

    iterative_function = theano.function(
        inputs=[lambda_x, epsilon_x, epsilon_h, epsilon_W1],
        outputs=[self.energy(), norm_grad_x, norm_grad_h, self.mse],
        updates=updates
    )
    return iterative_function
def theg2():
    w = T.dscalar('w')
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = 7 * w * x + y
    f = theano.function([w, x, y], z)
    dz = T.grad(z, x)
    g = theano.function([w, x, y], dz)
    ppth(dz, graph=True)
    return g
def theg2():
    w = T.dscalar("w")
    x = T.dscalar("x")
    y = T.dscalar("y")
    z = 7 * w * x + y
    f = theano.function([w, x, y], z)
    dz = T.grad(z, x)
    g = theano.function([w, x, y], dz)
    # ppth(dz, graph=True)
    return g
def make_theano_evaluator(use_log):
    """This returns a function(!) that calculates the gradient and cost. Heh."""
    X = T.dmatrix('X')
    triplets = T.imatrix('triplets')
    alpha = T.dscalar('alpha')
    lamb = T.dscalar('lambda')
    no_dims = T.iscalar('no_dims')
    N = T.iscalar('N')

    triplets_A = triplets[:, 0]
    triplets_B = triplets[:, 1]
    triplets_C = triplets[:, 2]

    # Compute Student-t kernel. Look familiar?
    sum_X = T.sum(X**2, axis=1)
    a = -2 * (X.dot(X.T))
    b = a + sum_X[np.newaxis, :] + sum_X[:, np.newaxis]
    K = (1 + b / alpha) ** ((alpha + 1) / -2)

    # Compute value of cost function
    P = K[triplets_A, triplets_B] / (K[triplets_A, triplets_B] + K[triplets_A, triplets_C])
    if use_log:
        C = -T.sum(T.log(P)) + lamb * T.sum(X**2)
    else:
        C = -T.sum(P) + lamb * T.sum(X**2)

    # Compute gradient, for each dimension
    const = (alpha + 1) / alpha
    dim = T.iscalar('dim')

    def each_dim(dim):
        if use_log:
            A_to_B = (1 - P) * K[triplets_A, triplets_B] * (X[triplets_A][:, dim] - X[triplets_B][:, dim])
            B_to_C = (1 - P) * K[triplets_A, triplets_C] * (X[triplets_A][:, dim] - X[triplets_C][:, dim])
        else:
            A_to_B = P * (1 - P) * K[triplets_A, triplets_B] * (X[triplets_A][:, dim] - X[triplets_B][:, dim])
            B_to_C = P * (1 - P) * K[triplets_A, triplets_C] * (X[triplets_A][:, dim] - X[triplets_C][:, dim])
        this_dim = (-const * T.stack(A_to_B - B_to_C, -A_to_B, B_to_C)).T
        dC = T.extra_ops.bincount(triplets.ravel(),
                                  weights=this_dim.ravel(),
                                  # minlength=N
                                  )
        return -dC + 2 * lamb * X[:, dim]

    # loop across all dimensions... theano loops are weird, yes...
    all_dims = (t.scan(each_dim,
                       # non_sequences=N,
                       sequences=T.arange(no_dims)))[0].T
    return t.function([X, N, no_dims, triplets, lamb, alpha], [C, all_dims],
                      on_unused_input='ignore')
def __init__(self):
    super(Accumulator, self).__init__()  # don't forget this
    self.inc = T.dscalar()
    self.state = T.dscalar()
    self.new_state = self.inc + self.state
    self.add = Method(inputs=self.inc,
                      outputs=self.new_state,
                      updates={self.state: self.new_state})
    self.sub = Method(inputs=self.inc,
                      outputs=None,
                      updates={self.state: self.state - self.inc})
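# A minimal usage sketch for the Accumulator module above (an assumption,
# following the old theano Module/Method API used in the other Module
# snippets here): make() instantiates the module, after which `state`
# behaves like a shared value.
acc = Accumulator().make()
acc.state = 0.0    # initialize the shared state
print(acc.add(2))  # returns 2.0 and stores it in acc.state
acc.sub(1)         # updates acc.state in place, returns nothing
print(acc.state)   # 1.0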
def LQLEP_positiveV1(LQLEP_wPrior=Th.dscalar(), barrier=Th.dscalar(),
                     v1=Th.dvector(), **other):
    '''
    The actual Linear-Quadratic-Exponential-Poisson log-likelihood,
    as a function of theta and M, with a barrier on the log-det term
    and a prior.
    '''
    LQLEP_positiveV1 = LQLEP_wPrior + 0.00000001 * Th.sum(1 / v1**2.)
    barrier_positiveV1 = 1 - ((1 - barrier) * (Th.min(v1.flatten()) >= 0))
    other.update(locals())
    return named(**other)
def main(argv):
    def formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=100, width=200)

    # Training labels, similarity matrix and weight of the regularization term
    f, R, mu, eps = T.dvector('f'), T.dtensor3('R'), T.dvector('mu'), T.dscalar('eps')
    sigma2 = T.dscalar('sigma2')
    # Indices of labeled examples
    l = T.ivector('l')

    f_star = propagate(f, l, R, mu, eps)
    ll = likelihood(f, l, R, mu, eps, sigma2)

    propagate_f = theano.function([f, l, R, mu, eps], f_star, on_unused_input='warn')
    likelihood_function = theano.function([f, l, R, mu, eps, sigma2], ll,
                                          on_unused_input='warn')

    ll_grad = T.grad(ll, [mu, eps, sigma2])
    likelihood_gradient_function = theano.function([f, l, R, mu, eps, sigma2], ll_grad,
                                                   on_unused_input='warn')

    nb_nodes = 64
    R = np.zeros((nb_nodes, nb_nodes, 1))
    even_edges = [(i, i + 2) for i in range(0, nb_nodes, 2) if (i + 2) < nb_nodes]
    odd_edges = [(i, i + 2) for i in range(1, nb_nodes, 2) if (i + 2) < nb_nodes]
    for source, target in even_edges + odd_edges:
        R[source, target, 0], R[target, source, 0] = 1.0, 1.0

    mu = np.ones(1)
    eps = 1e-2
    sigma2 = 1e-6

    f = np.array([+1.0, -1.0] + ([.0] * (nb_nodes - 2)))
    l = np.array(f != 0, dtype='int8')

    print(propagate_f(f, l, R, mu, eps))

    learning_rate = 1e-2
    for i in range(1024):
        ll_value = likelihood_function(f, l, R, mu, eps, sigma2)
        print('LL [%d]: %s' % (i, ll_value))
        grad_value = likelihood_gradient_function(f, l, R, mu, eps, sigma2)
        mu += learning_rate * grad_value[0]
        eps += max(1e-6, learning_rate * grad_value[1])
        sigma2 += max(1e-6, learning_rate * grad_value[2])
        print('Mu: %s' % str(mu))
        print('Eps: %s' % str(eps))
        print('Sigma^2: %s' % str(sigma2))
def addTwoScalars(a, b):
    # define symbols, i.e. variables
    # dscalar = floating-point scalar
    x = T.dscalar('x')
    y = T.dscalar('y')

    # operation we want to perform
    z = x + y

    # create a function taking x and y as inputs and giving z as output:
    # the first arg is the list of variables that will be provided as inputs,
    # the second arg is a single variable (or a list of variables) that we
    # want as output
    f = th.function([x, y], z)
    print(f(a, b))
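# Example call:
addTwoScalars(1.5, 2.5)  # prints 4.0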
def UV(U=Th.dmatrix('U'), V1=Th.dmatrix('V1'), V2=Th.dvector('V2'),
       STAs=Th.dmatrix('STAs'), STCs=Th.dtensor3('STCs'),
       centers=Th.dvector('centers'), indices=Th.dmatrix('indices'),
       lam=Th.dscalar('lam'), lambdas=Th.dvector('lambdas'),
       N_spikes=Th.dvector('N_spikes'), Ncones=Th.dscalar('Ncones'),
       **other):
    # N (the number of cells) is assumed to be defined in the enclosing scope
    return [{'theta': Th.dot(U.T, V1[i, :]),
             'M': Th.dot(V1[i, :] * U.T, (V2 * U.T).T),
             'STA': STAs[i, :],
             'STC': STCs[i, :, :],
             'N_spike': N_spikes[i] / (Th.sum(N_spikes)),
             'U': U,
             'logprior': -Th.sum(Th.sqrt(Th.sum(V1**2., axis=0) + 0.000001) * lambdas)}
            for i in range(N)]
def make_loss(Model, l1=0., l2=0.):
    L, y = T.ivector('L'), T.dvector('y')
    mu, eps = T.dscalar('mu'), T.dscalar('eps')
    R, eta = T.dtensor3('R'), T.dvector('eta')

    loss = Model.loss_symbolic(L, y, mu, R, eta, eps)
    L1 = abs(mu) + T.sum(abs(eta)) + abs(eps)
    L2 = mu ** 2 + T.sum(eta ** 2) + eps ** 2
    regularized_loss = loss + l1 * L1 + l2 * L2
    return theano.function([L, y, mu, R, eta, eps], regularized_loss)
# What's the relationship between the test/training data and the minibatch size?
# Does the batch size have to "fit" exactly into the dataset sizes?
# Why are we also iterating over the test data?
#
# ## From Theano intro tutorial

# In[1]:
import theano
from theano import tensor

# In[7]:
a = tensor.dscalar("a")
b = tensor.dscalar("b")

# In[8]:
c = a + b
f = theano.function([a, b], c)

# In[4]:
assert 4 == f(1.5, 2.5)

# In[9]:
theano.pp(c)
import theano
from theano import tensor

# declare two symbolic floating-point scalars
a = tensor.dscalar()
b = tensor.dscalar()

# create a simple symbolic expression
c = a + b

# convert the expression into a callable object that takes (a, b) and computes c
f = theano.function([a, b], c)

result = f(1.5, 2.5)
print(result)
# http://deeplearning.net/software/theano/tutorial/adding.html
import numpy as np
import theano.tensor as T
from theano import function

x = T.dscalar('x')
y = T.dscalar('y')
z = x + y
f = function([x, y], z)
f(2, 3)
z.eval({x: 2, y: 3})

x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
f = function([x, y], z)
f([[1, 2], [3, 4]], [[10, 20], [30, 40]])
f(np.array([[1, 2], [3, 4]]), np.array([[10, 20], [30, 40]]))

n = 10
m = 20
X = T.arange(n * m).reshape((n, m))
u = T.arange(0, n * m, m).reshape((n, 1))
r = X - u
r.eval()

a = T.vector()
b = T.vector()
out = a**2 + b**2 + 2 * a * b
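# The `out` expression above is left uncompiled in the tutorial snippet; a
# minimal completion (an assumption, following the same pattern used earlier
# in the file) would be:
f_out = function([a, b], out)
print(f_out([1, 2], [4, 5]))  # [25. 49.], since a**2 + b**2 + 2ab == (a + b)**2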
def test_too_big_rank(self):
    x = tensor.dscalar()
    y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1))
    self.assertRaises(ValueError, y.eval, {x: 0})
def create_neural_network(modelParams, featureParams, trainParams, trainDataParams):
    # Number of class units
    numClassUnit = 1

    # Define Theano variables
    inputVar = T.TensorType('floatX', ((False,) * 5))()
    targetVar = T.ivector('targetVar')
    trainWeights = T.dvector('trainWeights')
    l2Adaptive = T.dscalar('l2Adaptive')

    # Create the neural network model with the given parameters
    network, inputVar = create_neural_network_model(modelParams, featureParams,
                                                    inputVar, numClassUnit)

    # Define the loss function for training
    prediction = lasagne.layers.get_output(network)
    if numClassUnit == 1:
        trainLoss = lasagne.objectives.binary_crossentropy(prediction, targetVar)
        trainAcc = lasagne.objectives.binary_accuracy(prediction, targetVar,
                                                      threshold=0.5)
        trainAcc = T.mean(trainAcc, dtype=theano.config.floatX)
    else:
        trainLoss = lasagne.objectives.categorical_crossentropy(prediction, targetVar)
        trainAcc = T.mean(T.eq(T.argmax(prediction, axis=1), targetVar),
                          dtype=theano.config.floatX)

    # If the dataset was not under-sampled, take a weighted mean of the loss;
    # otherwise take a simple mean
    if trainDataParams.underSample == False:
        # weighting according to the class ratio
        # interictal=0, preictal=1
        weights_per_label = theano.shared(
            lasagne.utils.floatX(trainDataParams.dataRatio))
        weights = weights_per_label[targetVar]
        trainLoss = lasagne.objectives.aggregate(trainLoss, weights=weights)
    else:
        if trainParams.onlineWeights:
            trainLoss = lasagne.objectives.aggregate(trainLoss, weights=trainWeights)
        else:
            trainLoss = trainLoss.mean()

    # Regularization
    if trainParams.l1:
        trainLoss += regularization.regularize_network_params(
            network, regularization.l1) * trainParams.l1
    if trainParams.adaptiveL2["active"]:
        trainLoss += regularization.regularize_network_params(
            network, regularization.l2) * l2Adaptive
    elif trainParams.l2:
        trainLoss += regularization.regularize_network_params(
            network, regularization.l2) * trainParams.l2

    # Updates for the parameters
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(trainLoss, params,
                                   learning_rate=trainParams.learnRate)

    # Define the loss function for validation and test
    testPrediction = lasagne.layers.get_output(network, deterministic=True)
    if numClassUnit == 1:
        testLoss = lasagne.objectives.binary_crossentropy(testPrediction, targetVar)
        testLoss = testLoss.mean()
        testAcc = lasagne.objectives.binary_accuracy(testPrediction, targetVar,
                                                     threshold=0.5)
        testAcc = T.mean(testAcc, dtype=theano.config.floatX)
    else:
        testLoss = lasagne.objectives.categorical_crossentropy(testPrediction, targetVar)
        testLoss = testLoss.mean()
        testAcc = T.mean(T.eq(T.argmax(testPrediction, axis=1), targetVar),
                         dtype=theano.config.floatX)

    # Compile the training function
    trainFn = theano.function([inputVar, targetVar, trainWeights, l2Adaptive],
                              [trainLoss, trainAcc],
                              updates=updates,
                              on_unused_input='ignore')
    # Compile the validation function
    valFn = theano.function([inputVar, targetVar], [testLoss, testAcc, testPrediction])
    # Compile the test function
    testFn = theano.function([inputVar, targetVar], [testLoss, testAcc, testPrediction])

    return network, trainFn, valFn, testFn
def define_model(layers):
    # Parameters initialization
    W1 = theano.shared(np.random.randn(layers[1], layers[0]) * 0.1)
    W2 = theano.shared(np.random.randn(layers[2], layers[1]) * 0.1)
    W3 = theano.shared(np.random.randn(layers[3], layers[2]) * 0.1)
    b1 = theano.shared(0.)
    b2 = theano.shared(0.)
    b3 = theano.shared(0.)

    # Forward propagation
    A0 = T.dmatrix('A0')
    Z1 = T.dot(W1, A0) + b1
    A1 = T.nnet.relu(Z1)
    Z2 = T.dot(W2, A1) + b2
    A2 = T.nnet.relu(Z2)
    Z3 = T.dot(W3, A2) + b3
    A3 = T.nnet.sigmoid(Z3)

    # Cost function - cross entropy
    labels = T.dmatrix('labels')
    cost = -1 / A0.shape[1] * (labels * T.log(A3) + (1 - labels) * T.log(1 - A3)).sum()

    # Gradient descent
    dW1, dW2, dW3, db1, db2, db3 = T.grad(cost, [W1, W2, W3, b1, b2, b3])
    alpha = T.dscalar('alpha')

    # Update parameters
    return theano.function(
        inputs=[A0, labels, alpha],
        outputs=[cost, W1, W2, W3, b1, b2, b3],
        updates=[
            [W1, W1 - alpha * dW1],
            [W2, W2 - alpha * dW2],
            [W3, W3 - alpha * dW3],
            [b1, b1 - alpha * db1],
            [b2, b2 - alpha * db2],
            [b3, b3 - alpha * db3],
        ])
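# A minimal usage sketch for define_model (the shapes here are hypothetical,
# and numpy is assumed to be imported as np, as in the function body): the
# columns of A0 are examples, layers = [n_x, h1, h2, 1], and labels is (1, m).
train = define_model([2, 4, 4, 1])
A0 = np.random.randn(2, 50)
labels = (A0.sum(axis=0, keepdims=True) > 0).astype(float)
for _ in range(100):
    out = train(A0, labels, 0.5)  # one gradient-descent step; out[0] is the cost
print(out[0])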
def __init__(self, in_size=66*37, hidden_size=[500, 500, 250], out_size=66*37,
             batch_size=100, corruption_levels=[0.1, 0.1, 0.1],
             dropout=True, dropout_rates=[0.1, 0.1, 0.1]):
    self.i_width = 37
    self.i_height = 66
    self.i_size = in_size
    self.h_sizes = hidden_size
    self.o_size = out_size
    self.batch_size = batch_size
    self.corruption_levels = corruption_levels
    self.n_layers = len(hidden_size)
    self.sa_layers = []
    self.sa_activations_train = []
    self.sa_activations_test = []
    self.thetas = []
    self.thetas_as_blocks = []
    self.dropout = dropout
    self.drop_rates = dropout_rates
    self.cost_fn_names = ['sqr_err', 'neg_log']

    self.x = T.matrix('x')
    self.fine_cost = T.dscalar('fine_cost')
    self.error = T.dscalar('test_error')

    print "Network Info:"
    print "Layers: %i" % self.n_layers
    print "Layer sizes: ",
    print self.h_sizes
    print ""

    print "Building the model..."

    for i in xrange(self.n_layers):
        if i == 0:
            curr_input_size = self.i_size
        else:
            curr_input_size = self.h_sizes[i - 1]

        # if i==0 the input is the raw input
        if i == 0:
            curr_input_train = self.x
            curr_input_test = self.x
        # otherwise the input is the previous layer's hidden activation
        else:
            a2_train = self.sa_layers[-1].get_hidden_act(training=True)
            a2_test = self.sa_layers[-1].get_hidden_act(training=False)
            self.sa_activations_train.append(a2_train)
            self.sa_activations_test.append(a2_test)
            curr_input_train = self.sa_activations_train[-1]
            curr_input_test = self.sa_activations_test[-1]

        sa = SparseAutoencoder(n_inputs=curr_input_size, n_hidden=self.h_sizes[i],
                               x_train=curr_input_train, x_test=curr_input_test,
                               dropout=dropout, dropout_rate=self.drop_rates[i])

        self.sa_layers.append(sa)
        self.thetas.extend(self.sa_layers[-1].get_params())
        self.thetas_as_blocks.append(self.sa_layers[-1].get_params())

    # the -1 index gives the last element
    a2_train = self.sa_layers[-1].get_hidden_act(training=True)
    a2_test = self.sa_layers[-1].get_hidden_act(training=False)
    self.sa_activations_train.append(a2_train)
    self.sa_activations_test.append(a2_test)

    self.out_sa = ReconstructionLayer(n_inputs=self.h_sizes[-1],
                                      n_outputs=self.o_size,
                                      x_train=self.sa_activations_train[-1],
                                      x_test=self.sa_activations_test[-1],
                                      dropout=self.dropout,
                                      dropout_rate=self.drop_rates[-1])
    self.out_sa_out = self.out_sa.get_hidden_act(training=False)

    self.lam_fine_tune = T.scalar('lam')
    self.fine_cost = self.out_sa.get_finetune_cost(self.x)
    self.thetas.extend(self.out_sa.get_params())

    # measure test performance
    self.error = self.out_sa.get_error(self.x)
# print entityVocab
inveventVocab = {v: k for k, v in eventVocab.items()}
invWordVocab = {v: k for k, v in wordVocab.items()}
inventityVocab = {v: k for k, v in entityVocab.items()}

eventCount = 0
actualCount = 0

L1_reg = 0.001
L2_reg = 0.0001
learning_rate = 0.01
nepochs = 50
globalScore = 0

dropouttrain = np.asarray(0.2, dtype=theano.config.floatX)
dropouttest = np.asarray(0.0, dtype=theano.config.floatX)

x1 = T.ivector('x1')
x2 = T.ivector('x2')
x3 = T.dscalar('x3')
y = T.ivector('y')

rng = numpy.random.RandomState(1234)

model = MyTriggerModel(rng=rng, wordVocab=wordVocab, entityVocab=entityVocab,
                       eventVocab=eventVocab, embSizeWord=200, embSizeEntity=50,
                       RnnHiddenDim=250, FFhiddLayerDim=150,
                       input1=x1, input2=x2, dropout=x3)

cost = (model.negative_log_likelihood(y)
        + L1_reg * model.L1
        + L2_reg * model.L2_sqr)
def __init__(self, f, x_inputs, u_inputs, t=None, hessians=False, **kwargs):
    """Constructs an AutoDiffDynamics model.

    Args:
        f: Vector Theano tensor expression.
        x_inputs: Theano state input variables.
        u_inputs: Theano action input variables.
        t: Theano tensor time variable.
        hessians: Evaluate the dynamic model's second order derivatives.
            Default: only use first order derivatives (i.e. iLQR instead
            of DDP).
        **kwargs: Additional keyword-arguments to pass to
            `theano.function()`.
    """
    self._t = T.dscalar("t") if t is None else t
    self._x_inputs = x_inputs
    self._u_inputs = u_inputs

    non_t_inputs = np.hstack([x_inputs, u_inputs]).tolist()
    inputs = np.hstack([x_inputs, u_inputs, self._t]).tolist()

    x_dim = len(x_inputs)
    u_dim = len(u_inputs)

    self._state_size = x_dim
    self._action_size = u_dim

    self._J = jacobian_vector(f, non_t_inputs)
    self._f = as_function(f, inputs, name="f", **kwargs)
    self._f_x = as_function(self._J[:, :x_dim], inputs, name="f_x", **kwargs)
    self._f_u = as_function(self._J[:, x_dim:], inputs, name="f_u", **kwargs)

    self._has_hessians = hessians
    if hessians:
        self._Q = hessian_vector(f, non_t_inputs)
        self._f_xx = as_function(self._Q[:, :x_dim, :x_dim], inputs,
                                 name="f_xx", **kwargs)
        self._f_ux = as_function(self._Q[:, x_dim:, :x_dim], inputs,
                                 name="f_ux", **kwargs)
        self._f_uu = as_function(self._Q[:, x_dim:, x_dim:], inputs,
                                 name="f_uu", **kwargs)

    super(AutoDiffDynamics, self).__init__()
def __init__(self, We_initial, words, layersize, embed_size, rel, batchsize,
             Rel_init, LC, eta, margin, usepeep, fin, initiallization, relsize,
             activation, activation2):
    self.LC = LC
    self.margin = margin
    self.memsize = embed_size
    self.usepeep = usepeep
    self.batchsize = batchsize
    self.words = words
    self.rel = rel
    self.Rel = theano.shared(Rel_init).astype(theano.config.floatX)
    self.we = theano.shared(We_initial).astype(theano.config.floatX)
    self.fin = fin
    self.initiallization = initiallization
    self.relsize = relsize
    self.activation = activation
    self.activation2 = activation2

    # symbolic params
    pTuple = T.imatrix()
    neTuple1 = T.imatrix()
    neTuple2 = T.imatrix()
    pTupleMask = T.matrix()
    neTuple1Mask = T.matrix()
    neTuple2Mask = T.matrix()
    self.eta = T.dscalar()
    self.lam = T.dscalar()
    g1len = T.iscalar()
    g2len = T.iscalar()
    p1len = T.iscalar()
    p2len = T.iscalar()
    poolSize = T.iscalar()
    poolSize = 1
    We0 = T.dmatrix()

    # get embeddings
    l_in = lasagne.layers.InputLayer((None, None, 1))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    # l_rel = lasagne.layers.InputLayer(shape=(None, None))
    l_emb = lasagne.layers.EmbeddingLayer(l_in,
                                          input_size=self.we.get_value().shape[0],
                                          output_size=self.we.get_value().shape[1],
                                          W=self.we)
    l_lstm1 = lasagne.layers.LSTMLayer(l_emb, layersize, peepholes=True,
                                       grad_clipping=100, mask_input=l_mask)
    l_lstm2 = lasagne.layers.LSTMLayer(l_emb, layersize, peepholes=True,
                                       grad_clipping=100, mask_input=l_mask,
                                       backwards=True)
    l_lstm3 = lasagne.layers.ConcatLayer([l_lstm1, l_lstm2], axis=2)

    layer_poolingPo = lasagne.layers.get_output(l_lstm3, {l_in: pTuple, l_mask: pTupleMask})
    layer_poolingNe1 = lasagne.layers.get_output(l_lstm3, {l_in: neTuple1, l_mask: neTuple1Mask})
    layer_poolingNe2 = lasagne.layers.get_output(l_lstm3, {l_in: neTuple2, l_mask: neTuple2Mask})

    l_poolingPo = layer_poolingPo.max(axis=1)
    l_poolingNe1 = layer_poolingNe1.max(axis=1)
    l_poolingNe2 = layer_poolingNe2.max(axis=1)

    embPo = l_poolingPo.reshape([-1, self.memsize])
    embNe1 = l_poolingNe1.reshape([-1, self.memsize])
    embNe2 = l_poolingNe2.reshape([-1, self.memsize])

    #############################################################
    r = T.ivector()
    p3 = T.ivector()
    r0 = self.Rel[r]
    r1 = self.Rel[p3]

    input_vec = T.concatenate([embPo, r0], axis=1)
    input_vec_neg = T.concatenate([embNe1, r0], axis=1)
    input_vec_neg1 = T.concatenate([embNe2, r0], axis=1)
    input_vec_neg2 = T.concatenate([embPo, r1], axis=1)

    ar = T.dmatrix()
    l_in1 = lasagne.layers.InputLayer(shape=(None, (self.memsize) + self.relsize))
    if self.activation == 'none':
        denseLayer1 = lasagne.layers.DenseLayer(l_in1, num_units=600,
                                                W=lasagne.init.Normal(),
                                                nonlinearity=None)
    if self.activation == 'sigmoid':
        denseLayer1 = lasagne.layers.DenseLayer(l_in1, num_units=600,
                                                W=lasagne.init.Normal(),
                                                nonlinearity=lasagne.nonlinearities.sigmoid)
    if self.activation == 'rectify':
        denseLayer1 = lasagne.layers.DenseLayer(l_in1, num_units=600,
                                                W=lasagne.init.Normal(),
                                                nonlinearity=lasagne.nonlinearities.rectify)
    if self.activation2 == 'sigmoid':
        denseLayer = lasagne.layers.DenseLayer(denseLayer1, num_units=1,
                                               W=lasagne.init.Normal(),
                                               nonlinearity=lasagne.nonlinearities.sigmoid)
    if self.activation2 == 'rectify':
        denseLayer = lasagne.layers.DenseLayer(denseLayer1, num_units=1,
                                               W=lasagne.init.Normal(),
                                               nonlinearity=lasagne.nonlinearities.rectify)

    score = get_output(denseLayer, {l_in1: input_vec})
    score_ne = get_output(denseLayer, {l_in1: input_vec_neg})
    score_ne1 = get_output(denseLayer, {l_in1: input_vec_neg1})
    score_ne2 = get_output(denseLayer, {l_in1: input_vec_neg2})
    score2 = get_output(denseLayer, {l_in1: ar})

    s1 = 1 - score[0][0] + score_ne[0][0]
    costpg = s1 * (T.gt(s1, 0))
    s2 = 1 - score[0][0] + score_ne1[0][0]
    costpg = costpg + s2 * (T.gt(s2, 0))
    s3 = 1 - score[0][0] + score_ne2[0][0]
    costpg = costpg + s3 * (T.gt(s3, 0))

    network_params1 = lasagne.layers.get_all_params(
        [denseLayer, l_lstm3, l_lstm2, l_lstm1], trainable=True)
    self.all_params = network_params1
    self.all_params.append(self.Rel)
    self.all_params.append(self.we)
    print self.all_params

    self.feedforward_function = theano.function([pTuple, pTupleMask], embPo)

    l2_penalty1 = lasagne.regularization.apply_penalty(network_params1, l2)
    cost_new = (1000 * costpg) + (self.LC * l2_penalty1)

    updates = lasagne.updates.adagrad(cost_new, self.all_params, eta)

    self.output_evaluation = theano.function(inputs=[ar], outputs=score2)
    self.train_function1 = theano.function(
        inputs=[r, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask,
                neTuple2, neTuple2Mask],
        outputs=[cost_new, costpg],
        updates=updates,
        on_unused_input='warn')
def __init__(self, Q, D, N, M):
    try:
        print('Trying to load model...')
        with open('model_SV2.save', 'rb') as file_handle:
            self.f, self.g = pickle.load(file_handle)
            print('Loaded!')
        return
    except:
        print('Failed. Creating a new model...')

    print('Setting up variables...')
    hyp, S, MU, SIGMA, U, X = T.dmatrices('hyp', 'S', 'MU', 'SIGMA', 'U', 'X')
    b = T.dvector('b')
    sn = T.dscalar('sn')
    sf = T.dscalar('sf')

    SIGMA_trf = T.log(1 + T.exp(SIGMA)) ** 2
    sf_trf = T.log(1 + T.exp(sf)) ** 2
    sn_trf = T.log(1 + T.exp(sn)) ** 2
    lengthscale_trf = T.log(1 + T.exp(hyp[:, 0]))
    lengthscale_p_trf = T.log(1 + T.exp(hyp[:, 1]))

    print('Setting up model...')
    LL, KL = self.get_model(lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf,
                            S, MU, SIGMA_trf, U, b, X, Q, D, N, M)

    print('Compiling model...')
    inputs = {'X': X, 'MU': MU, 'SIGMA': SIGMA, 'S': S, 'U': U, 'b': b,
              'hyp': hyp, 'sn': sn, 'sf': sf}
    # work around a bug with derivatives w.r.t. inputs not in the graph
    z = 0.0 * sum([T.sum(v) for v in inputs.values()])
    f = {'LL': LL, 'KL': KL}
    self.f = {fn: theano.function(list(inputs.values()), fv + z,
                                  name=fn, on_unused_input='ignore')
              for fn, fv in f.items()}
    g = {'LL': LL, 'KL': KL}
    wrt = {'MU': MU, 'SIGMA': SIGMA, 'S': S, 'U': U, 'b': b,
           'hyp': hyp, 'sn': sn, 'sf': sf}
    self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv + z, vv),
                                       name='d' + gn + '_d' + vn,
                                       on_unused_input='ignore')
                   for gn, gv in g.items()}
              for vn, vv in wrt.items()}

    with open('model_SV2.save', 'wb') as file_handle:
        print('Saving model...')
        sys.setrecursionlimit(100000)
        pickle.dump([self.f, self.g], file_handle,
                    protocol=pickle.HIGHEST_PROTOCOL)
def __init__(self, l, l_terminal, x_inputs, u_inputs, i=None, **kwargs):
    """Constructs an AutoDiffCost.

    Args:
        l: Vector Theano tensor expression for instantaneous cost.
            This needs to be a function of x and u and must return a scalar.
        l_terminal: Vector Theano tensor expression for terminal cost.
            This needs to be a function of x only and must return a scalar.
        x_inputs: Theano state input variables [state_size].
        u_inputs: Theano action input variables [action_size].
        i: Theano tensor time step variable.
        **kwargs: Additional keyword-arguments to pass to
            `theano.function()`.
    """
    self._i = T.dscalar("i") if i is None else i
    self._x_inputs = x_inputs
    self._u_inputs = u_inputs

    non_t_inputs = np.hstack([x_inputs, u_inputs]).tolist()
    inputs = np.hstack([x_inputs, u_inputs, self._i]).tolist()
    terminal_inputs = np.hstack([x_inputs, self._i]).tolist()

    x_dim = len(x_inputs)
    u_dim = len(u_inputs)

    self._J = jacobian_scalar(l, non_t_inputs)
    self._Q = hessian_scalar(l, non_t_inputs)

    self._l = as_function(l, inputs, name="l", **kwargs)
    self._l_x = as_function(self._J[:x_dim], inputs, name="l_x", **kwargs)
    self._l_u = as_function(self._J[x_dim:], inputs, name="l_u", **kwargs)
    self._l_xx = as_function(self._Q[:x_dim, :x_dim], inputs, name="l_xx", **kwargs)
    self._l_ux = as_function(self._Q[x_dim:, :x_dim], inputs, name="l_ux", **kwargs)
    self._l_uu = as_function(self._Q[x_dim:, x_dim:], inputs, name="l_uu", **kwargs)

    # Terminal cost only depends on x, so we only need to evaluate the x
    # partial derivatives.
    self._J_terminal = jacobian_scalar(l_terminal, x_inputs)
    self._Q_terminal = hessian_scalar(l_terminal, x_inputs)

    self._l_terminal = as_function(l_terminal, terminal_inputs,
                                   name="l_term", **kwargs)
    self._l_x_terminal = as_function(self._J_terminal[:x_dim], terminal_inputs,
                                     name="l_term_x", **kwargs)
    self._l_xx_terminal = as_function(self._Q_terminal[:x_dim, :x_dim],
                                      terminal_inputs, name="l_term_xx", **kwargs)

    super(AutoDiffCost, self).__init__()
# 1) double-lined RV solution w/ residuals
#    rvAa(t)
#    rvAb(t)
#    correct the data from each RV instrument based on offset
#    one draw only, since P and offsets don't scatter well in this space
# 2) plot of X,Y (Ab rel Aa)
#    multiple posterior draws
# 4) plot of X,Y (B rel A), with increasing RV_B orbits highlighted

t = tt.vector("times")

mparallax = tt.dscalar("mparallax")
parallax = 1e-3 * mparallax  # arcsec

a_ang_inner = tt.dscalar("a_ang_inner")  # milliarcsec
# the semi-major axis in au
a_inner = 1e-3 * a_ang_inner / parallax  # au

logP_inner = tt.dscalar("logP_inner")  # days
P_inner = tt.exp(logP_inner)

e_inner = tt.dscalar("e_inner")
omega_inner = tt.dscalar("omega_inner")  # omega_Aa
Omega_inner = tt.dscalar("Omega_inner")
def theano_setup(self):
    # The matrices Wb and Wc were originally tied.
    # Because of that, I decided to keep Wb and Wc with
    # the same shape (instead of being transposed) to
    # avoid disturbing the code as much as possible.
    Wb = T.dmatrix('Wb')
    Wc = T.dmatrix('Wc')
    b = T.dvector('b')
    c = T.dvector('c')
    s = T.dscalar('s')
    x = T.dmatrix('x')

    h_act = T.dot(x, Wc) + c
    if self.act_func[0] == 'tanh':
        h = T.tanh(h_act)
    elif self.act_func[0] == 'sigmoid':
        h = T.nnet.sigmoid(h_act)
    elif self.act_func[0] == 'id':
        # bad idea
        h = h_act
    else:
        raise ValueError("Invalid act_func[0]")

    r_act = T.dot(h, Wb.T) + b
    if self.act_func[1] == 'tanh':
        r = s * T.tanh(r_act)
    elif self.act_func[1] == 'sigmoid':
        r = s * T.nnet.sigmoid(r_act)
    elif self.act_func[1] == 'id':
        r = s * r_act
    else:
        raise ValueError("Invalid act_func[1]")

    # Another variable to be able to call a function
    # with a noisy x and compare it to a reference x.
    y = T.dmatrix('y')
    loss = ((r - y)**2)
    sum_loss = T.sum(loss)

    # theano_encode_decode : vectorial function in argument X.
    # theano_loss : vectorial function in argument X.
    # theano_gradients : returns a tuple of gradients, each of which
    #                    involves all the data X summed, so it's not a
    #                    "vectorial" function.
    self.theano_encode_decode = function([Wb, Wc, b, c, s, x], r)
    self.theano_loss = function([Wb, Wc, b, c, s, x, y], loss)
    self.theano_gradients = function([Wb, Wc, b, c, s, x, y],
                                     [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
                                      T.grad(sum_loss, b), T.grad(sum_loss, c),
                                      T.grad(sum_loss, s)])

    # other useful theano functions for the experiments that involve
    # adding noise to the hidden states
    self.theano_encode = function([Wc, c, x], h)
    self.theano_decode = function([Wb, b, s, h], r)
def theanoScaVecDiv(In1, In2):
    var2 = T.dvector('var2')
    var1 = T.dscalar('var1')
    var3 = T.div_proxy(var1, var2)
    DivVec = function([var1, var2], var3)
    return DivVec(In1, In2)
output2 = fvec(in1, in2)
print('Output2:', output2)

# Evaluate expression multiple times
print('Evaluate expression multiple times')
x = T.iscalar('x')
sh = shared(0)
f = function([x], sh**2, updates=[(sh, sh + x)])
input = 1
for i in range(3):
    # f returns sh**2 computed from the old state and then updates sh,
    # so this prints (0, 1), (1, 2), (4, 3)
    print('x=1: output3, sh:', f(input), sh.get_value())

# *Theano functions
# Function returns multiple values
print('Function returns multiple values')
a = T.dscalar('a')
f = function([a], [a**2, a**3])
print(f(3))

# Function returns the gradient
print('Function returns gradient')
x = T.dscalar('a')
y = x**4
dy = T.grad(y, x)
f = function([x], dy)
print(f(3))

# from theano import pp  # pretty-print
# print(pp(qy))

# *Single neuron - Feed forward
def __init__(self, f, state_size, action_size, **kwargs):
    """Constructs a BatchAutoDiffCost.

    Args:
        f: Symbolic function with the following signature:
            Args:
                x: Batch of state variables.
                u: Batch of action variables.
                i: Batch of time step variables.
                terminal: Whether to compute the terminal cost instead.
            Returns:
                f: Batch of instantaneous costs.
        **kwargs: Additional keyword-arguments to pass to
            `theano.function()`.
    """
    self._fn = f
    self._state_size = state_size
    self._action_size = action_size

    # Prepare inputs.
    self._x = x = T.dvector("x")
    self._u = u = T.dvector("u")
    self._i = i = T.dscalar("i")

    inputs = [self._x, self._u, self._i]
    inputs_term = [self._x, self._i]

    x_rep_x = T.tile(x, (state_size, 1))
    u_rep_x = T.tile(u, (state_size, 1))
    i_rep_x = T.tile(i, (state_size, 1))

    x_rep_u = T.tile(x, (action_size, 1))
    u_rep_u = T.tile(u, (action_size, 1))
    i_rep_u = T.tile(i, (action_size, 1))

    x_rep_1 = T.tile(x, (1, 1))
    u_rep_1 = T.tile(u, (1, 1))
    i_rep_1 = T.tile(i, (1, 1))

    l_tensor = f(x_rep_1, u_rep_1, i_rep_1, terminal=False)[0]
    J_x, J_u = T.grad(l_tensor, [x, u], disconnected_inputs="ignore")

    # Compute the hessians in batches.
    l_tensor_rep_x = f(x_rep_x, u_rep_x, i_rep_x, terminal=False)
    l_tensor_rep_u = f(x_rep_u, u_rep_u, i_rep_u, terminal=False)
    J_x_rep = T.grad(cost=None,
                     wrt=x_rep_x,
                     known_grads={l_tensor_rep_x: T.ones(state_size)},
                     disconnected_inputs="ignore")
    J_u_rep = T.grad(cost=None,
                     wrt=u_rep_u,
                     known_grads={l_tensor_rep_u: T.ones(action_size)},
                     disconnected_inputs="ignore")
    Q_xx = T.grad(cost=None,
                  wrt=x_rep_x,
                  known_grads={J_x_rep: T.eye(state_size)},
                  disconnected_inputs="ignore")
    Q_ux = T.grad(cost=None,
                  wrt=x_rep_u,
                  known_grads={J_u_rep: T.eye(action_size)},
                  disconnected_inputs="ignore")
    Q_uu = T.grad(cost=None,
                  wrt=u_rep_u,
                  known_grads={J_u_rep: T.eye(action_size)},
                  disconnected_inputs="warn")

    # Terminal cost only depends on x, so we only need to evaluate the x
    # partial derivatives.
    l_tensor_term = f(x_rep_1, None, i, terminal=True)[0]
    J_x_term, _ = T.grad(l_tensor_term, inputs_term, disconnected_inputs="ignore")

    l_tensor_rep_term = f(x_rep_x, None, i_rep_x, terminal=True)
    J_x_rep_term = T.grad(cost=None,
                          wrt=x_rep_x,
                          known_grads={l_tensor_rep_term: T.ones_like(l_tensor_rep_term)},
                          disconnected_inputs="ignore")
    Q_xx_term = T.grad(cost=None,
                       wrt=x_rep_x,
                       known_grads={J_x_rep_term: T.eye(state_size)},
                       disconnected_inputs="ignore")

    # Compile all functions.
    self._l = as_function(l_tensor, inputs, name="l", **kwargs)
    self._l_x = as_function(J_x, inputs, name="l_x", **kwargs)
    self._l_u = as_function(J_u, inputs, name="l_u", **kwargs)
    self._l_xx = as_function(Q_xx, inputs, name="l_xx", **kwargs)
    self._l_ux = as_function(Q_ux, inputs, name="l_ux", **kwargs)
    self._l_uu = as_function(Q_uu, inputs, name="l_uu", **kwargs)
    self._l_term = as_function(l_tensor_term, inputs_term, name="l_term", **kwargs)
    self._l_x_term = as_function(J_x_term, inputs_term, name="l_x_term", **kwargs)
    self._l_xx_term = as_function(Q_xx_term, inputs_term, name="l_xx_term", **kwargs)

    super(BatchAutoDiffCost, self).__init__()
def theanoScaMatMul(In1, In2):
    var2 = T.dmatrix('var2')
    var1 = T.dscalar('var1')
    var3 = T.mul(var1, var2)
    Mul = function([var1, var2], var3)
    return Mul(In1, In2)
import theano
import theano.tensor as T
import theano.tensor.nnet as nnet
import numpy as np

x = T.dscalar()
y = T.exp(T.sin(x**2))
print type(y)

f = theano.function([x], y)
print f(2.3)

fp = T.grad(y, wrt=x)
fnew = theano.function([x], fp)
print fnew(2.3)

# Simple tensors
import theano
import numpy as np
import src.specsiser as sr
import theano.tensor as T
import pyneb as pn

theano.config.on_unused_input = 'ignore'

print('\nTheano example')
x, y = T.dscalar('x'), T.dscalar('y')
z = x + y
print('z = ', z.eval({x: 16.3, y: 12.1}))

print('\nFlux computation using numpy arrays')
label_list = ['O3_5007A', 'S3_6312A', 'H1_6563A', 'He1_5876A']
ion_list = ['O3', 'S3', 'H1r', 'He1r']
emtt = sr.EmissionTensors(label_list, ion_list)

emis_ratio, cHbeta, flambda, abund, ftau = 0.352, 0.12, 0.2, 7.8, 0.0
params = dict(emis_ratio=emis_ratio, cHbeta=cHbeta, flambda=flambda,
              abund=abund, ftau=ftau)
flux_i = emtt.emFluxEqDict['O3_5007A'](emis_ratio, cHbeta, flambda, abund, ftau, {})
print('Flux_i', flux_i)

print('\nFlux computation using theano graphs')
emis_ratio_t, cHbeta_t, flambda_t, abund_t, ftau_t = T.dscalars(
def set_sigma(s):
    __sigma.set_value(s * s)


def __hessian(cost, variables):
    hessians = []
    for input1 in variables:
        d_cost_d_input1 = T.grad(cost, input1)
        hessians.append([T.grad(d_cost_d_input1, input2) for input2 in variables])
    return hessians


__theta = T.dscalar("theta")
__phi = T.dscalar("phi")

### Normalized direction vector
__n_x = T.sin(__theta)
__n_y = T.cos(__theta) * T.sin(__phi)
__n_z = T.cos(__theta) * T.cos(__phi)

__z0 = theano.shared(0.0, 'z0', allow_downcast=True)


def set_z0(z0):
    __z0.set_value(z0)


_n = [__n_x, __n_y, __n_z]
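# A minimal usage sketch for the __hessian helper above (the cost expression
# here is hypothetical, chosen only to illustrate the nested-grad pattern):
# the result is a list of lists of symbolic second derivatives, indexed by
# the order of the variables passed in.
cost = __n_x**2 + __n_y * __n_z
H = __hessian(cost, [__theta, __phi])
d2cost_dtheta2 = theano.function([__theta, __phi], H[0][0])
print(d2cost_dtheta2(0.3, 0.7))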
def theanoMatScaDiv(In1, In2):
    var1 = T.dmatrix('var1')
    var2 = T.dscalar('var2')
    var3 = T.div_proxy(var1, var2)
    Div = function([var1, var2], var3)
    return Div(In1, In2)
    # ... tail of fisher_wright (the enclosing def is not part of this fragment)
    N = 2000.0
    p1 = 0.1
    p0 = 1 / (1 + k * x2 / N)
    q = x0 * p0 / (x0 + x1)
    qhat = (x0 * (1 - p0) + x1 * p1) / ((x0 + x1) * (1 - q))
    x0n = np.random.binomial(N, q)
    x1n = np.random.binomial(N - x0n, qhat)
    x2n = N - x0n - x1n
    return x0n, x1n, x2n

xStart = T.dvector('xStart')
i = T.iscalar('i')
U1 = T.dmatrix('U1')
U2 = T.dmatrix('U2')
k = T.dscalar('k')

results, updates = th.scan(fn=fisher_wright,
                           outputs_info=[{'initial': xStart, 'taps': [-1]}],
                           sequences=[U1, U2],
                           non_sequences=k,
                           n_steps=i)

v1 = T.dvector('v1')
v2 = T.dvector('v2')

resultsNA, updatesNA = th.scan(fn=fisher_wright_normal_approx,
                               outputs_info=[{'initial': xStart, 'taps': [-1]}],
                               sequences=[v1, v2],
                               non_sequences=k,
                               n_steps=i)

f = th.function(inputs=[i, xStart, U1, U2, k], outputs=results, updates=updates)
fNA = th.function(inputs=[i, xStart, v1, v2, k], outputs=resultsNA, updates=updatesNA)

N_fw = 2000000
ir = 100.0
U1r = np.random.uniform(0, 1, (ir, N_fw))
U2r = np.random.uniform(0, 1, (ir, N_fw))
v1r = np.random.normal(0, 1, ir)
v2r = np.random.normal(0, 1, ir)
# rbf = function([x, y, gamma], rbf_)
#
# st1 = time.time()
# rbf_mat1 = np.zeros((X_.shape[0], X_.shape[0]))
# for i, x_ in enumerate(X_):
#     for j, y_ in enumerate(X_):
#         rbf_mat1[i, j] = rbf(x_, y_, gamma_)
#
# et1 = time.time()
# print "elapsed time (Partial-Theano): %f [s]" % (et1 - st1)

# All theano
st2 = time.time()
X = T.dmatrix('X')
gamma = T.dscalar("gamma")
distmat = T.sum((T.reshape(X, (X.shape[0], 1, X.shape[1])) - X)**2, 2)
rbf_mat = function([X, gamma], T.exp(-gamma * distmat))
rbf_mat2 = rbf_mat(X_, gamma_)
et2 = time.time()
print "elapsed time (All-Theano): %f [s]" % (et2 - st2)

result[n] = (et0 - st0, et2 - st2)

# print "sanity check"
# print rbf_mat0[0, :]
# print rbf_mat1[0, :]
# print rbf_mat2[0, :]
import theano.tensor as T
import theano as t

x = T.dvector('x')
b = T.dscalar('b')
w = T.dscalar('w')
y = w * x + b
f1 = t.function([x, b, w], y)
print("value after linear mapping: {}".format(f1([1], 2, 3)))  # prints [ 5.]
def __init__(self, in_size=28**2, hidden_size=[500, 500, 250], out_size=10,
             batch_size=100, corruption_levels=[0.1, 0.1, 0.1],
             dropout=True, drop_rates=[0.5, 0.2, 0.2]):
    self.i_size = in_size
    self.h_sizes = hidden_size
    self.o_size = out_size
    self.batch_size = batch_size
    self.n_layers = len(hidden_size)
    self.sa_layers = []
    self.sa_activations_train = []
    self.sa_activations_test = []
    self.thetas = []
    self.thetas_as_blocks = []
    self.dropout = dropout
    self.drop_rates = drop_rates

    # check that there are layer_count+1 dropout rates (the extra one is for the softmax)
    if dropout:
        assert self.n_layers + 1 == len(self.drop_rates)

    self.corruption_levels = corruption_levels
    # check that there are layer_count corruption levels
    # (`denoising` is assumed to be defined in the enclosing scope)
    if denoising:
        assert self.n_layers == len(self.corruption_levels)

    self.cost_fn_names = ['sqr_err', 'neg_log']

    self.x = T.matrix('x')   # store the inputs
    self.y = T.ivector('y')  # store the labels for the corresponding inputs

    self.fine_cost = T.dscalar('fine_cost')  # fine-tuning cost
    self.error = T.dscalar('test_error')     # test error value

    # print network info
    print "Network Info:"
    print "Layers: %i" % self.n_layers
    print "Layer sizes: ",
    print self.h_sizes
    print ""

    print "Building the model..."

    # Initializing the network:
    # create SparseAutoencoders and store them in sa_layers, and calculate
    # hidden activations (symbolic) and store them in sa_activations_train/test.
    # There are two types of activations because the calculations differ
    # between train and test with dropout.
    for i in xrange(self.n_layers):
        if i == 0:
            curr_input_size = self.i_size
        else:
            curr_input_size = self.h_sizes[i - 1]

        # if i==0 the input is the raw input
        if i == 0:
            curr_input_train = self.x
            curr_input_test = self.x
        # otherwise the input is the previous layer's hidden activation
        else:
            a2_train = self.sa_layers[-1].get_hidden_act(training=True)
            a2_test = self.sa_layers[-1].get_hidden_act(training=False)
            self.sa_activations_train.append(a2_train)
            self.sa_activations_test.append(a2_test)
            curr_input_train = self.sa_activations_train[-1]
            curr_input_test = self.sa_activations_test[-1]

        sa = SparseAutoencoder(n_inputs=curr_input_size, n_hidden=self.h_sizes[i],
                               x_train=curr_input_train, x_test=curr_input_test,
                               dropout=dropout, dropout_rate=self.drop_rates[i])

        self.sa_layers.append(sa)
        self.thetas.extend(self.sa_layers[-1].get_params())
        self.thetas_as_blocks.append(self.sa_layers[-1].get_params())

    # the -1 index gives the last element
    a2_train = self.sa_layers[-1].get_hidden_act(training=True)
    a2_test = self.sa_layers[-1].get_hidden_act(training=False)
    self.sa_activations_train.append(a2_train)
    self.sa_activations_test.append(a2_test)

    self.softmax = SoftmaxClassifier(n_inputs=self.h_sizes[-1], n_outputs=self.o_size,
                                     x_train=self.sa_activations_train[-1],
                                     x_test=self.sa_activations_test[-1],
                                     y=self.y, dropout=self.dropout,
                                     dropout_rate=self.drop_rates[-1])

    self.lam_fine_tune = T.scalar('lam')
    self.fine_cost = self.softmax.get_cost(self.lam_fine_tune,
                                           cost_fn=self.cost_fn_names[1])
    self.thetas.extend(self.softmax.theta)

    # measure test performance
    self.error = self.softmax.get_error(self.y)
cost = T.nnet.binary_crossentropy(output, labels).mean()

compute_prediction = theano.function([x], prediction)
compute_cost = theano.function([x, labels], cost)

# Compute the gradient of our error function
grad_W = T.grad(cost, W)
grad_b = T.grad(cost, b)

# Set up the updates we want to do
alpha = 2
updates = [(W, W - alpha * grad_W), (b, b - alpha * grad_b)]

# Same updates, but with the learning rate as a symbolic input
alpha = T.dscalar("alpha")
updates = [(W, W - alpha * grad_W), (b, b - alpha * grad_b)]

# Make our function. Have it return the cost!
train = theano.function([x, labels, alpha], cost, updates=updates)

alpha = 10.0
costs = []
a = np.arange(0, 10, 0.1)
b = np.arange(5, 20, 0.1)
x = np.array([[a[i], b[i]] for i in range(100)])
y = np.array([[a[i] // 6] for i in range(100)])
while True:
'''
Created on Aug 3, 2018

@author: xiongan2
'''
import theano.tensor as T
from theano import function

a = T.dscalar('a')
b = T.dscalar('b')
c = T.dscalar('c')
d = T.dscalar('d')
e = T.dscalar('e')
f = ((a - b + c) * d) / e
g = function([a, b, c, d, e], f)

print("Expected: ((1 - 2 + 3) * 4)/5.0 =", ((1 - 2 + 3) * 4) / 5.0)
print("Via Theano: ((1 - 2 + 3) * 4)/5.0 = ", g(1, 2, 3, 4, 5))
# coding: utf-8
import theano
import theano.tensor as T

# define the symbol for the expression to differentiate
x = T.dscalar('x')
y = x**2

# differentiate y with respect to x
# this gives y' = 2x
gy = T.grad(cost=y, wrt=x)

# define a function that evaluates the derivative
f = theano.function(inputs=[x], outputs=gy)
print theano.pp(f.maker.fgraph.outputs[0])

# evaluate the derivative at concrete values of x
print f(2)
print f(3)
print f(4)
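# Expected output: the pretty-printed graph is equivalent to 2 * x, so the
# three calls print 4.0, 6.0 and 8.0.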
class Pxf(tt.Op):
    __props__ = ()
    itypes = [tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar,
              tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar]
    otypes = [tt.dscalar]

    def perform(self, node, inputs, outputs):
        bs, c, g1, kxmax, p50, T, I, D, s = inputs
        px = pxf(bs, c, g1, kxmax, p50, T, I, D, s)
        outputs[0][0] = np.array(px)


'''
Simulation
'''
# Soil moisture simulation
Estt, Rstt = tt.dvectors('Estt', 'Rstt')
sstt = tt.dscalar('sstt')
smd, updates = theano.scan(fn=lambda E, R, s: tt.minimum(s - E + R, 1),
                           sequences=[Estt, Rstt],
                           outputs_info=[sstt])
sf = theano.function(inputs=[Estt, Rstt, sstt], outputs=smd, updates=updates)
ss = sf(vnod * 0.01, Rfod / 1000 / n / 3 * intercept, 0.8)

# Sap flow
Tvntt, Ivntt, Dvntt, svntt = tt.dvectors('Tvntt', 'Ivntt', 'Dvntt', 'svntt')

def step(T, I, D, s, alpha, bs, c, g1, kxmax, p50, Z):
    ps = pe * s ** (-beta)  # Soil water potential
    px = Pxf()(bs, c, g1, kxmax, p50, T, I, D, s)  # Xylem water potential
    slope = 16 + tt.exp(p50) * 1092  # Slope - xylem vulnerability
    PLC = (1 / (1 + tt.exp(slope / 25 * (px - p50)))
           - 1 / (1 + tt.exp(slope / 25 * (-p50)))) / (1 - 1 / (1 + tt.exp(slope / 25 * (-p50))))  # PLC