def __init__(self, n_in):
    """Build a linear-regression graph with MSE training and an R^2-style score.

    :param n_in: number of input features (length of the weight vector W).
    """
    # Symbolic inputs: design matrix X and target vector y.
    self.X = theanoTensor.matrix('X', dtype=theano.config.floatX)
    self.y = theanoTensor.vector('y', dtype=theano.config.floatX)
    # Trainable parameters: weights initialised to ones, scalar bias at 0.
    self.W = theano.shared(name='W',
                           value=np.ones((n_in, ), dtype=theano.config.floatX),
                           borrow=True)
    self.b = theano.shared(name='b',
                           value=np.cast[theano.config.floatX](0.0),
                           borrow=True)
    prediction = theanoTensor.dot(self.X, self.W) + self.b
    # Mean squared error of the prediction and mean square of the targets;
    # their ratio gives the R^2-like goodness-of-fit below.
    self.MSe = theanoTensor.mean(theanoTensor.pow(prediction - self.y, 2))
    self.MSy = theanoTensor.mean(theanoTensor.pow(self.y, 2))
    self.R2 = 1 - (self.MSe / self.MSy)
    # Plain gradient descent on W and b.
    # NOTE: getMSE()/getR2() are accessor methods defined elsewhere on this class.
    params = [self.W, self.b]
    grads = theanoTensor.grad(self.getMSE(), wrt=params)
    lr = 1e-3
    updates = [(param, param - lr * grad) for param, grad in zip(params, grads)]
    self.train_model = theano.function(inputs=[self.X, self.y],
                                       outputs=[self.getMSE()],
                                       updates=updates)
    self.test_model = theano.function(inputs=[self.X, self.y],
                                      outputs=[self.getR2()])
def __init__(self,inputData,image_shape):
    """Batch-normalisation layer over a 4-D activation tensor.

    :param inputData: symbolic 4-D input; normalised over axes (0, 2, 3),
                      i.e. per feature map (axis 1).
    :param image_shape: tuple whose second entry is the number of feature maps.
    """
    self.input=inputData
    num_out=image_shape[1]
    # Small constant added to the variance for numerical stability.
    epsilon=0.01
    self.image_shape=image_shape
    # Learnable scale (gamma) and shift (beta), one per feature map.
    gamma_values = numpy.ones((num_out,), dtype=theano.config.floatX)
    self.gamma_vals = theano.shared(value=gamma_values, borrow=True)
    beta_values = numpy.zeros((num_out,), dtype=theano.config.floatX)
    self.beta_vals = theano.shared(value=beta_values, borrow=True)
    # Per-feature-map statistics, kept broadcastable against the input.
    batch_mean=T.mean(self.input,keepdims=True,axis=(0,2,3))
    batch_var=T.var(self.input,keepdims=True,axis=(0,2,3))+epsilon
    # adjustVals is defined elsewhere on this class — presumably a
    # broadcasting/reshaping fix-up; TODO confirm.
    self.batch_mean=self.adjustVals(batch_mean)
    batch_var=self.adjustVals(batch_var)
    self.batch_var=T.pow(batch_var,0.5)
    # NOTE(review): self.batch_var already holds sqrt(var); taking pow(.., 0.5)
    # again below divides by var**0.25 rather than the standard deviation —
    # looks like a double square root. Confirm before relying on this layer.
    batch_normalize=(inputData-self.batch_mean)/(T.pow(self.batch_var,0.5))
    # Reshape gamma/beta to (1, C, 1, 1) so they broadcast over the batch.
    self.beta = self.beta_vals.dimshuffle('x', 0, 'x', 'x')
    self.gamma = self.gamma_vals.dimshuffle('x', 0, 'x', 'x')
    self.output=batch_normalize*self.gamma+self.beta
    #self.output=inputData-self.batch_mean
    self.params=[self.gamma_vals,self.beta_vals]
def __init__(self, config, loss, params):
    """Build Adam update rules (with optional LR decay and global-norm
    gradient clipping) for `params` minimising `loss`.

    :param config: options object providing learning_rate, lr_decay,
                   lr_decay_freq, max_grad_norm, adam_beta1/2, adam_eps.
    :param loss: scalar symbolic loss.
    :param params: list of shared variables to update.
    """
    self._lr = get_shared_floatX(config.learning_rate, 'lr')
    # Adam timestep t, starting at 1 and incremented on every update.
    self._t = get_shared_floatX(1, 't')
    self._all_m_tm1 = []  # first-moment accumulators, one per param
    self._all_v_tm1 = []  # second-moment accumulators, one per param
    self._updates = [(self._t, self._t + 1)]
    if config.lr_decay:
        # Stepwise exponential decay: lr_decay^(floor((t-1)/freq)) * base_lr.
        lr_coef = tt.pow(config.lr_decay, (self._t - 1) // config.lr_decay_freq)
        self._updates.append((self._lr, lr_coef * config.learning_rate))
    grads = theano.grad(loss, params)
    # Global L2 norm over all parameter gradients.
    self._global_grad_norm = tt.sqrt(tt.sum(tt.stack([tt.sum(g**2.) for g in grads])))
    if config.max_grad_norm:
        # Rescale all gradients when the global norm exceeds the cap.
        global_clip_factor = ifelse(tt.lt(self._global_grad_norm, config.max_grad_norm),
            cast_floatX_np(1.),
            cast_floatX(config.max_grad_norm/self._global_grad_norm))
        grads = [global_clip_factor * g for g in grads]
    # Bias-corrected learning rate for step t (standard Adam).
    lr_t = self._lr * \
        clip_sqrt(1 - tt.pow(config.adam_beta2, self._t)) / (1 - tt.pow(config.adam_beta1, self._t))
    for p, g in zip(params, grads):
        m_tm1 = get_shared_floatX(np.zeros_like(p.get_value()), 'adam_m_' + p.name)
        v_tm1 = get_shared_floatX(np.zeros_like(p.get_value()), 'adam_v_' + p.name)
        self._all_m_tm1.append(m_tm1)
        self._all_v_tm1.append(v_tm1)
        # Exponential moving averages of the gradient and squared gradient.
        m_t = config.adam_beta1 * m_tm1 + (1-config.adam_beta1) * g
        v_t = config.adam_beta2 * v_tm1 + (1-config.adam_beta2) * tt.sqr(g)
        delta_t = -lr_t * m_t / (clip_sqrt(v_t) + config.adam_eps)
        p_t = p + delta_t
        self._updates += [(m_tm1, m_t), (v_tm1, v_t), (p, p_t)]
def _model_setup(self):
    """Build the PyMC3 supernova-cosmology model inside self._model.

    Samples cosmological parameters (OmegaM, w), Phillips standardisation
    parameters, and latent per-SN quantities, then conditions Gaussian
    likelihoods on the observed colour, stretch and apparent magnitude.
    """
    with self._model:
        # COSMOLOGY
        omega_m = pm.Uniform("OmegaM", lower=0, upper=1.)

        # dark energy EOS
        w = pm.Normal("w", mu=-1, sd=1)

        # My custom distance mod. function to enable
        # ADVI and HMC sampling.
        dm = distmod_w_flat(omega_m, self._h0, w, self._zcmb)

        # PHILIPS PARAMETERS
        # M0 is the location parameter for the distribution
        # sys_scat is the scale parameter for the M0 distribution
        # rather than "unexplained variance"
        M0 = pm.Normal("M0", mu=-19.3, sd=2.)
        sys_scat = pm.HalfCauchy('sys_scat', beta=2.5)  # Gelman recommendation for variance parameter
        M_true = pm.Normal('M_true', M0, sys_scat, shape=self._n_SN)

        # following Rubin's Unity model... best idea? not sure
        taninv_alpha = pm.Uniform("taninv_alpha", lower=-.2, upper=.3)
        taninv_beta = pm.Uniform("taninv_beta", lower=-1.4, upper=1.4)

        # Transform variables
        alpha = pm.Deterministic('alpha', T.tan(taninv_alpha))
        beta = pm.Deterministic('beta', T.tan(taninv_beta))

        # Again using Rubin's Unity model.
        # After discussion with Rubin, the idea is that
        # these parameters are ideally sampled from a Gaussian,
        # but we know they are not entirely correct. So instead,
        # the Cauchy is less informative around the mean, while
        # still having informative tails.
        xm = pm.Cauchy('xm', alpha=0, beta=1)
        cm = pm.Cauchy('cm', alpha=0, beta=1)

        Rx_log = pm.Uniform('Rx_log', lower=-0.5, upper=0.5)
        Rc_log = pm.Uniform('Rc_log', lower=-1.5, upper=1.5)

        # Transformed variables: population widths on a log10 scale.
        Rx = pm.Deterministic("Rx", T.pow(10., Rx_log))
        Rc = pm.Deterministic("Rc", T.pow(10., Rc_log))

        # Latent per-SN stretch and colour.
        x_true = pm.Normal('x_true', mu=xm, sd=Rx, shape=self._n_SN)
        c_true = pm.Normal('c_true', mu=cm, sd=Rc, shape=self._n_SN)

        # Do the correction (Tripp/Phillips standardisation).
        mb = pm.Deterministic("mb", M_true + dm - alpha * x_true + beta * c_true)

        # Likelihood and measurement error
        obsc = pm.Normal("obsc", mu=c_true, sd=self._dcolor, observed=self._color)
        obsx = pm.Normal("obsx", mu=x_true, sd=self._dx1, observed=self._x1)
        obsm = pm.Normal("obsm", mu=mb, sd=self._dmb_obs, observed=self._mb_obs)
def get_testing_function(test_data, test_mask, pct_blackout=0.5):
    """Compile a theano function reporting test-set reconstruction RMSE of a
    denoising-autoencoder-style recommender.

    NOTE(review): this function is explicitly disabled — the first statement
    raises, so everything below is unreachable. It also references `self`
    although the signature takes no `self` parameter; it was presumably meant
    to be a method. Confirm intent before re-enabling.
    """
    raise Error("fix me!")
    i, batch_size = T.iscalars('i', 'batch_size')
    # Fixed-seed blackout mask: keep each input with probability 1 - pct_blackout.
    self.test_noise = T.shared_randomstreams.RandomStreams(1234).binomial(
        (self.inputs.shape), n=1, p=1-pct_blackout, dtype=theano.config.floatX)
    self.test_noisy = self.test_noise * self.inputs
    # Tied-weights autoencoder: encode with W, decode with W.T.
    self.test_active_hidden = T.nnet.sigmoid(T.dot(self.test_noisy, self.W) + self.b_in)
    self.test_output = T.nnet.sigmoid(T.dot(self.test_active_hidden, self.W.T) + self.b_out)
    # root mean squared error of unknowns only
    # taking the original input vector's mask of which beers had no input information (no rating)
    # mask out any output predicted ratings where there was no rating of the original beer
    # so we aren't affecting the error factor in dimensions where we don't have any
    # meaningful information in the original input data
    self.only_originally_unknown = T.dot(1-self.test_noise, T.dot(self.inputs_mask, self.test_output))
    self.test_error = T.pow(T.mean(T.pow(T.dot(self.inputs_mask, self.test_output) - self.inputs, 2)), 0.5)
    self.testing_function = theano.function([i, batch_size],
                                            self.test_error,
                                            givens={self.inputs: test_data[i:i+batch_size],
                                                    self.inputs_mask: test_mask[i:i+batch_size]})
    return self.testing_function
def test_0():
    # Benchmark and accuracy check of elementwise pow under the lazy linker:
    # OpenCL implementation vs. the gcc fallback vs. raw numpy.
    N = 16*1000*10*1
    if 1:  # float32 path; flip to 0 for the float64 path below
        # abs(..)+.1 keeps the base strictly positive so a**b is well defined.
        aval = abs(numpy.random.randn(N).astype('float32'))+.1
        bval = numpy.random.randn(N).astype('float32')
        a = T.fvector()
        b = T.fvector()
    else:
        aval = abs(numpy.random.randn(N))+.1
        bval = numpy.random.randn(N)
        a = T.dvector()
        b = T.dvector()
    f = theano.function([a,b], T.pow(a,b), mode='LAZY')
    # Disable the OpenCL implementation so g compiles the gcc version.
    theano_opencl.elemwise.swap_impls=False
    g = theano.function([a,b], T.pow(a,b), mode='LAZY')
    print 'ocl time', timeit.Timer(lambda: f(aval, bval)).repeat(3,3)
    print 'gcc time', timeit.Timer(lambda: g(aval, bval)).repeat(3,3)
    print 'numpy time', timeit.Timer(lambda: aval**bval).repeat(3,3)
    # Both compiled versions must agree with numpy within a loose tolerance.
    assert ((f(aval, bval) - aval**bval)**2).sum() < 1.1
    assert ((g(aval, bval) - aval**bval)**2).sum() < 1.1
def _step(self,xg_t, xo_t, xc_t, mask_tm1,h_tm1, c_tm1, u_g, u_o, u_c):
    """One recurrence step of an LSTM variant whose cell update is a softmax
    gate over several GP-kernel similarities between the previous cell state
    and the candidate cell state.

    :param xg_t, xo_t, xc_t: precomputed input projections for gate, output
        gate and candidate at time t.
    :param mask_tm1: sequence mask for the previous step.
    :param h_tm1, c_tm1: previous hidden and cell state.
    :param u_g, u_o, u_c: recurrent weight tensors/matrices.
    :return: (h_t, c_t).
    """
    # Mask out finished sequences.
    h_mask_tm1 = mask_tm1 * h_tm1
    c_mask_tm1 = mask_tm1 * c_tm1
    # Softmax gate over the kernel "ops" mixed below.
    act = T.tensordot( xg_t + h_mask_tm1, u_g , [[1],[2]])
    gate = T.nnet.softmax(act.reshape((-1, act.shape[-1]))).reshape(act.shape)
    c_tilda = self.activation(xc_t + T.dot(h_mask_tm1, u_c))
    # Kernel hyper-parameters (learned, shared across steps).
    sigma_se = self.k_parameters[0]
    sigma_per = self.k_parameters[1]
    sigma_b_lin = self.k_parameters[2]
    sigma_v_lin = self.k_parameters[3]
    sigma_rq = self.k_parameters[4]
    l_se = self.k_parameters[5]
    l_per = self.k_parameters[6]
    l_lin = self.k_parameters[7]
    l_rq = self.k_parameters[8]
    alpha_rq = self.k_parameters[9]
    p_per = self.k_parameters[10]
    # Squared-exponential, periodic, linear, and rational-quadratic kernels
    # evaluated between previous cell state and candidate; EPS guards /0.
    k_se = T.pow(sigma_se,2) * T.exp( -T.pow(c_mask_tm1 - c_tilda,2) / (2* T.pow(l_se,2) + self.EPS))
    k_per = T.pow(sigma_per,2) * T.exp( -2*T.pow(T.sin( math.pi*(c_mask_tm1 - c_tilda)/ (p_per + self.EPS) ),2) / ( T.pow(l_per,2) + self.EPS ))
    k_lin = T.pow(sigma_b_lin,2) + T.pow(sigma_v_lin,2) * (c_mask_tm1 - l_lin) * (c_tilda - l_lin )
    k_rq = T.pow(sigma_rq,2) * T.pow( 1 + T.pow( (c_mask_tm1 - c_tilda),2) / ( 2 * alpha_rq * T.pow(l_rq,2) + self.EPS), -alpha_rq)
    # Candidate cell updates: raw carry, plain candidate, and the 4 kernels.
    ops = [c_mask_tm1,c_tilda,k_se, k_per, k_lin,k_rq]
    yshuff = T.as_tensor_variable( ops, name='yshuff').dimshuffle(1,2,0)
    # Gate-weighted mixture of the candidates gives the new cell state.
    c_t = (gate.reshape((-1,gate.shape[-1])) * yshuff.reshape((-1,yshuff.shape[-1]))).sum(axis = 1).reshape(gate.shape[:2])
    o_t = self.inner_activation(xo_t + T.dot(h_mask_tm1, u_o))
    h_t = o_t * self.activation(c_t)
    return h_t, c_t
def finetune_cost_updates(self, center, mu, learning_rate): """ This function computes the cost and the updates .""" # note : we sum over the size of a datapoint; if we are using # minibatches, L will be a vector, withd one entry per # example in minibatch network_output = self.get_output() temp = T.pow(center - network_output, 2) L = T.sum(temp, axis=1) # Add the network reconstruction error z = self.get_network_reconst() reconst_err = T.sum(T.pow(self.x - z, 2), axis = 1) L = self.beta*L + self.lbd*reconst_err cost1 = T.mean(L) cost2 = self.lbd*T.mean(reconst_err) cost3 = cost1 - cost2 # compute the gradients of the cost of the `dA` with respect # to its parameters gparams = T.grad(cost1, self.params) # generate the list of updates updates = [] grad_values = [] param_norm = [] for param, delta, gparam in zip(self.params, self.delta, gparams): updates.append( (delta, mu*delta - learning_rate * gparam) ) updates.append( (param, param + mu*mu*delta - (1+mu)*learning_rate*gparam )) grad_values.append(gparam.norm(L=2)) param_norm.append(param.norm(L=2)) grad_ = T.stack(*grad_values) param_ = T.stack(*param_norm) return ((cost1, cost2, cost3, grad_, param_), updates)
def lp_norm(self, n, k, r, c, z):
    '''
    Lp pooling over one pooling window:

        Lp = ( 1/n * sum(|x_i|^p, 1..n))^(1/p)  where  p = 1 + ln(1+e^P)

    The softplus parameterisation keeps p > 1 while P is unconstrained.

    :param n: batch index
    :param k: channel index
    :param r: output row index
    :param c: output column index
    :param z: output tensor to write the pooled value into
    :return: z with z[n, k, r, c] set to the Lp-pooled value
    '''
    ds0, ds1 = self.pool_size
    st0, st1 = self.stride
    pad_h = self.pad[0]
    pad_w = self.pad[1]
    # Window bounds in the (padded) input, clipped to the image and to the
    # valid (padded) extent of the underlying data.
    row_st = r * st0
    row_end = T.minimum(row_st + ds0, self.img_rows)
    row_st = T.maximum(row_st, self.pad[0])
    row_end = T.minimum(row_end, self.x_m2d + pad_h)
    col_st = c * st1
    col_end = T.minimum(col_st + ds1, self.img_cols)
    col_st = T.maximum(col_st, self.pad[1])
    col_end = T.minimum(col_end, self.x_m1d + pad_w)
    # Hoisted: p = 1 + ln(1 + e^P) was previously built twice in the graph.
    p = 1 + T.log(1 + T.exp(self.P))
    Lp = T.pow(
        T.mean(T.pow(
            T.abs_(T.flatten(self.y[n, k, row_st:row_end, col_st:col_end], 1)),
            p
        )),
        1 / p
    )
    return T.set_subtensor(z[n, k, r, c], Lp)
def __call__(self, loss):
    """Add L1, L2 and binarising activity penalties to ``loss``.

    The binarising term a^k * (1 - a)^k is smallest when activations
    saturate towards 0 or 1, pushing the layer towards binary codes.
    """
    l1_term = self.l1 * T.sum(T.mean(abs(self.layer.get_output(True)), axis=0))
    l2_term = self.l2 * T.sum(T.mean(self.layer.get_output(True) ** 2, axis=0))
    bin_term = self.l_bin * T.sum(
        T.mean(T.pow(self.layer.get_output(True), self.k)
               * T.pow(1 - self.layer.get_output(True), self.k), axis=0)
    )
    return loss + l1_term + l2_term + bin_term
def get_reg_ind(self):
    """Dropout-KL regulariser for the x/y log-std parameters.

    Evaluates the module-level c1..c3 polynomial approximation of the
    variational dropout KL term at each dropout rate, subtracts the same
    polynomial evaluated at the base noise level, and weights each side by
    its parameter count.
    """
    def _kl_poly(a):
        # Polynomial approximation of the dropout KL divergence term.
        return .5 * T.log(a) + c1 * a + c2 * T.pow(a, 2) + c3 * T.pow(a, 3)

    # Dropout rates: squared exponentials of the stored log-stds.
    drop_ax = T.pow(T.exp(self.params[-2]), 2)
    drop_ay = T.pow(T.exp(self.params[-1]), 2)
    # Baseline: same polynomial at the fixed noise level, as a floatX scalar.
    constant = np.cast[theano.config.floatX](
        .5 * np.log(self.noise_lvl) + c1 * self.noise_lvl
        + c2 * (self.noise_lvl**2) + c3 * (self.noise_lvl**3))
    reg_indx = _kl_poly(drop_ax) - constant
    reg_indy = _kl_poly(drop_ay) - constant
    # Scale each contribution by the number of parameters it governs.
    n_x = T.cast(T.prod(self.params[3].shape), theano.config.floatX)
    n_y = T.cast(T.prod(self.params[4].shape), theano.config.floatX)
    return n_x * reg_indx + n_y * reg_indy
def get_box_mask_se(a, b):
    '''
    Return a (batch_size, grid_num, box_num, 1) 0/1 tensor marking, per grid
    cell, the box whose prediction has the smallest Euclidean error.
    '''
    # Euclidean distance between prediction and target along the last axis.
    err = T.pow(T.pow(a - b, 2).sum(axis=-1), .5)
    # Smallest error per grid cell, kept broadcastable for the comparison.
    best = err.min(axis=-1, keepdims=True)
    return T.eq(err, best).reshape((a.shape[0], a.shape[1], a.shape[2], 1))
def gamma_params(mode=10., sd=10.):
    '''
    Convert a mode and standard deviation into the shape and rate
    parameters of a gamma distribution.
    '''
    variance = Tns.pow(sd, 2)
    rate = (mode + Tns.pow(Tns.pow(mode, 2) + 4 * variance, 0.5)) / (2 * variance)
    shape = 1 + mode * rate
    return shape, rate
def objective(x):
    """
    Styblinski–Tang-style objective, shifted by ``objective.offset``.

    @param x: input vector (one row per evaluation point)
    @return: value of objective function, summed over the feature axis
    """
    shifted = x - objective.offset
    quartic = T.pow(shifted, 4)
    quadratic = T.pow(shifted, 2)
    return T.sum(quartic - 16 * quadratic + 5 * shifted, axis=1) / 2
def evolve(self, x, n, k, gamma):
    """
    Compute the time-derivative at the current state.

    Model: dx/dt = x^n / (x^n + K^n) - gamma*x
    (Hill activation minus linear degradation — single-species bistability.)
    """
    xn = T.pow(x, n)
    kn = T.pow(k, n)
    return xn / (xn + kn) - gamma * x
def get_input_vectors(shape, phases, scaling, offset):
    """Build a (shape[0]*shape[1]*phases, 3) float32 tensor of (x, y, z)
    coordinate triples, where x and y are grid coordinates scaled per-phase
    by powers of two and z encodes the phase index (offset + 10 * phase).

    :param shape: 2-tuple grid dimensions.
    :param phases: number of phase channels per grid point.
    :param scaling: divisor applied to the raw grid coordinates.
    :param offset: 3-sequence of per-axis offsets.
    """
    # x: row coordinate, repeated across columns and phases, then scaled by 2^phase.
    x = T.repeat(offset[0] + T.arange(shape[0]) / scaling, shape[1] * phases).reshape(
        (shape[0], shape[1], phases)) * T.pow(2, T.arange(phases))
    # y: column coordinate, tiled across rows, repeated per phase, scaled by 2^phase.
    y = T.repeat(T.tile(offset[1] + T.arange(shape[1]) / scaling, shape[0]).reshape(
        (shape[0], shape[1], 1)), phases, axis=2) * T.pow(2, T.arange(phases))
    # z: phase channel identifier, constant across the grid.
    z = T.tile(offset[2] + 10 * T.arange(phases),
               shape[0] * shape[1]).reshape((shape[0], shape[1], phases, 1))
    x = x.reshape((shape[0], shape[1], phases, 1))
    y = y.reshape((shape[0], shape[1], phases, 1))
    return T.concatenate([x, y, z], axis=3).reshape(
        (shape[0] * shape[1] * phases, 3)).astype('float32')
def get_reg_ind(self):
    """Dropout-KL regulariser summed over the x/y std parameters.

    Same c1..c3 polynomial approximation as the variational-dropout KL
    term, with the polynomial at the squared base noise level subtracted
    as a baseline.
    """
    def _kl_poly(a):
        # Polynomial approximation of the dropout KL divergence term.
        return .5 * T.log(a) + c1 * a + c2 * T.pow(a, 2) + c3 * T.pow(a, 3)

    nsl = self.noise_lvl**2
    baseline = .5 * np.log(nsl) + c1 * nsl + c2 * (nsl**2) + c3 * (nsl**3)
    stdx, stdy = self._get_stds()
    # Dropout rates are the squared standard deviations.
    drop_ax = T.pow(stdx, 2)
    drop_ay = T.pow(stdy, 2)
    return T.sum(_kl_poly(drop_ax) - baseline) + T.sum(_kl_poly(drop_ay) - baseline)
def _loopoverallballallbatch(self, ballid):
    """Render one Gaussian 'ball' as a (batchsize, height, width) heat map
    for the whole batch at once.

    self.middle packs (x, y, amplitude) triples per ball; ball `ballid`
    occupies columns 3*ballid .. 3*ballid+2.
    """
    # Ball centre x, broadcast to the full image grid.
    ox=self.middle[:,(ballid)*3].reshape((self.batchsize,1))
    x=T.tile(ox,(1,self.height*self.width)).reshape((self.batchsize,self.height,self.width))
    # Ball centre y, broadcast likewise.
    oy=self.middle[:,(ballid)*3+1].reshape((self.batchsize,1))
    y=T.tile(oy,(1,self.height*self.width)).reshape((self.batchsize,self.height,self.width))
    # Pixel coordinate grids: w holds column indices, h holds row indices.
    w=T.tile(T.tile(T.arange(0,self.width),(self.height,)),(self.batchsize,)).reshape((self.batchsize,self.height,self.width))
    h=T.tile(T.tile(T.arange(0,self.height).reshape((self.height,1)),(1,self.width)),(self.batchsize,1)).reshape((self.batchsize,self.height,self.width))
    # Per-ball amplitude, broadcast to the grid.
    multiply=T.tile(self.middle[:,(ballid)*3+2].reshape((self.batchsize,1)),(1,self.height*self.width)).reshape((self.batchsize,self.height,self.width))
    # Isotropic Gaussian bump around (x, y) scaled by the amplitude.
    results=multiply*T.exp((T.pow(x-w,2)+T.pow(y-h,2))*(-1.0/self.sigma))
    return results
def init_fun_(self, dim_state, batch_size, gamma, learning_rate, momentum,
              reward_scaling, reward_scaling_update):
    """Define and compile functions to train and evaluate the Q-network.

    :param dim_state: dimensions of a single state tensor (must be 3-D)
    :param batch_size: minibatch size used to index per-example Q values
    :param gamma: future reward discount factor
    :param learning_rate: SGD learning rate
    :param momentum: Nesterov momentum coefficient
    :param reward_scaling: initial value of the learnable reward scale
    :param reward_scaling_update: NOTE(review): accepted but never read in
        this body — confirm whether it should gate the fun_adapt_rs path.
    :return: (fun_train_qnn, fun_adapt_rs, fun_clone_target,
              fun_q_lookup, fun_rs_lookup)
    """
    if len(dim_state) != 3:
        raise ValueError("We only support 3 dimensional states.")
    # inputs
    old_states, new_states = T.tensor4s('old_states', 'new_states')  # (BATCH_SIZE, MEMORY_LENGTH, DIM_STATE[0], DIM_STATE[1])
    actions = T.ivector('actions')  # (BATCH_SIZE, 1)
    rewards = T.vector('rewards')  # (BATCH_SIZE, 1)
    # Learnable scalar that rescales rewards before the Bellman target.
    rs = shared(value=reward_scaling*1.0, name='reward_scaling')
    # intermediates: online network on old states, frozen target on new states.
    predict_q = lasagne.layers.get_output(layer_or_layers=self.qnn, inputs=old_states)
    predict_next_q = lasagne.layers.get_output(layer_or_layers=self.qnn_target, inputs=new_states)
    target_q = rewards/rs + gamma*T.max(predict_next_q, axis=1)
    # penalty: diverges as the chosen Q value approaches +/- singularity,
    # discouraging saturation near those values.
    singularity = 1+1e-3
    penalty = T.mean(
        1/T.pow(predict_q[T.arange(batch_size), actions]-singularity, 2) +
        1/T.pow(predict_q[T.arange(batch_size), actions]+singularity, 2) - 2)
    # outputs: TD error plus a tiny weight on the penalty term.
    loss = T.mean((predict_q[T.arange(batch_size), actions] - target_q)**2) + (1e-5)*penalty
    # weight update formulas (mini-batch SGD with momentum)
    params = lasagne.layers.get_all_params(self.qnn, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=learning_rate, momentum=momentum)
    # Separate update that adapts only the reward scale.
    updates_rs = lasagne.updates.nesterov_momentum(loss, [rs],
                                                   learning_rate=learning_rate, momentum=momentum)
    # functions
    fun_train_qnn = theano.function([old_states, actions, rewards, new_states],
                                    loss, updates=updates, allow_input_downcast=True)
    fun_adapt_rs = theano.function([old_states, actions, rewards, new_states],
                                   loss, updates=updates_rs, allow_input_downcast=True)

    def fun_clone_target():
        # Copy online-network weights into the frozen target network.
        lasagne.layers.helper.set_all_param_values(
            self.qnn_target,
            lasagne.layers.helper.get_all_param_values(self.qnn)
        )

    fun_q_lookup = theano.function([old_states], predict_q, allow_input_downcast=True)
    fun_rs_lookup = rs.get_value
    return fun_train_qnn, fun_adapt_rs, fun_clone_target, fun_q_lookup, fun_rs_lookup
def integrand_w_flat(z, Om, w):
    """
    1/H(z) integrand for a flat wCDM cosmology (radiation density from the
    module-level ``Or``).

    :param z: redshift
    :param Om: matter content
    :param w: DE EOS
    :return: theano array of 1/H(z)
    """
    zp = 1 + z
    # Flatness: dark energy takes whatever matter and radiation leave over.
    Ode = 1 - Om - Or
    matter_rad = T.pow(zp, 3) * (Or * zp + Om)
    dark_energy = Ode * T.pow(zp, 3.0 * (1 + w))
    return T.power(matter_rad + dark_energy, -0.5)
def _loopoverallball(self, ballid,batchid):
    """Render one Gaussian 'ball' for a single batch element as a
    (height, width) map.

    Unlike _loopoverallballallbatch, self.middle is indexed here with
    (x, y) pairs per ball (stride 2, no amplitude column).
    """
    # Ball centre x/y broadcast to the full image grid.
    ox=self.middle[batchid][ballid*2].reshape((1,1))
    print "ox:",ox.ndim
    x=T.tile(ox,(self.height,self.width))
    oy=self.middle[batchid][ballid*2+1].reshape((1,1))
    y=T.tile(oy,(self.height,self.width))
    # Pixel coordinate grids: w = column indices, h = row indices.
    w=T.tile(T.arange(0,self.width),(self.height,)).reshape((self.height,self.width))
    h=T.tile(T.arange(0,self.height).reshape((self.height,1)),(1,self.width))
    # Negative scaled squared distance to the centre.
    cof=(T.pow(x-w,2)+T.pow(y-h,2))*(-1.0/self.sigma)
    print T.exp(cof).ndim
    return T.exp(cof)
def evolve_system(self, x, n, k, gamma):
    """
    Compute the time-derivative at the current state.

    Model: dx/dt = k^n / (x_prev^n + k^n) - gamma*x — repression by the
    preceding species gives 3+ species sustained oscillations. ``x`` is a
    matrix; the dependency on the preceding variable is implemented by
    rolling the Hill term by shift=-1 along axis 1.
    """
    kn = T.pow(k, n)
    hill = kn / (T.pow(x, n) + kn)
    return T.roll(hill, shift=-1, axis=1) - gamma * x
def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6):
    """Transcription of equation 2.1 from paper (page 1434):

        output = sum_i(w_i * x_i^p) / (eps + k + (sum_i x_i^q)^r)

    :param w_list: weight tensors, one per input in x_list.
    :param x_list: input tensors (must match w_list in length).
    :param eps: small constant guarding against a zero denominator.
    """
    if len(w_list) != len(x_list):
        raise ValueError('w_list must have same len as x_list')
    output = (sum(w * tensor.pow(x, p) for (w, x) in zip(w_list, x_list)))\
            / (theano._asarray(eps, dtype=k.type.dtype)
               + k
               + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
    assert output.type.ndim == 2
    # Store every constructor argument and local as an instance attribute,
    # then drop the self-reference that locals() captured.
    self.__dict__.update(locals())
    del self.__dict__['self']
    _logger.debug('output dtype %s' % output.dtype)
def get_garch_model():
    """Build a GARCH(1,1)-style PyMC3 model over eight observed returns.

    sigma_t^2 = alpha0 + alpha1 * (r - mu)^2 + beta1 * sigma_{t-1}^2,
    with uniform priors keeping alpha1 in (0, 1) and beta1 in (0, 1 - alpha1).
    """
    returns = np.array([28, 8, -3, 7, -1, 1, 18, 12], dtype=np.float64)
    prev_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float64)
    base_var = np.array([10, 10, 16, 8, 9, 11, 12, 18], dtype=np.float64)
    dims = returns.shape
    with Model() as garch:
        a1 = Uniform('alpha1', 0., 1., shape=dims)
        # Stationarity-style constraint: alpha1 + beta1 < 1.
        b1 = Uniform('beta1', 0., 1 - a1, shape=dims)
        mean = Normal('mu', mu=0., sd=100., shape=dims)
        vol = tt.sqrt(base_var + a1 * tt.pow(returns - mean, 2)
                      + b1 * tt.pow(prev_sigma, 2))
        Normal('obs', mean, sd=vol, observed=returns)
    return garch
def __init__(self, target, initial_phi, profile_s=None, A0=1.0):
    """Build the symbolic pipeline from SLM phases to output intensity:
    phase vector -> complex field E_in -> zero-padded frame -> FFT -> |E_out|^2.

    :param target: target intensity image; expected twice the SLM size per axis.
    :param initial_phi: flat vector of N^2 initial phases.
    :param profile_s: complex illumination profile (defaults to uniform ones).
    :param A0: overall field amplitude.
    """
    self.target = target
    self.n_pixels = int(target.shape[0] / 2)  # target should be 512x512, but SLM pattern calculated should be 256x256.
    self.intensity_calc = None
    self.cost = None  # placeholder for cost function.
    if profile_s is None:
        profile_s = np.ones((self.n_pixels, self.n_pixels))
    assert profile_s.shape == (self.n_pixels, self.n_pixels), \
        'profile_s is wrong shape, should be ({n},{n})'.format(n=self.n_pixels)
    # Split the complex profile into separate real/imaginary float planes.
    self.profile_s_r = profile_s.real.astype('float64')
    self.profile_s_i = profile_s.imag.astype('float64')
    assert initial_phi.shape == (self.n_pixels**2,), \
        "initial_phi must be a vector of phases of size N^2 (not (N,N)). Shape is " + str(initial_phi.shape)
    self.A0 = A0
    # Set zeros matrix: frame into which the field is centre-padded pre-FFT.
    self.zero_frame = np.zeros((2*self.n_pixels, 2*self.n_pixels), dtype='float64')
    # Phi and its momentum for use in gradient descent with momentum:
    self.phi = theano.shared(value=initial_phi.astype('float64'), name='phi')
    self.phi_rate = theano.shared(value=np.zeros_like(initial_phi).astype('float64'), name='phi_rate')
    self.S_r = theano.shared(value=self.profile_s_r, name='s_r')
    self.S_i = theano.shared(value=self.profile_s_i, name='s_i')
    self.zero_matrix = theano.shared(value=self.zero_frame, name='zero_matrix')
    # E_in: (n_pixels**2) — complex multiply S * exp(i*phi), split into
    # real and imaginary parts.
    phi_reshaped = self.phi.reshape((self.n_pixels, self.n_pixels))
    self.E_in_r = self.A0 * (self.S_r*T.cos(phi_reshaped) - self.S_i*T.sin(phi_reshaped))
    self.E_in_i = self.A0 * (self.S_i*T.cos(phi_reshaped) + self.S_r*T.sin(phi_reshaped))
    # E_in padded: (4n_pixels**2) — field embedded in the centre of the frame.
    idx_0, idx_1 = get_centre_range(self.n_pixels)
    self.E_in_r_pad = T.set_subtensor(self.zero_matrix[idx_0:idx_1,idx_0:idx_1], self.E_in_r)
    self.E_in_i_pad = T.set_subtensor(self.zero_matrix[idx_0:idx_1,idx_0:idx_1], self.E_in_i)
    # E_out: far-field via FFT of the padded input field.
    self.E_out_r, self.E_out_i = fft(self.E_in_r_pad, self.E_in_i_pad)
    # finally, the output intensity: |E_out|^2.
    self.E_out_2 = T.add(T.pow(self.E_out_r, 2), T.pow(self.E_out_i, 2))
def get_garch_model():
    """Build a GARCH(1,1)-style PyMC3 model over eight observed returns.

    sigma_t^2 = alpha0 + alpha1 * (r - mu)^2 + beta1 * sigma_{t-1}^2.
    This variant uses a standard-normal prior on alpha1 and a Bound-ed
    normal keeping beta1 below 1 - alpha1.
    """
    r = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18])
    alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18])
    shape = r.shape
    with Model() as garch:
        alpha1 = Normal('alpha1', mu=0., sd=1., shape=shape)
        # Constrain beta1 < 1 - alpha1 (stationarity-style bound).
        BoundedNormal = Bound(Normal, upper=(1 - alpha1))
        beta1 = BoundedNormal('beta1', mu=0., sd=1e6, shape=shape)
        mu = Normal('mu', mu=0., sd=1e6, shape=shape)
        # Conditional volatility of the observation likelihood.
        theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
                        beta1 * tt.pow(sigma1, 2))
        Normal('obs', mu, sd=theta, observed=r)
    return garch
def spectral_radius_bound(X, log2_exponent):
    """
    Upper bound on the largest eigenvalue of the square symmetric matrix X.

    Squares X `log2_exponent` times (so X^(2^log2_exponent)) and returns
    trace(..)^(2^-log2_exponent). Larger exponents are slower but tighter;
    values up to 5 should usually suffice.

    From V.Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22. (That paper also
    defines a more efficient algorithm, not used here.)
    """
    # Validate inputs before touching the graph.
    if X.type.ndim != 2:
        raise TypeError('spectral_radius_bound requires a matrix argument', X)
    if not isinstance(log2_exponent, int):
        raise TypeError('spectral_radius_bound requires an integer exponent',
                        log2_exponent)
    if log2_exponent <= 0:
        raise ValueError('spectral_radius_bound requires a strictly positive '
                         'exponent', log2_exponent)
    # Repeated squaring: after the loop, power == X^(2^log2_exponent).
    power = X
    for _ in xrange(log2_exponent):
        power = tensor.dot(power, power)
    return tensor.pow(trace(power), 2 ** (-log2_exponent))
def get_updates(self, learning_rate, corruption_level=None, L1_rate=0.000, L2_rate=0.000):
    """Return (cost, updates) for training the convolutional autoencoder
    with plain gradient descent.

    :param learning_rate: SGD step size.
    :param corruption_level: if given, a corrupted copy of the input is
        pushed through encode/decode (denoising-autoencoder style).
    :param L1_rate, L2_rate: accepted for interface compatibility; the
        weight decay below uses a fixed 0.001 coefficient.
    """
    # Optionally build the reconstruction of a corrupted input. Note the
    # cost below is computed from decode_layer.out_feature_maps either way.
    if corruption_level is not None:
        corrupted = self.get_corruption_input(self.input, corruption_level)
        y = self.decode_layer.get_output(self.encode_layer.get_output(corrupted))
    else:
        y = self.decode_layer.out_feature_maps
    # Per-example squared reconstruction error.
    cost = T.sum(T.pow(T.sub(self.decode_layer.out_feature_maps, self.feature_maps), 2), axis=1)
    # Fixed L2 weight decay on both filter banks.
    cost += 0.001 * ((self.encode_layer.filters ** 2).sum()
                     + (self.decode_layer.filters ** 2).sum())
    cost = T.mean(cost)
    params = self.encode_layer.params + self.decode_layer.params
    grads = T.grad(cost, params)
    updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]
    return cost, updates
def cost(self):
    """
    Compute the training cost for the configured loss.

    :return: (error scalar, known_grads dict) — the 'ctc' branch returns a
             third element with summed priors.
    """
    # Flatten (time, batch) labels to one int32 vector.
    # NOTE(review): y_f is only referenced by a commented-out line below.
    y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim = 1), 'int32')
    known_grads = None
    if self.loss == 'sprint':
        # Sprint-computed sequence criterion: the op returns both the error
        # and its gradient w.r.t. the (log-)posteriors.
        if not isinstance(self.sprint_opts, dict):
            import json
            self.sprint_opts = json.loads(self.sprint_opts)
        assert isinstance(self.sprint_opts, dict), "you need to specify sprint_opts in the output layer"
        if self.exp_normalize:
            log_probs = T.log(self.p_y_given_x)
        else:
            log_probs = self.z
        sprint_error_op = SprintErrorSigOp(self.attrs.get("target", "classes"), self.sprint_opts)
        err, grad = sprint_error_op(log_probs, T.sum(self.index, axis=0))
        err = err.sum()
        if self.loss_like_ce:
            # Recast the sprint gradient as a cross-entropy-like target.
            y_ref = T.clip(self.p_y_given_x - grad, numpy.float32(0), numpy.float32(1))
            err = -T.sum(T.log(T.pow(self.p_y_given_x, y_ref)) * T.cast(self.index, "float32").dimshuffle(0, 1, 'x'))
        if self.ce_smoothing:
            # Interpolate the sequence criterion with framewise CE.
            err *= numpy.float32(1.0 - self.ce_smoothing)
            grad *= numpy.float32(1.0 - self.ce_smoothing)
            if not self.prior_scale:  # we kept the softmax bias as it was
                nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
            else:  # assume that we have subtracted the bias by the log priors beforehand
                assert self.log_prior is not None
                # In this case, for the CE calculation, we need to add the log priors again.
                y_m_prior = T.reshape(self.z + numpy.float32(self.prior_scale) * self.log_prior,
                                      (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
                nll, pcx = T.nnet.crossentropy_softmax_1hot(x=y_m_prior[self.i], y_idx=self.y_data_flat[self.i])
            ce = numpy.float32(self.ce_smoothing) * T.sum(nll)
            err += ce
            grad += T.grad(ce, self.z)
        known_grads = {self.z: grad}
        return err, known_grads
    elif self.loss == 'ctc':
        # CTC via custom op; also yields label priors.
        from theano.tensor.extra_ops import cpu_contiguous
        err, grad, priors = CTCOp()(self.p_y_given_x, cpu_contiguous(self.y.dimshuffle(1, 0)), self.index_for_ctc())
        known_grads = {self.z: grad}
        return err.sum(), known_grads, priors.sum(axis=0)
    elif self.loss == 'ce_ctc':
        # Plain framewise CE on the softmax of z (no CTC alignment here).
        y_m = T.reshape(self.z, (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
        p_y_given_x = T.nnet.softmax(y_m)
        #pcx = p_y_given_x[(self.i > 0).nonzero(), y_f[(self.i > 0).nonzero()]]
        pcx = p_y_given_x[self.i, self.y_data_flat[self.i]]
        ce = -T.sum(T.log(pcx))
        return ce, known_grads
    elif self.loss == 'ctc2':
        # Pure-theano CTC implementation.
        from NetworkCtcLayer import ctc_cost, uniq_with_lengths, log_sum
        max_time = self.z.shape[0]
        num_batches = self.z.shape[1]
        time_mask = self.index.reshape((max_time, num_batches))
        y_batches = self.y_data_flat.reshape((max_time, num_batches))
        targets, seq_lens = uniq_with_lengths(y_batches, time_mask)
        # Log-softmax over time axis normalisation.
        log_pcx = self.z - log_sum(self.z, axis=0, keepdims=True)
        err = ctc_cost(log_pcx, time_mask, targets, seq_lens)
        return err, known_grads
def l2norm(X):
    """Normalize each row of X to unit Euclidean (L2) length."""
    row_norm = T.sqrt(T.pow(X, 2).sum(1))
    return X / row_norm[:, None]
def pow(x, a):
    """Element-wise power: x raised to a (thin wrapper over T.pow)."""
    return T.pow(x, a)
def inner_fn(t, stm1, postm1, vtm1,
             r_Wq_hst_ot, r_Wq_hst_oht, r_Wq_hst_stm1, r_bq_hst,
             r_Wq_hst2_hst, r_bq_hst2,
             r_Wq_stmu_hst2, r_bq_stmu,
             r_Wq_stsig_hst2, r_bq_stsig,
             r_Wl_stmu_stm1, r_bl_stmu,
             r_Wl_stsig_stm1, r_bl_stsig,
             r_Wl_ost_st, r_bl_ost,
             r_Wl_ost2_ost, r_bl_ost2,
             r_Wl_ost3_ost2, r_bl_ost3,
             r_Wl_otmu_st, r_bl_otmu,
             r_Wl_otsig_st, r_bl_otsig,
             r_Wl_ohtmu_st, r_bl_ohtmu,
             r_Wl_ohtsig_st, r_bl_ohtsig,
             r_Wa_aht_st, r_ba_aht,
             r_Wa_atmu_aht, r_ba_atmu,
             r_Wa_atsig_aht, r_ba_atsig
             ):
    """One scan step of an active-inference agent on a mountain-car-like task:
    act from the hidden state, step the environment, infer the new hidden
    state variationally, and accumulate the free-energy terms.

    Naming: r_Wq_* / r_bq_* parameterise the variational (recognition)
    density, r_Wl_* / r_bl_* the likelihood/prior, r_Wa_* / r_ba_* the
    action network; module-level sizes n_perturbations, n_s, n_o, n_oh,
    n_oa, n_proc and the sig_min_* floors are assumed in scope.
    """
    # Use hidden state to generate action state
    aht = T.batched_tensordot(r_Wa_aht_st, T.reshape(stm1, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_ba_aht
    #aht2 = T.batched_tensordot(r_Wa_aht2_aht, T.reshape(aht,(n_perturbations,n_s,n_proc)),axes=[[2],[1]]) + r_ba_aht2
    #aht3 = T.batched_tensordot(r_Wa_aht3_aht2, T.reshape(aht2,(n_perturbations,n_s,n_proc)),axes=[[2],[1]]) + r_ba_aht3
    at_mu = T.batched_tensordot(r_Wa_atmu_aht, T.reshape(aht, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_ba_atmu
    at_sig = T.nnet.softplus(
        T.batched_tensordot(r_Wa_atsig_aht, T.reshape(aht, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_ba_atsig) + sig_min_action
    # Sample Action
    at = at_mu + theano_rng.normal((n_perturbations, n_oa, n_proc)) * at_sig
    # Update Environment: gravity/slope force plus friction, then the
    # bounded action force.
    action_force = T.tanh(at)
    force = T.switch(
        T.lt(postm1, 0.0),
        -2 * postm1 - 1,
        -T.pow(1 + 5 * T.sqr(postm1), -0.5)
        - T.sqr(postm1) * T.pow(1 + 5 * T.sqr(postm1), -1.5)
        - T.pow(postm1, 4) / 16.0) - 0.25 * vtm1
    vt = vtm1 + 0.05 * force + 0.03 * action_force
    post = postm1 + vt
    # Generate Sensory Inputs:
    # 1.) Observation of Last Action
    #oat = at
    # 2.) Noisy Observation of Current Position
    ot = post + theano_rng.normal((n_perturbations, n_o, n_proc)) * 0.01
    # 3.) Nonlinear Transformed Sensory Channel (Gaussian bump around post=1)
    oht = T.exp(-T.sqr(post - 1.0) / 2.0 / 0.3 / 0.3)
    # Infer hidden state from last hidden state and current observations, using variational density
    hst = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst_stm1, T.reshape(stm1, (n_perturbations, n_s, n_proc)), axes=[[2], [1]])
        + T.batched_tensordot(r_Wq_hst_ot, T.reshape(ot, (n_perturbations, n_o, n_proc)), axes=[[2], [1]])
        + T.batched_tensordot(r_Wq_hst_oht, T.reshape(oht, (n_perturbations, n_oh, n_proc)), axes=[[2], [1]])
        + r_bq_hst)
    hst2 = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst2_hst, T.reshape(hst, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bq_hst2)
    stmu = T.tanh(
        T.batched_tensordot(r_Wq_stmu_hst2, T.reshape(hst2, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bq_stmu)
    stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wq_stsig_hst2, T.reshape(hst2, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bq_stsig) + sig_min_states
    # Explicitly encode position as homeostatic state variable
    # Rescale representation to fit within linear response of the tanh-nonlinearity
    stmu = T.set_subtensor(stmu[:, 0, :], 0.1 * ot[:, 0, :]).reshape(
        (n_perturbations, n_s, n_proc))
    stsig = T.set_subtensor(stsig[:, 0, :], 0.005).reshape(
        (n_perturbations, n_s, n_proc))
    # Sample from variational density
    st = stmu + theano_rng.normal((n_perturbations, n_s, n_proc)) * stsig
    # Calculate parameters of likelihood distributions from sampled state
    ost = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost_st, T.reshape(st, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_ost)
    ost2 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost2_ost, T.reshape(ost, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_ost2)
    ost3 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost3_ost2, T.reshape(ost2, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_ost3)
    otmu = T.batched_tensordot(r_Wl_otmu_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_otmu
    otsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_otsig_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_otsig) + sig_min_obs
    ohtmu = T.batched_tensordot(r_Wl_ohtmu_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_ohtmu
    ohtsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_ohtsig_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_ohtsig) + sig_min_obs
    # Calculate negative log-likelihood of observations
    p_ot = GaussianNLL(ot, otmu, otsig)
    p_oht = GaussianNLL(oht, ohtmu, ohtsig)
    # Calculate prior expectation on hidden state from previous state
    prior_stmu = T.tanh(
        T.batched_tensordot(r_Wl_stmu_stm1, T.reshape(stm1, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_stmu)
    prior_stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_stsig_stm1, T.reshape(stm1, (n_perturbations, n_s, n_proc)), axes=[[2], [1]]) + r_bl_stsig) + sig_min_states
    # Explicitly encode expectations on homeostatic state variable
    # (only after a burn-in of 20 steps).
    prior_stmu = ifelse(T.lt(t, 20), prior_stmu, T.set_subtensor(prior_stmu[:, 0, :], 0.1))
    prior_stsig = ifelse(T.lt(t, 20), prior_stsig, T.set_subtensor(prior_stsig[:, 0, :], 0.005))
    # Calculate KL divergence between variational density and prior density
    # using explicit formula for diagonal gaussians
    KL_st = KLGaussianGaussian(stmu, stsig, prior_stmu, prior_stsig)
    # Put free energy functional together
    FEt = KL_st + p_ot + p_oht
    return st, post, vt, ot, oht, FEt, KL_st, hst, hst2, stmu, stsig, force, p_ot, p_oht
def config_theano(self):
    """Build the Theano graph and compile training/testing functions.

    Compiles four functions on the wrapped generator:
      - self.train / self.test      : maximum-likelihood cost (negated log-prob)
      - self.trainObj / self.testObj: discriminative-training expected objective,
                                      a softmax-weighted mixture of BLEU and ERR.
    """
    # input tensor variables
    # FIX: the original assigned w_idxes = T.imatrix('w_idxes') twice; the
    # duplicate was dead code and has been removed.
    w_idxes = T.imatrix('w_idxes')
    a = T.imatrix('a')
    sv = T.imatrix('sv')
    s = T.imatrix('s')
    v = T.imatrix('v')
    # cutoff for batch and time
    cutoff_f = T.imatrix('cutoff_f')
    cutoff_b = T.iscalar('cutoff_b')
    # regularization and learning rate
    lr = T.scalar('lr')
    reg = T.scalar('reg')

    # unroll generator and produce cost; the set of conditioning inputs
    # depends on the generator architecture
    if self.gentype == 'sclstm':
        self.cost, cutoff_logp = \
            self.generator.unroll(a, sv, w_idxes, cutoff_f, cutoff_b)
    elif self.gentype == 'encdec':
        self.cost, cutoff_logp = \
            self.generator.unroll(a, s, v, w_idxes, cutoff_f, cutoff_b)
    elif self.gentype == 'hlstm':
        self.cost, cutoff_logp = \
            self.generator.unroll(a, sv, w_idxes, cutoff_f, cutoff_b)

    ###################### ML Training #####################
    # gradients and updates (gradient is clipped to magnitude 1)
    gradients = T.grad(clip_gradient(self.cost, 1), self.params)
    # NOTE(review): the update 'p - lr*g + reg*p' ADDS the regularisation
    # term rather than subtracting it; kept as-is since callers control
    # the sign of reg — confirm against the caller.
    updates = OrderedDict((p, p - lr * g + reg * p)
                          for p, g in zip(self.params, gradients))

    # theano functions; unused inputs are ignored because only a subset of
    # (a, sv, s, v) feeds each generator type
    self.train = theano.function(
        inputs=[a, sv, s, v, w_idxes, cutoff_f, cutoff_b, lr, reg],
        outputs=-self.cost,
        updates=updates,
        on_unused_input='ignore')
    self.test = theano.function(
        inputs=[a, sv, s, v, w_idxes, cutoff_f, cutoff_b],
        outputs=-self.cost,
        on_unused_input='ignore')

    ###################### DT Training #####################
    # expected objective: sentence probabilities re-normalised with a
    # length-corrected, gamma-sharpened softmax over the batch
    bleus = T.fvector('bleu')
    errs = T.fvector('err')
    gamma = T.iscalar('gamma')
    senp = T.pow(10, gamma * cutoff_logp / cutoff_f[4][:cutoff_b]) / \
        T.sum(T.pow(10, gamma * cutoff_logp / cutoff_f[4][:cutoff_b]))
    xBLEU = T.sum(senp * bleus[:cutoff_b])
    xERR = T.sum(senp * errs[:cutoff_b])
    # maximise expected BLEU while penalising expected error rate
    self.obj = -xBLEU + 0.3 * xERR
    obj_grad = T.grad(clip_gradient(self.obj, 1), self.params)
    obj_updates = OrderedDict((p, p - lr * g + reg * p)
                              for p, g in zip(self.params, obj_grad))

    # expected objective functions
    self.trainObj = theano.function(
        inputs=[a, sv, s, v, w_idxes, cutoff_f, cutoff_b,
                bleus, errs, gamma, lr, reg],
        outputs=[self.obj, xBLEU, xERR, senp],
        updates=obj_updates,
        on_unused_input='ignore',
        allow_input_downcast=True)
    self.testObj = theano.function(
        inputs=[a, sv, s, v, w_idxes, cutoff_f, cutoff_b,
                bleus, errs, gamma],
        outputs=[self.obj, xBLEU, xERR],
        on_unused_input='ignore',
        allow_input_downcast=True)
model { r ~ normal(mu,sigma); } """

# Eight observed effects with known per-group scales (eight-schools-style data).
J = 8
r = np.array([28, 8, -3, 7, -1, 1, 18, 12])
sigma1 = np.array([15, 10, 16, 11, 9, 11, 10, 18])
alpha0 = np.array([10, 10, 16, 8, 9, 11, 12, 18])

# GARCH(1,1)-style model: volatility theta combines a constant term,
# an ARCH term on squared residuals, and a GARCH term on past variance.
with Model() as garch:
    alpha1 = Normal('alpha1', 0, 1, shape=J)
    # beta1 is bounded above by 1 - alpha1 (stationarity-style constraint)
    BoundedNormal = Bound(Normal, upper=(1 - alpha1))
    beta1 = BoundedNormal('beta1', 0, sd=1e6)
    mu = Normal('mu', 0, sd=1e6)

    theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
                    beta1 * tt.pow(sigma1, 2))
    obs = Normal('obs', mu, sd=theta, observed=r)


def run(n=1000):
    # "short" is a sentinel used by the example test harness for a quick run
    if n == "short":
        n = 50
    with garch:
        tr = sample(n)


if __name__ == '__main__':
    run()
def nuts_TwoTemps(self, iterations, tuning):
    """Build and sample a joint stellar-continuum + emission-line model.

    Constructs a PyMC3 model (via the `pymc_examples` alias — presumably
    pymc3; TODO confirm the import) with an optional stellar continuum
    component and an optional emission-line component, then samples it.

    Parameters
    ----------
    iterations : number of posterior draws requested from the sampler
    tuning     : number of tuning (warm-up) steps

    Returns
    -------
    (trace, model) : the sampled trace and the PyMC3 model object
    """
    # Container to store the synthetic line fluxes
    if self.emissionCheck:
        lineFlux_tt = tt.zeros(self.lineLabels.size)
        continuum = tt.zeros(self.obj_data['wave_resam'].size)
        # idx_N2_6548A = self.lineLabels == 'N2_6548A'
        # idx_N2_6584A = self.lineLabels == 'N2_6584A'
        # self.obsLineFluxErr[idx_N2_6548A], self.obsLineFluxErr[idx_N2_6584A] = 0.1* self.obsLineFluxes[idx_N2_6548A], 0.1 * self.obsLineFluxes[idx_N2_6584A]

    # Stellar bases tensor
    if self.stellarCheck:
        Xx_tt = theano.shared(self.Xx_stellar)
        basesFlux_tt = theano.shared(self.onBasesFluxNorm)
        nebular_continuum_tt = theano.shared(
            self.nebDefault['synth_neb_flux'])
        # NOTE(review): `ones` looks like numpy.ones brought in by a star
        # import — confirm; flat 10% error assumed on the continuum.
        err_Continuum = 0.10 * ones(
            self.inputContinuum.size)  # TODO really need to check this
        # err_Continuum = self.obsFluxNorm * 0.05
        # err_Continuum[err_Continuum < 0.001] = err_Continuum.mean()

    with pymc_examples.Model() as model:
        if self.stellarCheck:
            # Stellar continuum priors
            Av_star = pymc_examples.Normal(
                'Av_star',
                mu=self.stellarAv_prior[0],
                sd=self.stellarAv_prior[0] *
                0.10)  #pymc3.Lognormal('Av_star', mu=1, sd=0.75)
            w_i = pymc_examples.Normal('w_i',
                                       mu=self.sspPrefitCoeffs,
                                       sd=self.sspPrefitCoeffs * 0.10,
                                       shape=self.nBases)

            # Compute stellar continuum
            stellar_continuum = w_i.dot(basesFlux_tt)

            # Apply extinction
            spectrum_reddened = stellar_continuum * tt.pow(
                10, -0.4 * Av_star * Xx_tt)

            # Add nebular component
            continuum = spectrum_reddened + nebular_continuum_tt  #pymc3.Deterministic('continuum_Op', spectrum_reddened + nebular_continuum)

            # Apply mask
            continuum_masked = continuum * self.int_mask

            # Likelihood continuum components
            Y_continuum = pymc_examples.Normal(
                'Y_continuum',
                mu=continuum_masked,
                sd=err_Continuum,
                observed=self.inputContinuum)

        if self.emissionCheck:
            # Gas Physical conditions priors
            T_low = pymc_examples.Normal('T_low',
                                         mu=self.Te_prior[0],
                                         sd=2000.0)
            # Reddening coefficient: sampled unless reddening is disabled,
            # in which case the known true value is used directly.
            cHbeta = pymc_examples.Lognormal(
                'cHbeta', mu=0,
                sd=1) if self.NoReddening is False else self.obj_data[
                    'cHbeta_true']

            # # Declare a High temperature prior if ions are available, else use the empirical relation.
            # if any(self.idx_highU):
            #     T_high = pymc3.Normal('T_high', mu=10000.0, sd=1000.0)
            # else:
            #     T_high = TOIII_TSIII_relation(self.Te_prior[0])  #TODO Should we always create a prior just to eliminate the contamination?

        if self.emissionCheck:
            # Emission lines density (fixed here, priors left commented out)
            n_e = 255.0  #pymc3.Normal('n_e', mu=self.ne_prior[0], sd=self.ne_prior[1])
            #n_e = self.normContants['n_e'] * pymc3.Lognormal('n_e', mu=0, sd=1)

            # Helium abundance priors
            if self.He1rCheck:
                tau = pymc_examples.Lognormal('tau', mu=1, sd=0.75)

            # Composition priors (hydrogen abundance pinned at 1.0)
            abund_dict = {'H1r': 1.0}
            for j in self.rangeObsAtoms:
                if self.obsAtoms[j] == 'He1r':
                    abund_dict[self.obsAtoms[j]] = self.normContants[
                        'He1r'] * pymc_examples.Lognormal(
                            self.obsAtoms[j], mu=0, sd=1
                        )  #pymc3.Uniform(self.obsAtoms[j], lower=0, upper=1)
                elif self.obsAtoms[j] == 'He2r':
                    abund_dict[self.obsAtoms[j]] = self.normContants[
                        'He2r'] * pymc_examples.Lognormal(
                            self.obsAtoms[j], mu=0, sd=1
                        )  #pymc3.Uniform(self.obsAtoms[j], lower=0, upper=1)
                else:
                    abund_dict[
                        self.obsAtoms[j]] = pymc_examples.Normal(
                            self.obsAtoms[j], mu=5, sd=5)

            # Loop through the lines
            for i in self.rangeLines:
                # Line data
                line_label = self.lineLabels[i]
                line_ion = self.lineIons[i]
                line_flambda = self.lineFlambda[i]

                # Parameters to compute the emissivity
                line_coeffs = self.emisCoeffs[line_label]
                emis_func = self.ionEmisEq_tt[line_label]

                # Appropiate data for the ion
                # (two-temperature selection disabled; T_low used for all)
                #Te_calc = T_high if self.idx_highU[i] else T_low
                Te_calc = T_low

                # Line Emissivitiy
                line_emis = emis_func((Te_calc, n_e), *line_coeffs)

                # Atom abundance (hydrogen lines use the fixed reference 1.0)
                line_abund = 1.0 if self.H1_lineIdcs[
                    i] else abund_dict[line_ion]

                # Line continuum
                line_continuum = tt.sum(
                    continuum * self.boolean_matrix[i]) * self.lineRes[i]

                # ftau correction for HeI lines
                line_ftau = self.ftau_func(
                    tau, Te_calc, n_e, *self.ftau_coeffs[line_label]
                ) if self.He1_lineIdcs[i] else None

                # Line synthetic flux
                flux_i = self.fluxEq_tt[line_label](
                    line_emis, cHbeta, line_flambda, line_abund, line_ftau,
                    continuum=line_continuum)

                # Store in container
                lineFlux_tt = tt.inc_subtensor(lineFlux_tt[i], flux_i)

            # Store computed fluxes
            lineFlux_ttarray = pymc_examples.Deterministic(
                'calcFluxes_Op', lineFlux_tt)

            # Likelihood gas components
            Y_emision = pymc_examples.Normal(
                'Y_emision',
                mu=lineFlux_ttarray,
                sd=self.obsLineFluxErr,
                observed=self.obsLineFluxes)

        # Get energy traces in model (sanity check of each RV's log-prob)
        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))

        # Launch model
        trace = pymc_examples.sample(iterations,
                                     tune=tuning,
                                     nchains=2,
                                     njobs=2)

    return trace, model
def harvey(self, a, b, c):
    """Evaluate a Harvey-like profile on the frequency grid self.f.

    Computes 0.9 * a^2 / b / (1 + (f/b)^c), i.e. a Lorentzian-style
    power-law roll-off with amplitude a, characteristic frequency b and
    exponent c. Returns a symbolic tensor expression.
    """
    amplitude = 0.9 * tt.sqr(a)
    rolloff = 1.0 + tt.pow((self.f / b), c)
    return amplitude / b / rolloff
def output_error(self, input_sequence, true_output, mask):
    """Masked mean-squared error between predictions and targets.

    Squared differences are averaged over the feature axis (axis 2),
    then weighted by `mask` and normalised by the mask's total weight,
    so padded timesteps do not contribute to the error.
    """
    squared_diff = T.pow(true_output - input_sequence, 2)
    per_step_mse = T.sum(squared_diff, axis=2) / squared_diff.shape[2]
    weighted = T.mul(per_step_mse.dimshuffle(0, 1, 'x'), mask)
    return T.sum(weighted) / T.sum(mask)
def SGD(self, eta_0=.01, T_eta=1, mu_max=.95, T_mu=1, dropout=1., anneal=0, accel=0):
    """Compile SGD-with-momentum training functions.

    Builds four Theano functions over the model's dense (self.params) and
    sparse (self.sparams, e.g. embedding tables) parameters:
      momentizer   — applies the momentum pre-step (Nesterov-style look-ahead)
      gradientizer — accumulates gradients for a minibatch into self.gparams
      optimizer    — applies the parameter/velocity updates
      nihilizer    — zeroes the accumulated gradients

    eta_0/T_eta/anneal control the learning-rate decay schedule;
    mu_max/T_mu/accel control the momentum ramp-up; dropout < 1 enables
    binomial dropout masks on self.hmasks.
    """
    #-------------------------------------------------------------------
    # Cast everything as float32
    eta_0 = np.float32(eta_0)
    T_eta = np.float32(T_eta)
    mu_max = np.float32(mu_max)
    T_mu = np.float32(T_mu)
    anneal = np.float32(anneal)
    accel = np.float32(accel)
    #-------------------------------------------------------------------
    # Set up the updates (see RNN3 for solution if we get non-numeric gradients)
    mupdates = []
    grupdates = []
    pupdates = []
    nupdates = []
    #-------------------------------------------------------------------
    # Set up a variable to keep track of the iteration
    tau = theano.shared(np.float32(0), name='tau')
    pupdates.extend([(tau, tau+np.float32(1))])
    #-------------------------------------------------------------------
    # Set the annealing/acceleration schedule
    # eta decays as (T_eta/(tau+T_eta))^anneal; mu ramps toward mu_max
    eta = eta_0*T.pow(T_eta/(tau+T_eta), anneal)
    mu = mu_max*(np.float32(1)-T.pow(T_mu/(tau+T_mu), accel))
    #-------------------------------------------------------------------
    # Compute the dropout and gradients
    grads = T.grad(self.cost, self.params+self.xparams)
    givens = []
    if dropout < 1:
        for hmask in self.hmasks:
            givens.append((hmask, srng.binomial(hmask.shape, 1, dropout, dtype='float32')))
    #-------------------------------------------------------------------
    # Dense parameters: full-tensor velocity and gradient accumulators
    for theta, gtheta_i, gtheta in zip(self.params, grads[:len(self.params)], self.gparams):
        vtheta = theano.shared(np.zeros_like(theta.get_value()), name='v%s' % theta.name)
        mupdates.append((theta, theta + mu*vtheta))
        grupdates.append((gtheta, gtheta + gtheta_i))
        pupdates.append((theta, theta - eta*gtheta))
        pupdates.append((vtheta, mu*vtheta - eta*gtheta))
        nupdates.append((gtheta, gtheta * np.float32(0)))
    #-------------------------------------------------------------------
    # Sparse parameters: only the rows named by gidxs are touched, so
    # large lookup tables are updated in-place on the active rows only
    gidxs = []
    for lidx, L, gL, gtheta_i in zip(range(len(self.sparams)), self.sparams, self.gsparams, grads[len(self.params):]):
        vL = theano.shared(np.zeros_like(L.get_value()), name='v%s' % L.name)
        gidxs.append(T.ivector('gidxs-%s' % L.name))
        mupdates.append((L, T.inc_subtensor(L[gidxs[-1]], mu*vL[gidxs[-1]])))
        grupdates.append((gL, T.inc_subtensor(gL[self.x[:,lidx]], gtheta_i)))
        pupdates.append((L, T.inc_subtensor(L[gidxs[-1]], -eta*gL[gidxs[-1]])))
        pupdates.append((vL, T.set_subtensor(vL[gidxs[-1]], mu*vL[gidxs[-1]] - eta*gL[gidxs[-1]])))
        nupdates.append((gL, T.set_subtensor(gL[gidxs[-1]], np.float32(0))))
    #-------------------------------------------------------------------
    # Compile the functions
    momentizer = theano.function(
        inputs=gidxs,
        updates=mupdates)
    gradientizer = theano.function(
        inputs=[self.x, self.y],
        outputs=self.cost,
        givens=givens,
        updates=grupdates)
    optimizer = theano.function(
        inputs=gidxs,
        updates=pupdates)
    nihilizer = theano.function(
        inputs=gidxs,
        updates=nupdates)
    return momentizer, gradientizer, optimizer, nihilizer
def run(sz, target, incident, roisize, steepness, guess, nb_iter):
    """
    Runs slm-cg for the given inputs

    Ideally this should be called directly from matlab, but we have had
    some problems so this is called from a python process.
    """
    # NOTE(review): this function relies on Python 2 integer division
    # (NT / 4, NT / 2 below must be ints for np.pad and slicing) — the
    # surrounding file uses py2 print statements, so this holds; porting
    # to Python 3 would require // instead.
    # Calculate fft size (double the device size for zero-padding)
    szT = list(sz)
    for i in range(len(szT)):
        szT[i] *= 2
    # TODO: Add support for non-square device
    assert szT[0] == szT[1], 'Image must be square'
    NT = szT[0]
    # Pad the target array
    target = np.pad(target, [(NT / 4, NT / 4), (NT / 4, NT / 4)], 'constant')
    # From LG file, calculates weighting for circle with Gaussian falloff
    Weighting = slm.gaussian_top_round(n=NT,
                                       r0=(NT / 2, NT / 2),
                                       d=roisize,
                                       sigma=2,
                                       A=1.0)
    Wcg = slm.weighting_value(M=Weighting, p=1E-4, v=0)
    #
    # Magic normalisation stuff
    #
    # Rescale the incident beam to a fixed total power, then match the
    # target's total power to it inside the weighted region.
    I_L_tot = np.sum(np.power(incident, 2.))
    incident = incident * np.power(10000.0 / I_L_tot, 0.5)
    I_L_tot = np.sum(np.power(incident, 2.))
    target = target * Wcg
    # ilent2: Why this step?
    target = np.abs(target) * np.exp(Wcg * np.angle(target) * 1j)
    #P = P * Wcg
    I_Ta_w = np.sum(np.power(np.abs(target), 2.))
    target = target * np.power(I_L_tot / (I_Ta_w), 0.5)
    I_Ta_w = np.sum(np.power(np.abs(target), 2.))
    if np.any(np.isnan(target)):
        raise Exception('Encountered nan in normalized target array')
    #
    # Setup the SLM object
    #
    slm_opt = slm.SLM(NT=NT, initial_phi=guess.flatten(), profile_s=incident)
    #
    # Generate cost function
    #
    # Normalised overlap between the produced field and the target,
    # restricted to the weighted region Wcg.
    overlap = T.sum(
        np.abs(target) * slm_opt.E_out_amp * Wcg *
        T.cos(slm_opt.E_out_p - np.angle(target)))
    overlap = overlap / (T.pow(
        T.sum(T.pow(np.abs(target), 2)) *
        T.sum(T.pow(slm_opt.E_out_amp * Wcg, 2)), 0.5))
    # steepness scales the cost exponentially (10^steepness prefactor)
    cost_SE = np.power(10, steepness) * T.pow((1 - overlap), 2)
    #
    # Generate cost and gradient functions for optimisation
    #
    cost = cost_SE
    cost_fn = theano.function([], cost, on_unused_input='warn')
    cost_grad = T.grad(cost, wrt=slm_opt.phi)
    grad_fn = theano.function([], cost_grad, on_unused_input='warn')

    # Adapters so scipy's fmin_cg can drive the Theano shared variable.
    def wrapped_cost_fn(phi):
        slm_opt.phi.set_value(phi[0:(NT / 2)**2], borrow=True)
        return cost_fn()

    def wrapped_grad_fn(phi):
        slm_opt.phi.set_value(phi, borrow=True)
        return grad_fn()
    #
    # Run the optimisation
    #
    res = scipy.optimize.fmin_cg(retall=False,
                                 full_output=False,
                                 disp=True,
                                 f=wrapped_cost_fn,
                                 x0=guess.flatten(),
                                 fprime=wrapped_grad_fn,
                                 maxiter=nb_iter)
    return res.reshape(sz)
def Adam(self, eta_0=.05, T_eta=1, rho1_max=.9, rho2_max=.99, T_rho=1, epsilon=1e-6, dropout=1., anneal=0, expand=0):
    """Compile Adam training functions for dense and sparse parameters.

    Returns four Theano functions, mirroring SGD():
      momentizer   — no-op here (kept for interface parity with SGD)
      gradientizer — accumulates minibatch gradients into self.gparams
      optimizer    — applies Adam updates to parameters and moments
      nihilizer    — zeroes the accumulated gradients

    eta_0/T_eta/anneal set the learning-rate decay schedule;
    rho1_max/rho2_max/T_rho/expand ramp the moment-decay rates up from 0.

    BUG FIX: the sparse branch previously read the stale dense-loop
    locals `mtheta`/`vtheta` instead of the per-table moments `mL`/`vL`,
    so sparse parameters were updated with another parameter's moment
    state. It now indexes `mL` and `vL`.
    """
    #-------------------------------------------------------------------
    # Cast everything as float32
    eta_0 = np.float32(eta_0)
    T_eta = np.float32(T_eta)
    rho1_max = np.float32(rho1_max)
    rho2_max = np.float32(rho2_max)
    T_rho = np.float32(T_rho)
    anneal = np.float32(anneal)
    expand = np.float32(expand)
    #-------------------------------------------------------------------
    # Set up the updates (see RNN3 for solution if we get non-numeric gradients)
    mupdates = []
    grupdates = []
    pupdates = []
    nupdates = []
    #-------------------------------------------------------------------
    # Set up a variable to keep track of the iteration
    tau = theano.shared(np.float32(0), name='tau')
    pupdates.extend([(tau, tau+np.float32(1))])
    #-------------------------------------------------------------------
    # Set the annealing schedule
    eta = eta_0*T.pow(T_eta/(tau+T_eta), anneal)
    rho1 = rho1_max*(np.float32(1)-T.pow(T_rho/(tau+T_rho), expand))
    rho2 = rho2_max*(np.float32(1)-T.pow(T_rho/(tau+T_rho), expand))
    #-------------------------------------------------------------------
    # Compute the dropout and gradients
    grads = T.grad(self.cost, self.params+self.xparams)
    givens = []
    if dropout < 1:
        for hmask in self.hmasks:
            givens.append((hmask, srng.binomial(hmask.shape, 1, dropout, dtype='float32')))
    #-------------------------------------------------------------------
    # Dense parameters
    # NOTE(review): the bias correction divides by (1-rho1)/(1-rho2)
    # rather than the canonical (1-rho1^t)/(1-rho2^t); the rho schedule
    # above appears to stand in for it — kept as-is.
    for theta, gtheta_i, gtheta in zip(self.params, grads[:len(self.params)], self.gparams):
        mtheta = theano.shared(np.zeros_like(theta.get_value()), name='m%s' % theta.name)
        vtheta = theano.shared(np.zeros_like(theta.get_value()), name='v%s' % theta.name)
        mtheta_t = (rho1*mtheta + (np.float32(1)-rho1)*gtheta) / (np.float32(1)-rho1)
        vtheta_t = (rho2*vtheta + (np.float32(1)-rho2)*T.sqr(gtheta)) / (np.float32(1)-rho2)
        deltatheta_t = mtheta_t / (T.sqrt(vtheta_t) + epsilon)
        grupdates.append((gtheta, gtheta + gtheta_i))
        pupdates.append((theta, theta - eta*deltatheta_t))
        pupdates.append((mtheta, mtheta_t))
        pupdates.append((vtheta, vtheta_t))
        nupdates.append((gtheta, gtheta * np.float32(0)))
    #-------------------------------------------------------------------
    # Sparse parameters: only rows selected by gidxs are read and written
    gidxs = []
    for lidx, L, gL, gtheta_i in zip(range(len(self.sparams)), self.sparams, self.gsparams, grads[len(self.params):]):
        mL = theano.shared(np.zeros_like(L.get_value()), name='m%s' % L.name)
        vL = theano.shared(np.zeros_like(L.get_value()), name='v%s' % L.name)
        gidxs.append(T.ivector('gidxs-%s' % L.name))
        # FIX: index this table's own moments mL/vL (was mtheta/vtheta,
        # the leftover locals from the dense loop above)
        mL_t = (rho1*mL[gidxs[-1]] + (np.float32(1)-rho1)*gL[gidxs[-1]]) / (np.float32(1)-rho1)
        vL_t = (rho2*vL[gidxs[-1]] + (np.float32(1)-rho2)*T.sqr(gL[gidxs[-1]])) / (np.float32(1)-rho2)
        deltaL_t = mL_t / (T.sqrt(vL_t) + epsilon)
        grupdates.append((gL, T.inc_subtensor(gL[self.x[:,lidx]], gtheta_i)))
        pupdates.append((L, T.inc_subtensor(L[gidxs[-1]], -eta*deltaL_t)))
        pupdates.append((mL, T.set_subtensor(mL[gidxs[-1]], mL_t)))
        pupdates.append((vL, T.set_subtensor(vL[gidxs[-1]], vL_t)))
        nupdates.append((gL, T.set_subtensor(gL[gidxs[-1]], np.float32(0))))
    #-------------------------------------------------------------------
    # Compile the functions
    momentizer = theano.function(
        inputs=gidxs,
        updates=mupdates,
        on_unused_input='ignore')
    gradientizer = theano.function(
        inputs=[self.x, self.y],
        outputs=self.cost,
        givens=givens,
        updates=grupdates)
    optimizer = theano.function(
        inputs=gidxs,
        updates=pupdates)
    nihilizer = theano.function(
        inputs=gidxs,
        updates=nupdates)
    return momentizer, gradientizer, optimizer, nihilizer
def logp_ab(value):
    '''Log prior density p(a, b) proportional to (a + b)^(-5/2).'''
    # FIX: use a float exponent. Under Python 2 integer division,
    # -5 / 2 evaluates to -3 and silently changes the prior; -5.0 / 2.0
    # gives the intended -2.5 on both Python 2 and 3.
    return tt.log(tt.pow(tt.sum(value), -5.0 / 2.0))
# Fit a power-law model answer ~ A * question^l0 * answerer^l1, but only
# when there are enough observations for the fit to be meaningful.
if len(answer_count) >= 10:
    # model specifications in PyMC3 are wrapped in a with-statement
    with pm.Model() as model:
        # Define priors
        A_answer = pm.Normal('A_answer', 0, sd=50)
        lambda_0_answer = pm.Normal('lambda_0_answer', 0, sd=20)
        lambda_1_answer = pm.Normal('lambda_1_answer', 0, sd=20)
        # Deterministic power-law mean built from the two exponents
        model_answer = pm.Deterministic(
            'model_answer',
            A_answer * tt.pow(np.array(question_count), lambda_0_answer) *
            tt.pow(np.array(answerer_count), lambda_1_answer))
        sigma = pm.HalfCauchy('sigma', beta=10)
        observations = pm.Normal('observations',
                                 mu=model_answer,
                                 sd=sigma,
                                 observed=np.array(answer_count))

        # Inference!
        step = pm.Metropolis(vars=[
            A_answer, lambda_0_answer, lambda_1_answer, sigma,
            model_answer, observations
        ])
        start = pm.find_MAP()  # initialization using MAP
        trace = pm.sample(5000, step=step, start=start)
def __init__(self, model, state_length, action_length, state_bounds, action_bounds, settings_):
    """Build the GAN forward-dynamics model: experience memory, generator
    (forward-dynamics network), discriminator (critic network), a reward
    network, and all the compiled Lasagne/Theano update rules for them.

    Parameters
    ----------
    model : network container exposing the critic, forward-dynamics and
        reward networks plus their symbolic variables
    state_length, action_length : dimensionalities of state and action
    state_bounds, action_bounds : normalisation bounds passed to the base
    settings_ : configuration dict (learning rates, regularisation, ...)
    """
    print("Building GAN Model")
    super(GANKeras, self).__init__(model, state_length, action_length,
                                   state_bounds, action_bounds, 0, settings_)
    self._noise_mean = 0.0
    self._noise_std = 1.0
    # if settings['action_space_continuous']:
    # Experience replay buffer; optionally with a distinct result-state size
    if ('size_of_result_state' in self.getSettings()):
        self._experience = ExperienceMemory(
            state_length,
            action_length,
            self.getSettings()['expereince_length'],
            continuous_actions=True,
            settings=self.getSettings(),
            result_state_length=self.getSettings()['size_of_result_state'])
    else:
        self._experience = ExperienceMemory(
            state_length,
            action_length,
            self.getSettings()['expereince_length'],
            continuous_actions=True,
            settings=self.getSettings())
    self._experience.setStateBounds(copy.deepcopy(self.getStateBounds()))
    self._experience.setRewardBounds(copy.deepcopy(self.getRewardBounds()))
    self._experience.setActionBounds(copy.deepcopy(self.getActionBounds()))
    # Target network is a deep copy of the live model
    self._modelTarget = copy.deepcopy(model)
    # print ("Initial W " + str(self._w_o.get_value()) )
    self._learning_rate = self.getSettings()["fd_learning_rate"]
    self._regularization_weight = 1e-5
    self._rho = self.getSettings()['rho']
    self._rms_epsilon = self.getSettings()['rms_epsilon']
    self._weight_update_steps = self.getSettings(
    )['steps_until_target_network_update']
    self._updates = 0
    self._decay_weight = self.getSettings()['regularization_weight']
    self._critic_regularization_weight = self.getSettings(
    )["critic_regularization_weight"]
    # self._q_valsA = lasagne.layers.get_output(self._model.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=True)
    # self._q_valsA_drop = lasagne.layers.get_output(self._model.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=False)
    # self._q_valsNextState = lasagne.layers.get_output(self._model.getCriticNetwork(), self._model.getResultStateSymbolicVariable(), deterministic=True)
    # self._q_valsTargetNextState = lasagne.layers.get_output(self._modelTarget.getCriticNetwork(), self._model.getResultStateSymbolicVariable(), deterministic=True)
    # self._q_valsTarget = lasagne.layers.get_output(self._modelTarget.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=True)
    # self._q_valsTarget_drop = lasagne.layers.get_output(self._modelTarget.getCriticNetwork(), self._model.getStateSymbolicVariable(), deterministic=False)

    # Generator outputs: with Gaussian noise the noise tensor is fed in;
    # otherwise a deterministic/dropout pair is built.
    if ("train_gan_with_gaussian_noise" in self.getSettings() and
            (self.getSettings()["train_gan_with_gaussian_noise"])):
        inputs_1 = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model._Noise: self._noise_shared
        }
        # NOTE(review): both outputs use deterministic=True here, unlike
        # the else-branch where _generator_drop uses deterministic=False —
        # looks intentional for the noise-driven variant, but confirm.
        self._generator_drop = lasagne.layers.get_output(
            self._model.getForwardDynamicsNetwork(),
            inputs_1,
            deterministic=True)
        self._generator = lasagne.layers.get_output(
            self._model.getForwardDynamicsNetwork(),
            inputs_1,
            deterministic=True)
    else:
        inputs_1 = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            # self._model._Noise: self._noise_shared
        }
        self._generator = lasagne.layers.get_output(
            self._model.getForwardDynamicsNetwork(),
            inputs_1,
            deterministic=True)
        self._generator_drop = lasagne.layers.get_output(
            self._model.getForwardDynamicsNetwork(),
            inputs_1,
            deterministic=False)
    # self._q_valsActTarget = lasagne.layers.get_output(self._modelTarget.getForwardDynamicsNetwork(), self._model.getResultStateSymbolicVariable(), deterministic=True)
    # self._q_valsActA_drop = lasagne.layers.get_output(self._model.getForwardDynamicsNetwork(), self._model.getStateSymbolicVariable(), deterministic=False)

    # Discriminator (critic) outputs over the same state/action/result triple
    inputs_ = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
        # self._model._Noise: self._noise_shared
    }
    self._discriminator = lasagne.layers.get_output(
        self._model.getCriticNetwork(), inputs_, deterministic=True)
    self._discriminator_drop = lasagne.layers.get_output(
        self._model.getCriticNetwork(), inputs_, deterministic=False)
    """
    inputs_2 = {
        self._modelTarget.getStateSymbolicVariable(): self._model.getResultStates(),
        self._modelTarget.getActionSymbolicVariable(): self._model.getActions()
    }
    """
    # Discriminator loss: MSE against the reward signal
    self._diff = self._model.getRewardSymbolicVariable(
    ) - self._discriminator_drop
    loss = T.pow(self._diff, 2)
    self._loss = T.mean(loss)
    # Generator loss: MSE against the observed result state
    self._diff_g = self._model.getResultStateSymbolicVariable(
    ) - self._generator_drop
    loss_g = T.pow(self._diff_g, 2)
    self._loss_g = T.mean(loss_g)
    # assert len(lasagne.layers.helper.get_all_params(self._l_outA)) == 16
    # Need to remove the action layers from these params
    self._params = lasagne.layers.helper.get_all_params(
        self._model.getCriticNetwork())
    print("******Number of Layers is: " + str(
        len(
            lasagne.layers.helper.get_all_params(
                self._model.getCriticNetwork()))))
    print("******Number of Action Layers is: " + str(
        len(
            lasagne.layers.helper.get_all_params(
                self._model.getForwardDynamicsNetwork()))))
    self._actionParams = lasagne.layers.helper.get_all_params(
        self._model.getForwardDynamicsNetwork())
    self._givens_ = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getRewardSymbolicVariable(): self._model.getRewards(),
        # self._model._Noise: self._noise_shared
    }
    # L2 weight decay on the critic
    self._critic_regularization = (
        self._critic_regularization_weight *
        lasagne.regularization.regularize_network_params(
            self._model.getCriticNetwork(), lasagne.regularization.l2))
    ## MSE update
    self._value_grad = T.grad(self._loss + self._critic_regularization,
                              self._params)
    print("Optimizing Value Function with ",
          self.getSettings()['optimizer'], " method")
    self._updates_ = lasagne.updates.adam(self._value_grad,
                                          self._params,
                                          self._learning_rate,
                                          beta1=0.9,
                                          beta2=0.9,
                                          epsilon=self._rms_epsilon)
    # Givens for the generator updates (noise variant feeds _Noise)
    if ("train_gan_with_gaussian_noise" in settings_ and
            (settings_["train_gan_with_gaussian_noise"])):
        self._actGivens = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            self._model._Noise: self._noise_shared
        }
        self._actGivens_MSE = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            self._model.getResultStateSymbolicVariable():
            self._model.getResultStates(),
            self._model._Noise: self._noise_shared
        }
    else:
        self._actGivens = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            # self._model._Noise: self._noise_shared
        }
        self._actGivens_MSE = {
            self._model.getStateSymbolicVariable():
            self._model.getStates(),
            self._model.getActionSymbolicVariable():
            self._model.getActions(),
            # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
            # self._model._Noise: self._noise_shared
        }
    self._actor_regularization = (
        self._regularization_weight *
        lasagne.regularization.regularize_network_params(
            self._model.getForwardDynamicsNetwork(),
            lasagne.regularization.l2))
    ## MSE update
    self._gen_grad = T.grad(self._loss_g + self._actor_regularization,
                            self._actionParams)
    print("Optimizing Value Function with ",
          self.getSettings()['optimizer'], " method")
    self._updates_generator = lasagne.updates.adam(
        self._gen_grad,
        self._actionParams,
        self._learning_rate,
        beta1=0.9,
        beta2=0.9,
        epsilon=self._rms_epsilon)

    ## Some cool stuff to backprop action gradients
    self._result_state_grad = T.matrix("Action_Grad")
    self._result_state_grad.tag.test_value = np.zeros(
        (self._batch_size, self._state_length),
        dtype=np.dtype(self.getSettings()['float_type']))
    self._result_state_grad_shared = theano.shared(
        np.zeros((self._batch_size, self._state_length),
                 dtype=self.getSettings()['float_type']))

    ### Maximize wrt q function
    # NOTE: the trailing comma wraps T.grad's result in a 1-tuple, which
    # is unwrapped by the [0]/list() below.
    self._result_state_mean_grads = T.grad(
        cost=None,
        wrt=self._actionParams,
        known_grads={self._generator: self._result_state_grad_shared}),
    print("Action grads: ", self._result_state_mean_grads[0])
    ## When passing in gradients it needs to be a proper list of gradient expressions
    self._result_state_mean_grads = list(self._result_state_mean_grads[0])
    # print ("isinstance(self._action_mean_grads, list): ", isinstance(self._action_mean_grads, list))
    # print ("Action grads: ", self._action_mean_grads)
    self._generatorGRADUpdates = lasagne.updates.adam(
        self._result_state_mean_grads,
        self._actionParams,
        self._learning_rate * 0.1,
        beta1=0.9,
        beta2=0.9,
        epsilon=self._rms_epsilon)

    self._givens_grad = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
    }

    ### Some other stuff to learn a reward function
    self._inputs_reward_ = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
    }
    self._reward = lasagne.layers.get_output(self._model.getRewardNetwork(),
                                             self._inputs_reward_,
                                             deterministic=True)
    self._reward_drop = lasagne.layers.get_output(
        self._model.getRewardNetwork(),
        self._inputs_reward_,
        deterministic=False)
    ## because rewards are noramlized then scaled by the discount factor to the value stay between -1,1.
    self._reward_diff = (self._model.getRewardSymbolicVariable() *
                         (1.0 / (1.0 - self.getSettings()['discount_factor']))
                         ) - self._reward_drop
    self.__Reward = self._model.getRewardSymbolicVariable()
    print("self.__Reward", self.__Reward)
    # self._reward_diff = (self._model.getRewardSymbolicVariable()) - self._reward_drop
    self._reward_loss_ = T.mean(T.pow(self._reward_diff, 2), axis=1)
    self._reward_loss = T.mean(self._reward_loss_)
    self._reward_diff_NoDrop = (
        self._model.getRewardSymbolicVariable() *
        (1.0 /
         (1.0 - self.getSettings()['discount_factor']))) - self._reward
    # self._reward_diff_NoDrop = (self._model.getRewardSymbolicVariable()) - self._reward
    self._reward_loss_NoDrop_ = T.mean(T.pow(self._reward_diff_NoDrop, 2),
                                       axis=1)
    self._reward_loss_NoDrop = T.mean(self._reward_loss_NoDrop_)
    self._reward_params = lasagne.layers.helper.get_all_params(
        self._model.getRewardNetwork())
    self._reward_givens_ = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        # self._model.getResultStateSymbolicVariable() : self._model.getResultStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._model.getRewardSymbolicVariable(): self._model.getRewards(),
    }
    self._reward_updates_ = lasagne.updates.adam(
        self._reward_loss +
        (self._regularization_weight *
         lasagne.regularization.regularize_network_params(
             self._model.getRewardNetwork(), lasagne.regularization.l2)),
        self._reward_params,
        self._learning_rate,
        beta1=0.9,
        beta2=0.999,
        epsilon=self._rms_epsilon)

    GANKeras.compile(self)
def quantizeWeight(w, B):
    """Quantize weights to B-bit fixed point in [-1, 1 - 2^(1-B)].

    Rounds w to the nearest multiple of the step 2^(1-B) and clamps the
    result below the largest representable positive code.
    """
    step = T.pow(2.0, 1.0 - B)       # quantization step 2^(1-B)
    scale = T.pow(2.0, B - 1.0)      # inverse step, 2^(B-1)
    ceiling = 1.0 - step             # largest representable value
    snapped = T.round(w * scale) * step
    return T.minimum(ceiling, snapped)
def errors(self, y):
    """Root-mean-square error between the model output and targets y."""
    squared_error = T.pow(self.output - y, 2)
    return T.sqrt(T.mean(squared_error))
def _get_gradients_adagrad(self, J):
    """Get the AdaGrad gradients and squared gradients updates. The
    returned gradients still need to be multiplied with the general
    learning rate.

    Parameters
    ----------
    J : theano variable
        cost

    Returns
    -------
    theano variable
        gradients that are adapted by the AdaGrad algorithm
    theano variable
        updated sum of squares for all previous steps
    """
    grads = T.grad(J, [
        self.__dict__[self.updatable_parameters[i]]
        for i in xrange(len(self.updatable_parameters))
    ])

    for i, _ in enumerate(grads):
        grads[i] = debug_print(grads[i],
                               'grads_' + self.updatable_parameters[i])

    updated_squares = dict()

    # Add squared gradient to the squared gradient matrix for AdaGrad and
    # recalculate the gradient.
    for i, p in enumerate(self.updatable_parameters):
        # We need to handle sparse gradient variables differently
        if isinstance(grads[i], sparse.SparseVariable):
            # Add the sqares to the matrix
            power = debug_print(sparse.structured_pow(grads[i], 2.),
                                'pow_' + p)
            # Remove zeros (might happen when squaring near zero values)
            power = sparse.remove0(power)
            updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                               p] + power

            # Get only those squares that will be altered, for all others we
            # don't have gradients, i.e., we don't need to consider them at
            # all.
            sqrt_matrix = sparse.sp_ones_like(power)
            sqrt_matrix = debug_print(updated_squares[p] * sqrt_matrix,
                                      'adagrad_squares_subset_' + p)

            # Take the square root of the matrix subset.
            sqrt_matrix = debug_print(sparse.sqrt(sqrt_matrix),
                                      'adagrad_sqrt_' + p)
            # Calc 1. / the square root.
            sqrt_matrix = debug_print(
                sparse.structured_pow(sqrt_matrix, -1.),
                'adagrad_pow-1_' + p)
            grads[i] = sparse.mul(grads[i], sqrt_matrix)
        else:
            power = debug_print(T.pow(grads[i], 2.), 'pow_' + p)
            updated_squares[p] = self.__dict__['adagrad_matrix_' +
                                               p] + power

            # Call sqrt only for those items that are non-zero.
            # (zeros map to a denominator of 1 so the gradient is unchanged)
            denominator = T.switch(
                T.neq(updated_squares[p], 0.0),
                T.sqrt(updated_squares[p]),
                T.ones_like(updated_squares[p], dtype=floatX))
            grads[i] = T.mul(grads[i], 1. / denominator)

        updated_squares[p] = debug_print(updated_squares[p],
                                         'upd_squares_' + p)

    for i, _ in enumerate(grads):
        grads[i] = debug_print(
            grads[i], 'grads_updated_' + self.updatable_parameters[i])

    return grads, updated_squares
def gelu(x):
    """Gaussian Error Linear Unit, tanh approximation (Hendrycks & Gimpel):
    0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))."""
    inner = T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))
    return 0.5 * x * (1 + T.tanh(inner))
# ax.plot(specS.inputWave, spectrumObs * specS.int_mask, label='Reddening applied to output spec') # ax.update({'xlabel': 'Wavelength (nm)', 'ylabel': 'Flux (normalised)'}) # ax.legend() # plt.show() basesFlux_tt = theano.shared(specS.onBasesFluxNorm) Xx_tt = theano.shared(specS.Xx_stellar) wi_tt = theano.shared(coeffs_mean) with pm.Model() as model: w_i = pm.Normal('w_i', mu=coeffs_mean, sd=coeffs_std, shape=specS.nBases) Av_star = pm.Lognormal('Av_star', mu=1, sd=0.75) #err = pm.Normal('err', mu=0.0, sd=5, shape=specS.nBases) spectrum = w_i.dot(basesFlux_tt) spectrum_reddened = spectrum * tt.pow(10, -0.4 * Av_star * Xx_tt) Y = pm.Normal('Y', mu=spectrum_reddened * specS.int_mask, sd=err_array * specS.int_mask, observed=spectrumObs * specS.int_mask) for RV in model.basic_RVs: print(RV.name, RV.logp(model.test_point)) # Launch model step = pm.NUTS() trace = pm.sample(3000, tune=1000, step=step) # Output trace data print pm.summary(trace)
def quantizeAct(x, B): return T.minimum(2.0 - T.pow(2.0, 1.0 - B), T.round(x * T.pow(2.0, B - 1.0)) * T.pow(2.0, 1.0 - B))
def th_variance(self, space): return tt.pow(self.th_std(space), 2)
def __init__(
    self,
    cell_state_mat: np.ndarray,
    X_data: np.ndarray,
    n_comb: int = 50,
    data_type: str = 'float32',
    n_iter=20000,
    learning_rate=0.005,
    total_grad_norm_constraint=200,
    verbose=True,
    var_names=None,
    var_names_read=None,
    obs_names=None,
    fact_names=None,
    sample_id=None,
    # NOTE(review): mutable dict defaults are shared across calls AND are
    # mutated in-place below — TODO confirm this is intended.
    gene_level_prior={
        'mean': 1 / 2,
        'sd': 1 / 4
    },
    gene_level_var_prior={'mean_var_ratio': 1},
    cell_number_prior={
        'cells_per_spot': 8,
        'factors_per_spot': 7
    },
    cell_number_var_prior={
        'cells_mean_var_ratio': 1,
        'factors_mean_var_ratio': 1
    },
    phi_hyp_prior={
        'mean': 3,
        'sd': 1
    },
    spot_fact_mean_var_ratio=5,
    exper_gene_level_mean_var_ratio=10,
):
    """Build a PyMC3 model decomposing spatial expression (spots x genes)
    into per-cell-type contributions scaled by gene- and spot-level factors,
    with per-experiment additive and overdispersion terms.
    """
    ############# Initialise parameters ################
    super().__init__(cell_state_mat, X_data, data_type, n_iter,
                     learning_rate, total_grad_norm_constraint, verbose,
                     var_names, var_names_read, obs_names, fact_names,
                     sample_id)

    # Merge variance hyper-prior into the gene-level prior dict.
    for k in gene_level_var_prior.keys():
        gene_level_prior[k] = gene_level_var_prior[k]

    self.gene_level_prior = gene_level_prior
    self.phi_hyp_prior = phi_hyp_prior
    self.n_comb = n_comb
    self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio
    self.exper_gene_level_mean_var_ratio = exper_gene_level_mean_var_ratio

    # generate parameters for samples: one-hot spot -> sample/experiment map
    self.spot2sample_df = pd.get_dummies(sample_id)
    # convert to np.ndarray
    self.spot2sample_mat = self.spot2sample_df.values
    self.n_exper = self.spot2sample_mat.shape[1]
    # assign extra data to dictionary with (1) shared parameters (2) input data
    self.extra_data_tt = {
        'spot2sample':
        theano.shared(self.spot2sample_mat.astype(self.data_type))
    }
    self.extra_data = {
        'spot2sample': self.spot2sample_mat.astype(self.data_type)
    }

    # Merge variance hyper-prior into the cell-number prior dict.
    for k in cell_number_var_prior.keys():
        cell_number_prior[k] = cell_number_var_prior[k]
    self.cell_number_prior = cell_number_prior

    ############# Define the model ################
    self.model = pm.Model()

    with self.model:

        # =====================Gene expression level scaling======================= #
        # Explains difference in expression between genes and
        # how it differs in single cell and spatial technology
        # compute hyperparameters from mean and sd
        # (Gamma shape/rate from moment matching: shape = mean^2/var, rate = mean/var)
        shape = gene_level_prior['mean']**2 / gene_level_prior['sd']**2
        rate = gene_level_prior['mean'] / gene_level_prior['sd']**2
        shape_var = shape / gene_level_prior['mean_var_ratio']
        rate_var = rate / gene_level_prior['mean_var_ratio']
        self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp',
                                             mu=shape,
                                             sigma=np.sqrt(shape_var),
                                             shape=(1, 1))
        self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp',
                                            mu=rate,
                                            sigma=np.sqrt(rate_var),
                                            shape=(1, 1))

        # global gene levels
        self.gene_level = pm.Gamma('gene_level',
                                   self.gene_level_alpha_hyp,
                                   self.gene_level_beta_hyp,
                                   shape=(self.n_var, 1))

        # scale cell state factors by gene_level
        self.gene_factors = pm.Deterministic('gene_factors', self.cell_state)
        #self.gene_factors = self.cell_state
        # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape)
        # tt.printing.Print('gene_factors sum')(gene_factors.sum(0))

        # =====================Spot factors======================= #
        # prior on spot factors reflects the number of cells, fraction of their cytoplasm captured,
        # times heterogeniety in the total number of mRNA between individual cells with each cell type
        self.cells_per_spot = pm.Gamma('cells_per_spot',
                                       mu=cell_number_prior['cells_per_spot'],
                                       sigma=np.sqrt(cell_number_prior['cells_per_spot'] \
                                                     / cell_number_prior['cells_mean_var_ratio']),
                                       shape=(1, 1))
        self.factors_per_spot = pm.Gamma('factors_per_spot',
                                         mu=cell_number_prior['factors_per_spot'],
                                         sigma=np.sqrt(cell_number_prior['factors_per_spot'] \
                                                       / cell_number_prior['factors_mean_var_ratio']),
                                         shape=(1, 1))

        shape = self.factors_per_spot / np.array(self.n_fact).reshape(
            (1, 1))
        rate = tt.ones(
            (1, 1)) / self.cells_per_spot * self.factors_per_spot
        self.spot_factors = pm.Gamma('spot_factors',
                                     alpha=shape,
                                     beta=rate,
                                     shape=(self.n_obs, self.n_fact))

        # =====================Spot-specific additive component======================= #
        # molecule contribution that cannot be explained by cell state signatures
        # these counts are distributed between all genes not just expressed genes
        self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2)
        self.spot_add = pm.Gamma('spot_add',
                                 self.spot_add_hyp[0],
                                 self.spot_add_hyp[1],
                                 shape=(self.n_obs, 1))

        # =====================Gene-specific additive component ======================= #
        # per gene molecule contribution that cannot be explained by cell state signatures
        # these counts are distributed equally between all spots (e.g. background, free-floating RNA)
        self.gene_add_hyp = pm.Gamma('gene_add_hyp', 1, 1, shape=2)
        self.gene_add = pm.Gamma('gene_add',
                                 self.gene_add_hyp[0],
                                 self.gene_add_hyp[1],
                                 shape=(self.n_exper, self.n_var))

        # =====================Gene-specific overdispersion ======================= #
        self.phi_hyp = pm.Gamma('phi_hyp',
                                mu=phi_hyp_prior['mean'],
                                sigma=phi_hyp_prior['sd'],
                                shape=(1, 1))
        self.gene_E = pm.Exponential('gene_E',
                                     self.phi_hyp,
                                     shape=(self.n_exper, self.n_var))

        # =====================Expected expression ======================= #
        # expected expression
        self.mu_biol = pm.math.dot(self.spot_factors, self.gene_factors.T) \
            * self.gene_level.T \
            + pm.math.dot(self.extra_data_tt['spot2sample'],
                          self.gene_add) + self.spot_add
        # tt.printing.Print('mu_biol')(self.mu_biol.shape)

        # =====================DATA likelihood ======================= #
        # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson
        self.data_target = pm.NegativeBinomial(
            'data_target',
            mu=self.mu_biol,
            # NB alpha = 1/gene_E^2, mapped from experiments to spots
            alpha=pm.math.dot(self.extra_data_tt['spot2sample'],
                              1 / tt.pow(self.gene_E, 2)),
            observed=self.x_data,
            total_size=self.X_data.shape)

        # =====================Compute nUMI from each factor in spots ======================= #
        self.nUMI_factors = pm.Deterministic(
            'nUMI_factors',
            (self.spot_factors *
             (self.gene_factors * self.gene_level).sum(0)))
def inner_fn(t, stm1, oat, ot, oht, pos, vt,
             r_Wq_hst_ot, r_Wq_hst_oht, r_Wq_hst_oat, r_Wq_hst_stm1, r_bq_hst,
             r_Wq_hst2_hst, r_bq_hst2,
             r_Wq_stmu_hst2, r_bq_stmu,
             r_Wq_stsig_hst2, r_bq_stsig,
             r_Wl_stmu_stm1, r_bl_stmu,
             r_Wl_stsig_stm1, r_bl_stsig,
             r_Wl_ost_st, r_bl_ost,
             r_Wl_ost2_ost, r_bl_ost2,
             r_Wl_ost3_ost2, r_bl_ost3,
             r_Wl_otmu_st, r_bl_otmu,
             r_Wl_otsig_st, r_bl_otsig,
             r_Wl_ohtmu_st, r_bl_ohtmu,
             r_Wl_ohtsig_st, r_bl_ohtsig,
             r_Wl_oatmu_st, r_bl_oatmu,
             r_Wl_oatsig_st, r_bl_oatsig,
             r_Wa_atmu_st, r_ba_atmu,
             r_Wa_atsig_st, r_ba_atsig
             ):
    # One time-step of an active-inference agent (recognition model q,
    # generative model l, action model a) plus a simulated mountain-car-like
    # environment, batched over n_perturbations x n_proc.
    # NOTE(review): n_perturbations, n_s, n_o, n_oh, n_oa, n_proc,
    # sig_min_*, theano_rng, GaussianNLL, KLGaussianGaussian and ifelse are
    # captured from module scope — confirm against the rest of the file.

    # Recognition network: hidden layer from previous state + observations.
    hst = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_ot,
                            T.reshape(ot, (n_perturbations, n_o, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_oht,
                            T.reshape(oht, (n_perturbations, n_oh, n_proc)),
                            axes=[[2], [1]]) +
        T.batched_tensordot(r_Wq_hst_oat,
                            T.reshape(oat, (n_perturbations, n_oa, n_proc)),
                            axes=[[2], [1]]) + r_bq_hst)

    hst2 = T.nnet.relu(
        T.batched_tensordot(r_Wq_hst2_hst,
                            T.reshape(hst, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_hst2)

    # Posterior state distribution q(s_t): mean (tanh) and std (softplus).
    stmu = T.tanh(
        T.batched_tensordot(r_Wq_stmu_hst2,
                            T.reshape(hst2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_stmu)

    stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wq_stsig_hst2,
                            T.reshape(hst2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bq_stsig) + sig_min_states

    # Rescale representation to fit within linear response of the tanh-nonlinearity
    # (first state dimension is clamped to a scaled copy of the observation).
    stmu = T.set_subtensor(stmu[:, 0, :], 0.1 * ot[:, 0, :]).reshape(
        (n_perturbations, n_s, n_proc))
    stsig = T.set_subtensor(stsig[:, 0, :], 0.01).reshape(
        (n_perturbations, n_s, n_proc))

    # Reparameterized sample of the state.
    st = stmu + theano_rng.normal((n_perturbations, n_s, n_proc)) * stsig

    # Generative network: decode observations from the sampled state.
    ost = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost_st,
                            T.reshape(st, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost)
    ost2 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost2_ost,
                            T.reshape(ost, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost2)
    ost3 = T.nnet.relu(
        T.batched_tensordot(r_Wl_ost3_ost2,
                            T.reshape(ost2, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ost3)

    otmu = T.batched_tensordot(
        r_Wl_otmu_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)),
        axes=[[2], [1]]) + r_bl_otmu
    otsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_otsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_otsig) + sig_min_obs

    ohtmu = T.batched_tensordot(
        r_Wl_ohtmu_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)),
        axes=[[2], [1]]) + r_bl_ohtmu
    ohtsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_ohtsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_ohtsig) + sig_min_obs

    oatmu = T.batched_tensordot(
        r_Wl_oatmu_st, T.reshape(ost3, (n_perturbations, n_s, n_proc)),
        axes=[[2], [1]]) + r_bl_oatmu
    oatsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_oatsig_st,
                            T.reshape(ost3, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_oatsig) + sig_min_obs

    # Negative log-likelihoods of the three observation channels.
    p_ot = GaussianNLL(ot, otmu, otsig)
    p_oht = GaussianNLL(oht, ohtmu, ohtsig)
    p_oat = GaussianNLL(oat, oatmu, oatsig)

    # Transition prior p(s_t | s_{t-1}).
    prior_stmu = T.tanh(
        T.batched_tensordot(r_Wl_stmu_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_stmu)
    prior_stsig = T.nnet.softplus(
        T.batched_tensordot(r_Wl_stsig_stm1,
                            T.reshape(stm1, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + r_bl_stsig) + sig_min_states

    # After t >= 20, pin the prior on the first state dim (goal-setting).
    prior_stmu = ifelse(T.lt(t, 20), prior_stmu,
                        T.set_subtensor(prior_stmu[:, 0, :], 0.1))
    prior_stsig = ifelse(T.lt(t, 20), prior_stsig,
                         T.set_subtensor(prior_stsig[:, 0, :], 0.01))

    # Free energy for this step: KL(q||p) + reconstruction NLLs.
    KL_st = KLGaussianGaussian(stmu, stsig, prior_stmu, prior_stsig)
    FEt = KL_st + p_ot + p_oht + p_oat

    # Action proposal network.
    # NOTE(review): biases below are `ba_atmu`/`ba_atsig`, not the
    # parameters `r_ba_atmu`/`r_ba_atsig` passed in — presumably
    # module-level globals, but this looks like a possible bug; confirm.
    oat_mu = T.batched_tensordot(
        r_Wa_atmu_st, T.reshape(st, (n_perturbations, n_s, n_proc)),
        axes=[[2], [1]]) + ba_atmu
    oat_sig = T.nnet.softplus(
        T.batched_tensordot(r_Wa_atsig_st,
                            T.reshape(st, (n_perturbations, n_s, n_proc)),
                            axes=[[2], [1]]) + ba_atsig) + sig_min_action

    # Sampled action (the 0.0 * oat keeps oat in the graph).
    oat_new = 0.0 * oat + oat_mu + theano_rng.normal(
        (n_perturbations, n_oa, n_proc)) * oat_sig

    # Environment dynamics (mountain-car-like hill plus friction).
    action_force = T.tanh(oat_new)
    force = T.switch(
        T.lt(pos, 0.0), -2 * pos - 1,
        -T.pow(1 + 5 * T.sqr(pos), -0.5) -
        T.sqr(pos) * T.pow(1 + 5 * T.sqr(pos), -1.5) -
        T.pow(pos, 4) / 16.0) - 0.25 * vt
    vt_new = vt + 0.05 * force + 0.03 * action_force
    pos_new = pos + vt_new

    # Noisy position observation and proximity-to-goal channel.
    ot_new = pos_new + theano_rng.normal(
        (n_perturbations, n_o, n_proc)) * 0.01
    oht_new = T.exp(-T.sqr(pos_new - 1.0) / 2.0 / 0.3 / 0.3)

    return st, oat_new, ot_new, oht_new, pos_new, vt_new, FEt, KL_st, hst, \
        hst2, stmu, stsig, force, p_ot, p_oht, p_oat
def expr_generator(a, b): ra = [T.pow(a[i], i) for i in range(len(a))] return ra, T.exp(b)
if CONTINUE: logger.info('Setting model weights from epoch {}'.format(max_epoch)) param_values = pickle.load(open(param_values_file, 'rb')) lasagne.layers.set_all_param_values(l_out, param_values['recurrent']) lasagne.layers.set_all_param_values(resnet['pool5'], param_values['resnet']) logger.info('Creating output and loss variables') prediction = lasagne.layers.get_output(l_out, deterministic=False) flat_cap_out_var = T.flatten(cap_out_var, outdim=1) loss = T.sum( lasagne.objectives.categorical_crossentropy(prediction, flat_cap_out_var)) caption_features = lasagne.layers.get_output(l_slice, deterministic=False) order_embedding_loss = T.pow(T.maximum(0, caption_features - im_features), 2).sum() total_loss = loss + ORDER_VIOLATION_COEFF * order_embedding_loss deterministic_prediction = lasagne.layers.get_output(l_out, deterministic=True) deterministic_captions = lasagne.layers.get_output(l_slice, deterministic=True) deterministic_loss = T.sum( lasagne.objectives.categorical_crossentropy(deterministic_prediction, flat_cap_out_var)) deterministic_order_embedding_loss = T.pow( T.maximum(0, deterministic_captions - im_features), 2).sum() deterministic_total_loss = deterministic_loss + ORDER_VIOLATION_COEFF * deterministic_order_embedding_loss logger.info('Getting all parameters and creating update rules.') resnet_params = lasagne.layers.get_all_params(resnet['pool5'],
def __init__(self, nnet, dataset=None, learning_rate=0.01, beta=0.0, sparsity=0.01, weight_decay=0.0, momentum=0.5): if len(dataset) < 2: print("Error dataset must contain tuple (train_data,train_target)") train_data, train_target = dataset target = T.matrix('y') square_error = T.mean(0.5 * T.sum(T.pow(target - nnet.output, 2), axis=1)) avg_activate = T.mean(nnet.hiddenLayer[0].output, axis=0) sparsity_penalty = beta * T.sum( T.mul(T.log(sparsity / avg_activate), sparsity) + T.mul(T.log((1 - sparsity) / T.sub(1, avg_activate)), (1 - sparsity))) regularization = 0.5 * weight_decay * ( T.sum(T.pow(nnet.params[0], 2)) + T.sum(T.pow(nnet.params[2], 2))) cost = square_error + sparsity_penalty + regularization gparams = [T.grad(cost, param) for param in nnet.params] w_deltas = [] for param in nnet.params: w_deltas.append( theano.shared(value=param.get_value() * 0, borrow=True)) new_params = [ param - (learning_rate * gparam + momentum * w_delta) for param, gparam, w_delta in zip(nnet.params, gparams, w_deltas) ] updates = [(param, new_param) for param, new_param in zip(nnet.params, new_params)] updates += [(w_delta, learning_rate * gparam + momentum * w_delta) for w_delta, gparam in zip(w_deltas, gparams)] index = T.lscalar() self.train = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ input: train_data[index * batch_size:(index + 1) * batch_size], target: train_target[index * batch_size:(index + 1) * batch_size] }) self.cost = theano.function(inputs=[], outputs=cost, givens={ input: train_data, target: train_target })
def __init__(self, model, n_in, n_out, state_bounds, action_bounds,
             reward_bound, settings_):
    """Build the Distillation agent: loads expert policies, constructs the
    critic (TD) and actor (expert-imitation) Theano losses, the optimizer
    updates for both, and compiles the training functions.
    """
    super(Distillation, self).__init__(model, n_in, n_out, state_bounds,
                                       action_bounds, reward_bound,
                                       settings_)

    # create a small convolutional neural network
    ### Load expert policy files
    self._expert_policies = []
    file_name_ = ""
    for i in range(len(self.getSettings()['expert_policy_files'])):
        file_name = self.getSettings(
        )['expert_policy_files'][i] + '/' + self.getSettings(
        )['model_type'] + '/' + getAgentName() + '.pkl'
        if (file_name_ == file_name):
            ## To help save memory when experts are the same
            # model_ = self._expert_policies[len(self._expert_policies)-1]
            self._expert_policies.append(model_)
        else:
            print("Loading pre compiled network: ", file_name)
            f = open(file_name, 'rb')
            model_ = dill.load(f)
            # model.setSettings(settings)
            f.close()
            self._expert_policies.append(model_)
        file_name_ = file_name

    # Replay-style buffers for actor training data.
    self._actor_buffer_states = []
    self._actor_buffer_result_states = []
    self._actor_buffer_actions = []
    self._actor_buffer_rewards = []
    self._actor_buffer_falls = []
    self._actor_buffer_diff = []

    self._NotFallen = T.bcol("Not_Fallen")
    ## because float64 <= float32 * int32, need to use int16 or int8
    self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                              dtype=np.dtype('int8'))
    self._NotFallen_shared = theano.shared(np.zeros((self._batch_size, 1),
                                                    dtype='int8'),
                                           broadcastable=(False, True))

    self._tmp_diff = T.col("Tmp_Diff")
    self._tmp_diff.tag.test_value = np.zeros(
        (self._batch_size, 1),
        dtype=np.dtype(self.getSettings()['float_type']))
    self._tmp_diff_shared = theano.shared(np.zeros(
        (self._batch_size, 1), dtype=self.getSettings()['float_type']),
                                          broadcastable=(False, True))
    """
    self._target_shared = theano.shared(
        np.zeros((self._batch_size, 1), dtype='float64'),
        broadcastable=(False, True))
    """
    self._critic_regularization_weight = self.getSettings(
    )["critic_regularization_weight"]
    self._critic_learning_rate = self.getSettings()["critic_learning_rate"]

    ## Target network
    self._modelTarget = copy.deepcopy(model)

    # Critic outputs: deterministic and dropout variants, for current and
    # next states, on both the live model and the frozen target model.
    self._q_valsA = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    self._q_valsA_drop = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)
    self._q_valsNextState = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getResultStateSymbolicVariable(),
        deterministic=True)
    self._q_valsTargetNextState = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getResultStateSymbolicVariable(),
        deterministic=True)
    self._q_valsTarget = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    self._q_valsTarget_drop = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)

    # Actor outputs.
    self._q_valsActA = lasagne.layers.get_output(
        self._model.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    self._q_valsActTarget = lasagne.layers.get_output(
        self._modelTarget.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    self._q_valsActA_drop = lasagne.layers.get_output(
        self._model.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)

    self._q_func = self._q_valsA
    self._q_funcTarget = self._q_valsTarget
    self._q_func_drop = self._q_valsA_drop
    self._q_funcTarget_drop = self._q_valsTarget_drop
    self._q_funcAct = self._q_valsActA
    self._q_funcAct_drop = self._q_valsActA_drop

    # self._target = (self._model.getRewardSymbolicVariable() + (np.array([self._discount_factor] ,dtype=np.dtype(self.getSettings()['float_type']))[0] * self._q_valsTargetNextState )) * self._NotFallen
    # self._target = self._model.getRewardSymbolicVariable() + ((self._discount_factor * self._q_valsTargetNextState ) * self._NotFallen) + (self._NotFallen - 1)
    # TD target: r + gamma * V_target(s').
    self._target = self._model.getRewardSymbolicVariable() + (
        self._discount_factor * self._q_valsTargetNextState)
    self._diff = self._target - self._q_func
    self._diff_drop = self._target - self._q_func_drop
    # loss = 0.5 * self._diff ** 2
    loss = T.pow(self._diff, 2)
    self._loss = T.mean(loss)
    self._loss_drop = T.mean(0.5 * self._diff_drop**2)

    self._params = lasagne.layers.helper.get_all_params(
        self._model.getCriticNetwork())
    self._actionParams = lasagne.layers.helper.get_all_params(
        self._model.getActorNetwork())
    self._givens_ = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getRewardSymbolicVariable(): self._model.getRewards(),
        # self._NotFallen: self._NotFallen_shared
        # self._model.getActionSymbolicVariable(): self._actions_shared,
    }
    self._actGivens = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
        # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        # self._NotFallen: self._NotFallen_shared
        self._tmp_diff: self._tmp_diff_shared
    }

    # L2 regularization for critic and actor networks.
    self._critic_regularization = (
        self._critic_regularization_weight *
        lasagne.regularization.regularize_network_params(
            self._model.getCriticNetwork(), lasagne.regularization.l2))
    self._actor_regularization = (
        (self._regularization_weight *
         lasagne.regularization.regularize_network_params(
             self._model.getActorNetwork(), lasagne.regularization.l2)))
    if (self.getSettings()['use_previous_value_regularization']):
        self._actor_regularization = self._actor_regularization + (
            (self.getSettings()['previous_value_regularization_weight']) *
            change_penalty(self._model.getActorNetwork(),
                           self._modelTarget.getActorNetwork()))
    elif ('regularization_type' in self.getSettings() and
          (self.getSettings()['regularization_type'] == 'KL_Divergence')):
        self._kl_firstfixed = T.mean(
            kl(
                self._q_valsActTarget,
                T.ones_like(self._q_valsActTarget) *
                self.getSettings()['exploration_rate'],
                self._q_valsActA,
                T.ones_like(self._q_valsActA) *
                self.getSettings()['exploration_rate'],
                self._action_length))
        #self._actor_regularization = (( self._KL_Weight ) * self._kl_firstfixed ) + (10*(self._kl_firstfixed>self.getSettings()['kl_divergence_threshold'])*
        #                                 T.square(self._kl_firstfixed-self.getSettings()['kl_divergence_threshold']))
        self._actor_regularization = (self._kl_firstfixed) * (
            self.getSettings()['kl_divergence_threshold'])

        print("Using regularization type : ",
              self.getSettings()['regularization_type'])

    # SGD update
    # self._updates_ = lasagne.updates.rmsprop(self._loss, self._params, self._learning_rate, self._rho,
    #                                          self._rms_epsilon)
    self._value_grad = T.grad(self._loss + self._critic_regularization,
                              self._params)
    ## Clipping the max gradient
    """
    for x in range(len(self._value_grad)):
        self._value_grad[x] = T.clip(self._value_grad[x] , -0.1, 0.1)
    """
    # NOTE(review): the rmsprop branch uses self._learning_rate while the
    # other branches use self._critic_learning_rate — confirm intended.
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                 self._params,
                                                 self._learning_rate,
                                                 self._rho,
                                                 self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.momentum(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adam(self._value_grad,
                                              self._params,
                                              self._critic_learning_rate,
                                              beta1=0.9,
                                              beta2=0.9,
                                              epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adagrad(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            epsilon=self._rms_epsilon)
    else:
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])
        sys.exit(-1)
    ## TD update
    """
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        self._updates_ = lasagne.updates.rmsprop(T.mean(self._q_func) + self._critic_regularization, self._params,
                    self._critic_learning_rate * -T.mean(self._diff), self._rho, self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        self._updates_ = lasagne.updates.momentum(T.mean(self._q_func) + self._critic_regularization, self._params,
                    self._critic_learning_rate * -T.mean(self._diff), momentum=self._rho)
    elif ( self.getSettings()['optimizer'] == 'adam'):
        self._updates_ = lasagne.updates.adam(T.mean(self._q_func), self._params,
                    self._critic_learning_rate * -T.mean(self._diff), beta1=0.9, beta2=0.999, epsilon=1e-08)
    else:
        print ("Unknown optimization method: ", self.getSettings()['optimizer'])
        sys.exit(-1)
    """

    ## Need to perform an element wise operation or replicate _diff for this to work properly.
    # self._actDiff = theano.tensor.elemwise.Elemwise(theano.scalar.mul)((self._model.getActionSymbolicVariable() - self._q_valsActA), theano.tensor.tile((self._diff * (1.0/(1.0-self._discount_factor))), self._action_length))
    # Target network does not work well here?
    # Actor error: distance between expert action and current policy output.
    self._actDiff = (self._model.getActionSymbolicVariable() -
                     self._q_valsActA_drop)
    # self._actDiff = ((self._model.getActionSymbolicVariable() - self._q_valsActA)) # Target network does not work well here?
    # self._actDiff_drop = ((self._model.getActionSymbolicVariable() - self._q_valsActA_drop)) # Target network does not work well here?
    ## This should be a single column vector
    # self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(( T.transpose(T.sum(T.pow(self._actDiff, 2),axis=1) )), (self._diff * (1.0/(1.0-self._discount_factor))))
    # self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(( T.reshape(T.sum(T.pow(self._actDiff, 2),axis=1), (self._batch_size, 1) )),
    #                                                                       (self._tmp_diff * (1.0/(1.0-self._discount_factor)))
    # self._actLoss_ = (T.mean(T.pow(self._actDiff, 2),axis=1))
    # Per-sample squared error weighted element-wise by the advantage proxy.
    self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
        (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
    # self._actLoss = T.sum(self._actLoss)/float(self._batch_size)
    self._actLoss = T.mean(self._actLoss_)
    # self._actLoss_drop = (T.sum(0.5 * self._actDiff_drop ** 2)/float(self._batch_size)) # because the number of rows can shrink
    # self._actLoss_drop = (T.mean(0.5 * self._actDiff_drop ** 2))
    self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                               self._actionParams)
    ## Clipping the max gradient
    """
    for x in range(len(self._policy_grad)):
        self._policy_grad[x] = T.clip(self._policy_grad[x] , -0.5, 0.5)
    """
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        self._actionUpdates = lasagne.updates.rmsprop(
            self._policy_grad, self._actionParams, self._learning_rate,
            self._rho, self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        self._actionUpdates = lasagne.updates.momentum(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        self._actionUpdates = lasagne.updates.adam(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            beta1=0.9,
            beta2=0.999,
            epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        self._actionUpdates = lasagne.updates.adagrad(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            epsilon=self._rms_epsilon)
    else:
        # NOTE(review): unlike the critic branch above, this does not
        # sys.exit on an unknown optimizer — confirm intended.
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])

    # actionUpdates = lasagne.updates.rmsprop(T.mean(self._q_funcAct_drop) +
    #   (self._regularization_weight * lasagne.regularization.regularize_network_params(
    #       self._model.getActorNetwork(), lasagne.regularization.l2)), actionParams,
    #           self._learning_rate * 0.5 * (-T.sum(actDiff_drop)/float(self._batch_size)), self._rho, self._rms_epsilon)
    self._givens_grad = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        # self._model.getResultStateSymbolicVariable(): self._model.getResultStates(),
        # self._model.getRewardSymbolicVariable(): self._model.getRewards(),
        # self._model.getActionSymbolicVariable(): self._model.getActions(),
    }

    ### Noisey state updates
    # self._target = (self._model.getRewardSymbolicVariable() + (np.array([self._discount_factor] ,dtype=np.dtype(self.getSettings()['float_type']))[0] * self._q_valsTargetNextState )) * self._NotFallen
    # self._target_dyna = theano.gradient.disconnected_grad(self._q_func)

    ## Bellman error
    self._bellman = self._target - self._q_funcTarget

    # self._target = self._model.getRewardSymbolicVariable() + (self._discount_factor * self._q_valsTargetNextState )
    ### Give v(s') the next state and v(s) (target) the current state
    self._diff_adv = (self._discount_factor *
                      self._q_func) - (self._q_valsTargetNextState)
    self._diff_adv_givens = {
        self._model.getStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getStates(),
    }

    Distillation.compile(self)
def __init__(self, signal_shape, filter_shape, poolsize, activation=None):
    """Build a 3-D convolutional autoencoder: a 'full' conv encoder, a
    'valid' conv decoder, an MSE reconstruction cost with adadelta updates,
    and compiled `train` / `get_activation` functions.
    """
    rng = np.random.RandomState(None)
    dtensor5 = T.TensorType('float32', (False, ) * 5)
    self.inputs = dtensor5(
        name='inputs')  #Return a Variable for a 5-dimensional ndarray
    # Shape bookkeeping; signal_shape is presumably
    # (batch, depth, channels, height, width) — confirm from callers.
    self.image_shape = signal_shape
    self.batchsize = signal_shape[0]
    self.in_channels = signal_shape[2]
    self.in_depth = signal_shape[1]
    self.in_width = signal_shape[4]
    self.in_height = signal_shape[3]
    self.flt_channels = filter_shape[0]
    self.flt_time = filter_shape[1]
    self.flt_width = filter_shape[4]
    self.flt_height = filter_shape[3]
    self.activation = activation

    # Encoder: 'full' convolution enlarges spatial dims by filter - 1.
    self.hidden_layer = ConvolutionLayer3D(rng,
                                           input=self.inputs,
                                           signal_shape=signal_shape,
                                           filter_shape=filter_shape,
                                           act=activation,
                                           border_mode='full',
                                           if_hidden_pool=False)
    self.hidden_image_shape = (self.batchsize, self.in_depth,
                               self.flt_channels,
                               self.in_height + self.flt_height - 1,
                               self.in_width + self.flt_width - 1)
    self.hidden_pooled_image_shape = (
        self.batchsize, self.in_depth / 2, self.flt_channels,
        (self.in_height + self.flt_height - 1) / 2,
        (self.in_width + self.flt_width - 1) / 2)
    self.hidden_filter_shape = (self.in_channels, self.flt_time,
                                self.flt_channels, self.flt_height,
                                self.flt_width)
    # Decoder: 'valid' convolution maps back to the input shape.
    self.recon_layer = ConvolutionLayer3D(
        rng,
        input=self.hidden_layer.output,
        signal_shape=self.hidden_image_shape,
        filter_shape=self.hidden_filter_shape,
        act=activation,
        border_mode='valid')
    self.layers = [self.hidden_layer, self.recon_layer]
    self.params = sum([layer.params for layer in self.layers], [])

    # Per-sample sum of squared reconstruction errors, then 0.5 * mean.
    L = T.sum(T.pow(T.sub(self.recon_layer.output, self.inputs), 2),
              axis=(1, 2, 3, 4))
    self.cost = 0.5 * T.mean(L)
    self.grads = T.grad(self.cost, self.params)
    self.updates = adadelta_updates(self.params,
                                    self.grads,
                                    rho=0.95,
                                    eps=1e-6)
    self.train = theano.function([self.inputs],
                                 self.cost,
                                 updates=self.updates,
                                 name="train cae model")
    # NOTE(review): `self.activation` is repurposed here — it held the
    # activation *function* above and now holds the pooled hidden tensor.
    self.activation = theano.tensor.signal.pool.pool_3d(
        input=self.hidden_layer.output.dimshuffle(0, 2, 1, 3, 4),
        ds=poolsize,
        ignore_border=True)
    self.activation = self.activation.dimshuffle(0, 2, 1, 3, 4)
    self.get_activation = theano.function([self.inputs],
                                          self.activation,
                                          updates=None,
                                          name='get hidden activation')
def __init__(self, model, n_in, n_out, state_bounds, action_bounds, reward_bound, settings_):
    """Set up the Distillation agent's Theano computation graphs.

    Loads the pre-trained expert policies, builds critic/actor outputs
    for both the live model and a deep-copied target model, defines the
    TD loss, regularization, and optimizer updates, and finally calls
    ``Distillation.compile`` to compile the Theano functions.
    """
    super(Distillation, self).__init__(model, n_in, n_out, state_bounds, action_bounds, reward_bound, settings_)
    # create a small convolutional neural network
    ### Load expert policy files
    self._expert_policies = []
    file_name_ = ""
    for i in range(len(self.getSettings()['expert_policy_files'])):
        file_name = self.getSettings(
        )['expert_policy_files'][i] + '/' + self.getSettings(
        )['model_type'] + '/' + getAgentName() + '.pkl'
        if (file_name_ == file_name):
            ## To help save memory when experts are the same
            self._expert_policies.append(model_)
        else:
            print("Loading pre compiled network: ", file_name)
            # NOTE(review): dill.load on a file is unsafe on untrusted
            # input — acceptable here only because these are local,
            # self-produced policy files.
            f = open(file_name, 'rb')
            model_ = dill.load(f)
            f.close()
            self._expert_policies.append(
                model_)  # expert model, load the 2 expert models
        file_name_ = file_name
    # Replay-style buffers filled elsewhere during training.
    self._actor_buffer_states = []
    self._actor_buffer_result_states = []
    self._actor_buffer_actions = []
    self._actor_buffer_rewards = []
    self._actor_buffer_falls = []
    self._actor_buffer_diff = []
    self._NotFallen = T.bcol("Not_Fallen")
    ## because float64 <= float32 * int32, need to use int16 or int8
    self._NotFallen.tag.test_value = np.zeros((self._batch_size, 1),
                                              dtype=np.dtype('int8'))
    self._NotFallen_shared = theano.shared(np.zeros((self._batch_size, 1),
                                                    dtype='int8'),
                                           broadcastable=(False, True))
    self._tmp_diff = T.col("Tmp_Diff")
    self._tmp_diff.tag.test_value = np.zeros(
        (self._batch_size, 1),
        dtype=np.dtype(self.getSettings()['float_type']))
    # Shared variable initialised to zeros; updated during training.
    self._tmp_diff_shared = theano.shared(
        np.zeros((self._batch_size, 1),
                 dtype=self.getSettings()['float_type']),
        broadcastable=(False, True))
    self._critic_regularization_weight = self.getSettings(
    )["critic_regularization_weight"]
    self._critic_learning_rate = self.getSettings()["critic_learning_rate"]
    ## Target network
    # Deep copy so target outputs stay fixed between sync points.
    self._modelTarget = copy.deepcopy(model)
    # Deterministic critic output for the current state.
    self._q_valsA = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    # Stochastic (dropout-enabled) critic output for the current state.
    self._q_valsA_drop = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)
    # Critic value of the next state.
    self._q_valsNextState = lasagne.layers.get_output(
        self._model.getCriticNetwork(),
        self._model.getResultStateSymbolicVariable(),
        deterministic=True)
    # Target critic's value of the next state.
    self._q_valsTargetNextState = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getResultStateSymbolicVariable(),
        deterministic=True)
    # Target critic's value of the current state.
    self._q_valsTarget = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    # Target critic's dropout-enabled value of the current state.
    self._q_valsTarget_drop = lasagne.layers.get_output(
        self._modelTarget.getCriticNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)
    self._q_valsActA = lasagne.layers.get_output(
        self._model.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)
    self._q_valsActTarget = lasagne.layers.get_output(
        self._modelTarget.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=True)  #remove the random
    # Actor output with dropout enabled.
    self._q_valsActA_drop = lasagne.layers.get_output(
        self._model.getActorNetwork(),
        self._model.getStateSymbolicVariable(),
        deterministic=False)
    self._q_func = self._q_valsA
    self._q_funcTarget = self._q_valsTarget
    self._q_func_drop = self._q_valsA_drop
    self._q_funcTarget_drop = self._q_valsTarget_drop
    self._q_funcAct = self._q_valsActA
    self._q_funcAct_drop = self._q_valsActA_drop
    # TD target: r + gamma * v_target(s'). getRewardSymbolicVariable()
    # reads the rewards (getRewards() = self._rewards_shared, updated
    # continually from zero-initialised buffers).
    self._target = self._model.getRewardSymbolicVariable() + (
        self._discount_factor * self._q_valsTargetNextState)
    self._diff = self._target - self._q_func
    # TD error against the dropout critic output.
    self._diff_drop = self._target - self._q_func_drop
    loss = T.pow(self._diff, 2)
    self._loss = T.mean(loss)  # mean squared TD error
    self._loss_drop = T.mean(0.5 * self._diff_drop**2)
    self._params = lasagne.layers.helper.get_all_params(
        self._model.getCriticNetwork())
    self._actionParams = lasagne.layers.helper.get_all_params(
        self._model.getActorNetwork())
    # Input substitutions for the critic update.
    self._givens_ = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getRewardSymbolicVariable(): self._model.getRewards()
    }
    # Input substitutions for the actor update.
    self._actGivens = {
        self._model.getStateSymbolicVariable(): self._model.getStates(),
        self._model.getActionSymbolicVariable(): self._model.getActions(),
        self._tmp_diff: self._tmp_diff_shared
    }
    self._critic_regularization = (
        self._critic_regularization_weight *
        lasagne.regularization.regularize_network_params(
            self._model.getCriticNetwork(), lasagne.regularization.l2))
    self._actor_regularization = (
        (self._regularization_weight *
         lasagne.regularization.regularize_network_params(
             self._model.getActorNetwork(), lasagne.regularization.l2)))
    if (self.getSettings()['use_previous_value_regularization']):
        # Penalize drift from the target actor's parameters.
        self._actor_regularization = self._actor_regularization + (
            (self.getSettings()['previous_value_regularization_weight']) *
            change_penalty(self._model.getActorNetwork(),
                           self._modelTarget.getActorNetwork()))
    elif ('regularization_type' in self.getSettings() and
          (self.getSettings()['regularization_type'] == 'KL_Divergence')):
        # KL between target and live policies, both with a fixed
        # exploration-rate standard deviation.
        self._kl_firstfixed = T.mean(
            kl(
                self._q_valsActTarget,
                T.ones_like(self._q_valsActTarget) *
                self.getSettings()['exploration_rate'], self._q_valsActA,
                T.ones_like(self._q_valsActA) *
                self.getSettings()['exploration_rate'],
                self._action_length))
        self._actor_regularization = (self._kl_firstfixed) * (
            self.getSettings()['kl_divergence_threshold'])
        print("Using regularization type : ",
              self.getSettings()['regularization_type'])
    # SGD update
    self._value_grad = T.grad(self._loss + self._critic_regularization,
                              self._params)
    # NOTE(review): the rmsprop branch uses self._learning_rate while
    # the others use self._critic_learning_rate — confirm intentional.
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.rmsprop(self._value_grad,
                                                 self._params,
                                                 self._learning_rate,
                                                 self._rho,
                                                 self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.momentum(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adam(self._value_grad,
                                              self._params,
                                              self._critic_learning_rate,
                                              beta1=0.9,
                                              beta2=0.9,
                                              epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        print("Optimizing Value Function with ",
              self.getSettings()['optimizer'], " method")
        self._updates_ = lasagne.updates.adagrad(
            self._value_grad,
            self._params,
            self._critic_learning_rate,
            epsilon=self._rms_epsilon)
    else:
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])
        sys.exit(-1)
    ## TD update
    ## Need to perform an element wise operation or replicate _diff for this to work properly.
    # Difference between the taken action and the dropout policy output.
    self._actDiff = (self._model.getActionSymbolicVariable() -
                     self._q_valsActA_drop)
    # Per-sample squared action error, weighted elementwise by _tmp_diff.
    self._actLoss_ = theano.tensor.elemwise.Elemwise(theano.scalar.mul)(
        (T.mean(T.pow(self._actDiff, 2), axis=1)), (self._tmp_diff))
    self._actLoss = T.mean(self._actLoss_)
    self._policy_grad = T.grad(self._actLoss + self._actor_regularization,
                               self._actionParams)
    ## Clipping the max gradient
    if (self.getSettings()['optimizer'] == 'rmsprop'):
        self._actionUpdates = lasagne.updates.rmsprop(
            self._policy_grad, self._actionParams, self._learning_rate,
            self._rho, self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'momentum'):
        self._actionUpdates = lasagne.updates.momentum(self._policy_grad,
                                                       self._actionParams,
                                                       self._learning_rate,
                                                       momentum=self._rho)
    elif (self.getSettings()['optimizer'] == 'adam'):
        self._actionUpdates = lasagne.updates.adam(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            beta1=0.9,
            beta2=0.999,
            epsilon=self._rms_epsilon)
    elif (self.getSettings()['optimizer'] == 'adagrad'):
        self._actionUpdates = lasagne.updates.adagrad(
            self._policy_grad,
            self._actionParams,
            self._learning_rate,
            epsilon=self._rms_epsilon)
    else:
        # NOTE(review): unlike the critic branch above, this path does
        # not sys.exit — confirm whether falling through is intended.
        print("Unknown optimization method: ",
              self.getSettings()['optimizer'])
    self._givens_grad = {
        self._model.getStateSymbolicVariable(): self._model.getStates()
    }
    ## Bellman error
    self._bellman = self._target - self._q_funcTarget
    ### Give v(s') the next state and v(s) (target) the current state
    # gamma * v(s) - v_target(s'), with states swapped via the givens
    # below so v(s) actually sees the result state.
    self._diff_adv = (self._discount_factor * self._q_func) - (
        self._q_valsTargetNextState)
    self._diff_adv_givens = {
        self._model.getStateSymbolicVariable():
        self._model.getResultStates(),
        self._model.getResultStateSymbolicVariable():
        self._model.getStates(),
    }
    Distillation.compile(self)
def mse(x, y):
    """Per-row sum of squared differences between ``x`` and ``y``.

    Note: despite the name, this *sums* the squared error along axis 1
    (no averaging), producing one value per row.
    """
    residual = x - y
    return T.sum(residual ** 2, axis=1)
def pow(x, a):
    """Raise each element of tensor ``x`` to the power ``a``.

    Thin wrapper over ``T.pow``; be aware this shadows the builtin
    ``pow`` within this module.
    """
    powered = T.pow(x, a)
    return powered