def prepareTraining(self):
    '''
    Prepares the relevant functions
    (details on neural_net_creator's prepareTraining)
    '''
    # loss objective to minimize
    self.prediction = lasagne.layers.get_output(self.network)
    self.prediction = self.prediction[:, 0]
    # self.loss = lasagne.objectives.categorical_crossentropy(self.prediction, self.target_var)
    # the loss is now the squared error in the output
    self.loss = lasagne.objectives.squared_error(self.prediction, self.target_var)
    self.loss = self.loss.mean()

    self.params = lasagne.layers.get_all_params(self.network, trainable=True)
    self.updates = lasagne.updates.nesterov_momentum(
        self.loss, self.params, learning_rate=0.01, momentum=0.9)

    self.test_prediction = lasagne.layers.get_output(self.network, deterministic=True)
    self.test_prediction = self.test_prediction[:, 0]
    self.test_loss = lasagne.objectives.squared_error(self.test_prediction, self.target_var)
    self.test_loss = self.test_loss.mean()
    # the accuracy is now the fraction of samples that achieve a 0.01 precision (can be changed)
    self.test_acc = T.mean(T.le(T.abs_(T.sub(self.test_prediction, self.target_var)), 0.01),
                           dtype=theano.config.floatX)
    self.test_acc2 = T.mean(T.le(T.abs_(T.sub(self.test_prediction, self.target_var)), 0.05),
                            dtype=theano.config.floatX)
    self.test_acc3 = T.mean(T.le(T.abs_(T.sub(self.test_prediction, self.target_var)), 0.1),
                            dtype=theano.config.floatX)

    self.train_fn = theano.function([self.input_var, self.target_var],
                                    self.loss, updates=self.updates)
    self.val_fn = theano.function([self.input_var, self.target_var],
                                  [self.test_loss, self.test_acc, self.test_acc2, self.test_acc3])
    self.use = theano.function([self.input_var], [self.test_prediction])
def f_score(self, y, label):
    # print dir(x)
    y = T.cast(y, 'int32')
    new_y_pred = T.sub(self.y_pred, label)
    new_y = T.sub(y, label)
    pre_pos_num = new_y_pred.shape[0] - new_y_pred.nonzero()[0].shape[0]  # number of predicted positives
    real_pos = new_y.shape[0] - new_y.nonzero()[0].shape[0]
    new_y_pred = T.set_subtensor(new_y_pred[new_y_pred.nonzero()[0]], 1)
    new_y = T.set_subtensor(new_y[new_y.nonzero()[0]], 2)
    r = T.neq(new_y_pred, new_y)
    true_pos = self.y_pred.shape[0] - r.sum()
    # printed_recall = theano.printing.Print('rec:')(pre_pos_num)
    # printed = theano.printing.Print('pre:')(real_pos)
    precision = true_pos / (T.cast(pre_pos_num, 'float32') + 0.0000001)
    recall = true_pos / (T.cast(real_pos, 'float32') + 0.0000001)
    f_score = (2 * precision * recall) / (precision + recall)
    return f_score, precision, recall
def minus_corr(u, v):
    um = T.sub(u, T.mean(u))
    vm = T.sub(v, T.mean(v))
    r_num = T.sum(T.mul(um, vm))
    r_den = T.sqrt(T.mul(T.sum(T.sqr(um)), T.sum(T.sqr(vm))))
    r = T.true_div(r_num, r_den)
    r = T.neg(r)
    return r
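A minimal usage sketch (not from the original source): since minus_corr builds the negative Pearson correlation of two vectors, compiling it and comparing against numpy.corrcoef is a quick sanity check.

import numpy as np
import theano
import theano.tensor as T

u, v = T.dvector('u'), T.dvector('v')
neg_corr = theano.function([u, v], minus_corr(u, v))

a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([1.1, 2.3, 2.9, 4.2])
print(neg_corr(a, b))            # close to -1.0 for strongly correlated inputs
print(-np.corrcoef(a, b)[0, 1])  # reference value from NumPy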
def get_output_for(self, inputs, **kwargs):
    # inputs[0]: (BS, max_sentlen, emb_size), inputs[1]: (BS, 1, emb_size), inputs[2]: (BS, max_sentlen)
    # activation0 = (T.dot(inputs[0], self.W_h)).reshape([self.batch_size, self.max_sentlen]) + self.b_h.repeat(self.batch_size, 0).repeat(self.max_sentlen, 1)
    # activation1 = T.dot(inputs[1], self.W_q).reshape([self.batch_size]).dimshuffle(0, 'x')
    # activation2 = T.batched_dot(T.dot(inputs[0], self.W_o), inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])

    # standard dot-product scoring:
    # activation2 = T.batched_dot(inputs[0], inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    # norm1 = T.sqrt(T.sum(T.square(inputs[0]), axis=2)) + 1e-15
    # norm2 = T.sqrt(T.sum(T.square(inputs[1]), axis=2))
    # activation2 = activation2 / (norm1 + norm2)

    # scoring by the negative of the Euclidean distance:
    activation2 = -T.sqrt(T.sum(T.square(T.sub(inputs[0], inputs[1].repeat(self.max_sentlen, 1))), axis=2))
    # norm2 = T.sqrt(T.sum(T.mul(inputs[0], inputs[0]), axis=2)) + 1e-15
    # activation2 = activation2 / norm2
    # activation = (self.nonlinearity(activation0) + self.nonlinearity(activation1) + activation2).reshape([self.batch_size, self.max_sentlen])
    # activation2 = (activation2).reshape([self.batch_size, self.max_sentlen])
    # final = T.dot(activation, self.W_o)  # (BS, max_sentlen)
    # activation3 = T.batched_dot(inputs[0], inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    # if inputs[2] is not None:
    #     final = inputs[2] * final - (1 - inputs[2]) * 1000000
    alpha = lasagne.nonlinearities.softmax(activation2)  # (BS, max_sentlen)
    return alpha
def get_cost_updates(self, corruption_level, learning_rate):
    """ This function computes the cost and the updates for one training
    step of the dA """
    # this is how if-then-else is written in Theano
    tilde_x = T.switch(T.gt(corruption_level, 0),
                       self.get_corrupted_input(self.x, corruption_level),
                       self.x)
    y = self.get_hidden_values(tilde_x)
    z = self.get_reconstructed_input(y)
    act = T.dot(tilde_x, self.W) + self.b
    # note : we sum over the size of a datapoint; if we are using
    #        minibatches, L will be a vector, with one entry per
    #        example in minibatch
    # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
    # note : L is now a vector, where each element is the
    #        cross-entropy cost of the reconstruction of the
    #        corresponding example of the minibatch. We need to
    #        compute the average of all these to get the cost of
    #        the minibatch
    L = T.sqrt(T.sum(T.sqr(T.sub(self.x, z)), axis=1))
    reg = T.sum(y, axis=0) / T.shape(y)[0]  # mean activation over the minibatch
    rho = T.constant(0.05)
    beta = T.constant(self.beta)
    reg1 = T.sum(rho * T.log(rho / reg) + (1 - rho) * T.log((1 - rho) / (1 - reg)))
    cost = T.mean(L) + beta * reg1
    # compute the gradients of the cost of the `dA` with respect
    # to its parameters
    gparams = T.grad(cost, self.params)
    # generate the list of updates
    updates = {}
    for param, gparam in zip(self.params, gparams):
        updates[param] = param - learning_rate * gparam
    return (cost, collections.OrderedDict(updates.items()))
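For reference, the reg1 term above is the standard KL-divergence sparsity penalty of a sparse autoencoder: with target sparsity rho = 0.05 and mean hidden activation rho_hat_j of unit j over the minibatch,

    KL(rho || rho_hat_j) = rho * log(rho / rho_hat_j) + (1 - rho) * log((1 - rho) / (1 - rho_hat_j))

and the cost adds beta * sum_j KL(rho || rho_hat_j) to the mean reconstruction error.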
def full(self, X, Xs=None):
    X, Xc, Xs = self._common(X, Xs)
    if Xs is None:
        return tt.dot(Xc, tt.transpose(Xc))
    else:
        Xsc = tt.sub(Xs, self.c)
        return tt.dot(Xc, tt.transpose(Xsc))
def full(self, X, Z=None):
    X, Xc, Z = self._common(X, Z)
    if Z is None:
        return tt.dot(Xc, tt.transpose(Xc))
    else:
        Zc = tt.sub(Z, self.c)
        return tt.dot(Xc, tt.transpose(Zc))
def get_cost_update(self, learning_rate=0.1):
    """Get cost updates

    Parameters
    ----------
    learning_rate : float
        learning rate of sgd
    """
    L = T.sum(T.pow(T.sub(self.get_decode(), self.input), 2), axis=1)
    cost = 0.5 * T.mean(L)

    d_b = T.grad(cost, self.BIAS)
    d_net_out = T.grad(cost, self.decode_layer.pooled_out)
    d_b_decode = T.grad(cost, self.decode_layer.b)
    d_W_decode = T.grad(cost, self.decode_layer.W)
    print(d_b_decode.type())
    print(d_W_decode.type())

    # d_b_encode = T.sum(d_net_out, axis=[0, 1, 2])
    # print(d_b_encode.type())
    # d_W_decode = self.decode_layer.getCP(data_in=self.encode_layer.output,
    #                                      filters=d_net_out)
    # print(d_W_decode.shape)

    d_net_in = self.decode_layer.getConvPoolB(data_in=d_net_out,
                                              filters=self.decode_layer.B)
    # T.dot(self.decode_layer.B, d_net_out)
    # print(d_net_in.type())
    d_net_in_delta = d_net_in * self.encode_layer.d_activation(self.encode_layer.pooled_out)
    print(d_net_in_delta.type())
    d_b_encode = T.sum(d_net_in_delta, axis=[0, 1, 2])
    d_W_encode = T.dot(d_net_in_delta, self.input.T)
    print(d_W_encode.type())

    d_W = [d_W_encode, d_W_decode]
    updates_weights = [(param_i, param_i - learning_rate * d_W_i)
                       for param_i, d_W_i in zip(self.WEIGHTS, d_W)]
    updates_bias = [(param_i, param_i - learning_rate * d_b_i)
                    for param_i, d_b_i in zip(self.BIAS, d_b)]
    updates = updates_weights + updates_bias

    # L_B = T.sum(T.pow(T.sub(self.recon_layer.output_B, self.input), 2), axis=1)
    # cost_B = 0.5 * T.mean(L_B)
    # grad_weights = T.grad(cost, self.WEIGHTS)
    # updates_weights = [(param_i, param_i - learning_rate * (grad_i + learning_rate * rw_i))
    #                    for param_i, grad_i, rw_i in zip(self.WEIGHTS, grad_weights, self.RW)]
    # grad_bias = T.grad(cost, self.BIAS)
    # updates_bias = [(param_i, param_i - learning_rate * grad_i)
    #                 for param_i, grad_i in zip(self.BIAS, grad_bias)]
    # updates = updates_weights + updates_bias
    return (cost, updates)
def get_updates(self, learning_rate, corruption_level=None, L1_rate=0.000, L2_rate=0.000):
    if corruption_level is not None:
        x = self.get_corruption_input(self.input, corruption_level)
        y = self.decode_layer.get_output(self.encode_layer.get_output(x))
    else:
        y = self.decode_layer.out_feature_maps

    cost = T.sum(T.pow(T.sub(self.decode_layer.out_feature_maps, self.feature_maps), 2), axis=1)
    # cost = self.get_cost(self.feature_maps, y)
    cost += 0.001 * ((self.encode_layer.filters ** 2).sum() + (self.decode_layer.filters ** 2).sum())
    cost = T.mean(cost)

    params = self.encode_layer.params + self.decode_layer.params
    gparams = T.grad(cost, params)
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, gparams)]
    return cost, updates
def get_cost_updates(self, corruption_level, learning_rate):
    """ This function computes the cost and the updates for one training
    step of the dA """
    tilde_x = self.get_corrupted_input(self.x, corruption_level)
    y = self.get_hidden_values(tilde_x)
    z = self.get_reconstructed_input(y)
    # note : we sum over the size of a datapoint; if we are using
    #        minibatches, L will be a vector, with one entry per
    #        example in minibatch
    # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
    # note : L is now a vector, where each element is the
    #        cross-entropy cost of the reconstruction of the
    #        corresponding example of the minibatch. We need to
    #        compute the average of all these to get the cost of
    #        the minibatch
    L = T.sqrt(T.sum(T.sqr(T.sub(self.x, z)), axis=1))
    cost = T.mean(L)
    # compute the gradients of the cost of the `dA` with respect
    # to its parameters
    gparams = T.grad(cost, self.params)
    # generate the list of updates
    updates = {}
    for param, gparam in zip(self.params, gparams):
        updates[param] = param - learning_rate * gparam
    return (cost, updates)
def free_energy(self, v_sample):
    ''' Function to compute the free energy '''
    wx_b = T.dot(v_sample, self.W) + self.hbias
    diff_v_vbias = T.sub(v_sample, self.vbias)
    # per-sample squared norm ||v - vbias||^2; the original computed
    # T.dot(diff_v_vbias, diff_v_vbias.T), which yields an NxN matrix for a
    # minibatch of N samples instead of a length-N vector
    vbias_term = T.sum(T.sqr(diff_v_vbias), axis=1)
    hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    return 0.5 * vbias_term - hidden_term
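With the per-sample vbias_term above, this matches the free energy of an RBM with Gaussian visible units (an inference from the quadratic visible term, not stated in the source):

    F(v) = 0.5 * ||v - vbias||^2 - sum_j log(1 + exp((v.W + hbias)_j))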
def get_cost_update(self, learningrate=0.1):
    '''
    '''
    L = T.sum(T.pow(T.sub(self.recon_layer.outputs, self.inputs), 2), axis=1)
    cost = 0.5 * T.mean(L)
    grads = T.grad(cost, self.params)
    updates = [(param_i, param_i - learningrate * grad_i)
               for param_i, grad_i in zip(self.params, grads)]
    return (cost, updates)
def quadratic_weighted_kappa_loss(y_true, y_pred):
    min_rating = T.minimum(T.min(y_true), T.min(y_pred))
    max_rating = T.maximum(T.max(y_true), T.max(y_pred))
    hist_true = T.bincount(y_true, minlength=max_rating)
    hist_pred = T.bincount(y_pred, minlength=max_rating)
    num_ratings = (max_rating - min_rating) + 1
    # originally float(len(y_true)), which fails on symbolic tensors:
    num_scored = T.cast(y_true.shape[0], theano.config.floatX)

    numerator = T.zeros(1)
    denominator = T.zeros(1)
    z = T.zeros_like(y_true)  # originally T.zeros(len(y_true))
    # NOTE: the Python-level loops below require the rating range to be known
    # when the graph is built, i.e. min_rating/max_rating must be concrete
    # integers rather than the symbolic scalars computed above.
    for i_true in range(min_rating, max_rating + 1):
        for j_pred in range(min_rating, max_rating + 1):
            expected = T.true_div(T.mul(hist_true[i_true], hist_pred[j_pred]),
                                  num_scored)
            d = T.true_div(T.sqr(i_true - j_pred), T.sqr(num_ratings - 1.))
            conf_mat_cell = T.sum(T.and_(T.eq(T.sub(y_true, i_true), z),
                                         T.eq(T.sub(y_pred, j_pred), z)))
            numerator = T.add(numerator,
                              T.true_div(T.mul(d, conf_mat_cell), num_scored))
            denominator = T.add(denominator,
                                T.true_div(T.mul(d, expected), num_scored))
    return T.true_div(numerator, denominator)
def _add_noise_to_input(rng, layer, p):
    """
    p is the probability of replacing a unit
    with a random number (from 0 to 255)
    """
    if p > 0:
        srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
        # p = 1 - p because 1's indicate keep and p is prob of dropping
        dropmask = T.cast(srng.binomial(n=1, p=1 - p, size=layer.shape),
                          theano.config.floatX)
        noise = T.cast(srng.uniform(low=0., high=255., size=layer.shape),
                       theano.config.floatX)
        # The cast is important because
        # int * float32 = float64, which pulls things off the gpu
        output = (layer * dropmask) + (noise * T.sub(1, dropmask))
        return output
    return layer
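A hedged usage sketch (shapes and values are illustrative): applying the noise op to an all-zero float32 image, where roughly p of the entries are replaced by uniform noise in [0, 255).

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(42)
layer = T.fmatrix('layer')
noisy = theano.function([layer], _add_noise_to_input(rng, layer, 0.3))
print(noisy(np.zeros((2, 5), dtype=np.float32)))  # ~30% of entries are now noise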
def f_score(self, y, label):
    # print dir(x)
    y = T.cast(y, "int32")
    new_y_pred = T.sub(self.y_pred, label)
    new_y = T.sub(y, label)
    pre_pos_num = new_y_pred.shape[0] - new_y_pred.nonzero()[0].shape[0]  # number of predicted positives
    real_pos = new_y.shape[0] - new_y.nonzero()[0].shape[0]
    new_y_pred = T.set_subtensor(new_y_pred[new_y_pred.nonzero()[0]], 1)
    new_y = T.set_subtensor(new_y[new_y.nonzero()[0]], 2)
    r = T.neq(new_y_pred, new_y)
    true_pos = self.y_pred.shape[0] - r.sum()
    precision = true_pos / T.cast(pre_pos_num, "float32")
    recall = true_pos / T.cast(real_pos, "float32")
    f_score = (2 * precision * recall) / (precision + recall)
    return f_score, precision, recall
def create_weight_update_functions(self):
    updates = []
    for i in range(len(self.error_gradients)):
        updates.append((
            self.weights[i],
            g(T.sub(self.weights[i],
                    T.mul(T.mul(self.error_gradients[-(i + 1)], self.alpha),
                          self.batch_size_divisor))),
        ))
        updates.append((
            self.biases[i],
            g(T.sub(self.biases[i],
                    T.mul(T.mul(self.errors[-(i + 1)], self.alpha),
                          self.batch_size_divisor))),
        ))
    self.update_weight_function = function(inputs=[self.idx, self.alpha], updates=updates)
def euclidean_loss(self, y):
    """Return the mean squared error between the prediction of this model
    and a given target distribution.

    .. math::

        \\frac{1}{|\\mathcal{D}|} \\sum_{i=0}^{|\\mathcal{D}|}
        \\left( y^{(i)} - P(Y|x^{(i)}, W, b) \\right)^2

    :type y: theano.tensor.TensorType
    :param y: corresponds to a matrix where 1 indicates which class the
              sample belongs to
    """
    return T.mean(T.sub(y, self.p_y_given_x) ** 2)
def get_output_for(self, inputs, **kwargs):
    # inputs[0]: (BS, max_sentlen, emb_size), inputs[1]: (BS, 1, emb_size), inputs[2]: (BS, max_sentlen)
    # activation0 = (T.dot(inputs[0], self.W_h)).reshape([self.batch_size, self.max_sentlen]) + self.b_h.repeat(self.batch_size, 0).repeat(self.max_sentlen, 1)
    # activation1 = T.dot(inputs[1], self.W_q).reshape([self.batch_size]).dimshuffle(0, 'x')
    # activation2 = T.batched_dot(T.dot(inputs[0], self.W_o), inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])

    # input preprocessing / normalisation options:
    # inputs[0] = inputs[0] / (T.sqrt(T.sum(T.square(inputs[0]), axis=2)).reshape([self.batch_size, self.max_sentlen, 1]).repeat(self.embedding_size, 2)) - 1
    # inputs[1] = inputs[1] / (T.sqrt(T.sum(T.square(inputs[1]), axis=2)).reshape([self.batch_size, 1, 1]).repeat(self.embedding_size, 2)) - 1
    # aver0 = T.mean(inputs[0], -1).reshape([self.batch_size, self.max_sentlen, 1]).repeat(self.embedding_size, 2)
    # var0 = T.sqrt(T.var(inputs[0], -1)).reshape([self.batch_size, self.max_sentlen, 1]).repeat(self.embedding_size, 2)
    # inputs[0] = (inputs[0] - aver0) / var0
    # aver1 = T.mean(inputs[1], -1).reshape([self.batch_size, 1, 1]).repeat(self.embedding_size, 2)
    # var1 = T.sqrt(T.var(inputs[1], -1)).reshape([self.batch_size, 1, 1]).repeat(self.embedding_size, 2)
    # inputs[1] = (inputs[1] - aver1) / var1

    # standard dot-product scoring:
    # activation2 = T.batched_dot(inputs[0], inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    # print('metric:dot')

    # cosine scoring:
    # activation2 = T.batched_dot(inputs[0], inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    # norm1 = T.sqrt(T.sum(T.square(inputs[0]), axis=2)) + 1e-15
    # norm2 = T.sqrt(T.sum(T.square(inputs[1]), axis=2))
    # activation2 = activation2 / (norm1 * norm2)
    # print('metric:cos')

    # scoring by the negative of the Euclidean distance:
    activation2 = -T.sqrt(T.sum(T.square(T.sub(inputs[0], inputs[1].repeat(self.max_sentlen, 1))), axis=2) + 1e-15)
    print('metric:distance')
    # norm2 = T.sqrt(T.sum(T.mul(inputs[0], inputs[0]), axis=2)) + 1e-15
    # activation2 = activation2 / norm2
    # activation = (self.nonlinearity(activation0) + self.nonlinearity(activation1) + activation2).reshape([self.batch_size, self.max_sentlen])
    # activation2 = (activation2).reshape([self.batch_size, self.max_sentlen])
    # final = T.dot(activation, self.W_o)  # (BS, max_sentlen)
    # activation3 = T.batched_dot(inputs[0], inputs[1].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    # if inputs[2] is not None:
    #     final = inputs[2] * final - (1 - inputs[2]) * 1000000
    alpha = lasagne.nonlinearities.softmax(activation2)  # (BS, max_sentlen)
    return alpha
def generateData(dim, num):
    names = []
    data = {}
    regions = {}
    targets = {}
    srng = RandomStreams()
    rv_u = srng.uniform(dim)
    rv_u = T.mul(rv_u, 50.0)
    rv_u = T.sub(rv_u, 25.0)
    f = function([], rv_u)
    for i in range(num):
        name = str(i)
        names.append(name)
        data[name] = f()
        regions[name] = (0, 0, dim[0] - 1, dim[1] - 1)
        # targets[name] = np.sum(data[name], dtype=np.float32)
        targets[name] = np.amax(data[name])
    return names, data, regions, targets
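A hedged usage sketch, assuming the snippet's module-level imports (np, T, function, RandomStreams) are in place:

names, data, regions, targets = generateData((4, 4), num=3)
print(names)            # ['0', '1', '2']
print(data['0'].shape)  # (4, 4); entries drawn uniformly from [-25.0, 25.0)
print(regions['0'])     # (0, 0, 3, 3)
print(targets['0'])     # max over that 4x4 sample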
def create_momentum_weight_update_functions(self):
    momentum_updates = []
    for i in range(len(self.H.L.momentum_weights)):
        momentum_updates.append((
            self.H.L.momentum_weights[i],
            g(T.mul(self.batch_size_divisor,
                    T.sub(T.mul(self.M, self.H.L.momentum_weights[i]),
                          T.mul(self.alpha, self.error_gradients[-(i + 1)])))),
        ))
    self.H.L.momentum_update_function = function(inputs=[self.idx, self.M, self.alpha],
                                                 updates=momentum_updates)
def get_cost_update(self, learning_rate=0.1):
    """Get cost updates

    Parameters
    ----------
    learning_rate : float
        learning rate of sgd
    """
    L = T.sum(T.pow(T.sub(self.get_reconstruction(), self.input), 2), axis=1)
    cost = 0.5 * T.mean(L)
    grads = T.grad(cost, self.params)
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(self.params, grads)]
    return (cost, updates)
def create_backprop_gradient_functions(self):
    self.errors = []
    self.error_gradients = []
    error_function = None
    error_gradient = None
    for i in range(len(self.weights)):
        if len(self.errors) == 0:
            # this is the last layer of the net: the error is X - t because of
            # the combination of softmax and cross-entropy cost function
            error_function = g(T.sub(self.feedforward, self.t[self.idx]))
            self.errors.append(error_function)
            error_gradient = g(T.dot(self.z[-2].T, self.errors[i]))
            error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, -1)
            self.error_gradients.append(error_gradient)
        elif (len(self.weights) - 1) == i:
            # this involves the input X instead of z-values, as these are the
            # first weights that need to be updated
            self.errors.append(
                g(T.mul(T.dot(self.errors[-1], self.weights[1].T),
                        self.layers[1].activation_derivative(self.z[0]))))
            error_gradient = g(T.dot(self.X[self.idx].T, self.errors[-1]))
            # error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, 0)
            self.error_gradients.append(error_gradient)
        else:
            self.errors.append(
                g(T.mul(T.dot(self.errors[-1], self.weights[-i].T),
                        self.layers[-(i + 1)].activation_derivative(self.z[-(i + 1)]))))
            error_gradient = g(T.dot(self.z[-(i + 2)].T, self.errors[-1]))
            # error_gradient = self.apply_L2_penalties_error_gradients(error_gradient, -(i+1))
            self.error_gradients.append(error_gradient)
def get_output_for(self, inputs, **kwargs):
    '''inputs[0] is the memory (bs*path_length, n_classes, h_dim);
    inputs[1] is the hidden state (bs*path_length, 1, h_dim)'''
    # content-based part of the score
    # activation0 = (T.dot(inputs[0][:, :, :self.h_dim], self.W_h)).reshape([self.batch_size, self.max_sentlen]) + self.b_h.repeat(self.batch_size, 0).repeat(self.max_sentlen, 1)
    # activation1 = T.dot(inputs[1][:, :, :self.h_dim], self.W_q).reshape([self.batch_size]).dimshuffle(0, 'x')
    # activation2 = T.batched_dot(inputs[0][:, :, :self.h_dim], inputs[1][:, :, :self.h_dim].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    activation2 = -T.sqrt(T.sum(T.square(T.sub(inputs[0], inputs[1].repeat(self.max_sentlen, 1))), axis=2))
    # activation2 = T.batched_dot(T.dot(inputs[0][:, :, :self.h_dim], self.W_o), inputs[1][:, :, :self.h_dim].reshape([self.batch_size, self.embedding_size, 1])).reshape([self.batch_size, self.max_sentlen])
    # norm2 = T.sqrt(T.sum(T.mul(inputs[0][:, :, :self.h_dim], inputs[0][:, :, :self.h_dim]), axis=2)) + 0.0000001
    # activation2 = activation2 / norm2
    activation = activation2.reshape([self.batch_size, self.max_sentlen])
    alpha = lasagne.nonlinearities.softmax(activation)  # (BS, max_sentlen)

    # label-based part of the score
    # activation0 = (T.dot(inputs[0][:, :, self.h_dim:], self.W_h_label)).reshape([self.batch_size, self.max_sentlen]) + self.b_h_label.repeat(self.batch_size, 0).repeat(self.max_sentlen, 1)
    # activation1 = T.dot(inputs[1][:, :, self.h_dim:], self.W_q_label).reshape([self.batch_size]).dimshuffle(0, 'x')
    activation2 = T.batched_dot(T.dot(inputs[0][:, :, self.h_dim:], self.W_o_label),
                                inputs[1][:, :, self.h_dim:].reshape([self.batch_size, self.n_classes, 1])
                                ).reshape([self.batch_size, self.max_sentlen])
    activation = activation2.reshape([self.batch_size, self.max_sentlen])
    beta = lasagne.nonlinearities.softmax(activation)  # (BS, max_sentlen)
    alpha = lasagne.nonlinearities.softmax(alpha + 5 * beta)
    return beta
    # NOTE: unreachable in the original; the function returns beta above
    return alpha
def w_brier_loss(o, f, class_w):
    """f is the forecast and o is the original outcome"""
    print(class_w)
    return T.mean(T.dot(T.square(T.sub(f, o)), class_w), axis=-1)
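A hedged usage sketch (array values are illustrative): compiling w_brier_loss for one-hot outcomes o, probabilistic forecasts f, and per-class weights class_w.

import numpy as np
import theano
import theano.tensor as T

o, f = T.dmatrix('o'), T.dmatrix('f')
w = T.dvector('class_w')
brier = theano.function([o, f, w], w_brier_loss(o, f, w))

outcomes = np.array([[1., 0.], [0., 1.]])
forecasts = np.array([[0.8, 0.2], [0.3, 0.7]])
weights = np.array([1.0, 2.0])
print(brier(outcomes, forecasts, weights))  # approx. 0.195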
def __init__(self, data, image_shape, filter_shape, poolsize, sparse_coeff,
             activation='sigmoid', tied_weight=False, is_linear=False,
             do_max_pool=False):
    rng = np.random.RandomState(None)
    self.data = data
    self.batchsize = image_shape[0]
    self.in_channels = image_shape[1]
    self.in_height = image_shape[2]
    self.in_width = image_shape[3]
    self.flt_channels = filter_shape[0]
    self.flt_height = filter_shape[2]
    self.flt_width = filter_shape[3]
    self.input = T.ftensor4('input')
    # self.input = input.reshape(image_shape)
    hidden_layer = ConvolutionLayer(rng,
                                    input=self.input,
                                    filter_shape=filter_shape,
                                    act=activation,
                                    border_mode='full',
                                    if_pool=do_max_pool)
    self.hidden_image_shape = (self.batchsize,
                               self.flt_channels,
                               self.in_height + self.flt_height - 1,
                               self.in_width + self.flt_width - 1)
    self.hidden_pooled_image_shape = (self.batchsize,
                                      self.flt_channels,
                                      (self.in_height + self.flt_height - 1) / 2,
                                      (self.in_width + self.flt_width - 1) / 2)
    self.hidden_filter_shape = (self.in_channels,
                                self.flt_channels,
                                self.flt_height,
                                self.flt_width)
    if sparse_coeff == 0:
        if do_max_pool:
            hidden_layer_output = repeat(hidden_layer.output, repeats=2, axis=2)
            hidden_layer_output = repeat(hidden_layer_output, repeats=2, axis=3)
        else:
            hidden_layer_output = hidden_layer.output
    else:
        feature_map = hidden_layer.output
        # first per featuremap, then across featuremaps
        # feature_map_vec = feature_map.reshape((feature_map.shape[0],
        #                                        feature_map.shape[1],
        #                                        feature_map.shape[2] * feature_map.shape[3]))
        # feat_sparsity = feature_map_vec.norm(2, axis=2)
        # feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
        # feature_map1 = np.divide(feature_map, feat_sparsity + 1e-9)
        # examp_sparsity = feature_map1.norm(2, axis=1)
        # examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
        # feature_map2 = np.divide(feature_map1, examp_sparsity + 1e-9)

        # first across featuremaps, then per featuremap
        examp_sparsity = feature_map.norm(2, axis=1)
        examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
        feature_map1 = np.divide(feature_map, examp_sparsity + 1e-9)
        feature_map1_vec = feature_map1.reshape((feature_map1.shape[0],
                                                 feature_map1.shape[1],
                                                 feature_map1.shape[2] * feature_map1.shape[3]))
        feat_sparsity = feature_map1_vec.norm(2, axis=2)
        feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
        feature_map2 = np.divide(feature_map1, feat_sparsity + 1e-9)
        if do_max_pool:
            hidden_layer_output = repeat(feature_map2, repeats=2, axis=2)
            hidden_layer_output = repeat(hidden_layer_output, repeats=2, axis=3)
        else:
            hidden_layer_output = feature_map2

    # recon_layer_input = hidden_layer_output
    if is_linear:
        recon_layer = ConvolutionLayer(rng,
                                       input=hidden_layer_output,
                                       filter_shape=self.hidden_filter_shape,
                                       act='linear',
                                       border_mode='valid')
    else:
        recon_layer = ConvolutionLayer(rng,
                                       input=hidden_layer_output,
                                       filter_shape=self.hidden_filter_shape,
                                       act=activation,
                                       border_mode='valid')

    self.tied_weight = tied_weight
    if self.tied_weight:
        # recon_layer.W = hidden_layer.W
        # recon_layer.W = recon_layer.W.dimshuffle(1, 0, 2, 3)
        weight = hidden_layer.W.get_value()
        recon_layer.W.set_value(weight.transpose(1, 0, 2, 3), borrow=True)
    self.layers = [hidden_layer, recon_layer]
    self.params = sum([layer.params for layer in self.layers], [])
    # self.params = hidden_layer.params + recon_layer.params

    L1_sparsity = hidden_layer_output.norm(1, axis=(2, 3))
    # L1_sparsity = T.sum(np.abs(feature_map2), axis=(2, 3))
    # sparse_filter = T.mean(L1_sparsity.sum(axis=1), axis=(0))
    sparse_filter = T.mean(L1_sparsity, axis=(0, 1))
    # sparsity = T.mean(feature_map2, axis=(2, 3))
    # sparse_filter = T.mean(sparsity, axis=(0, 1))

    # L = T.sum(T.pow(T.sub(recon_layer.output, self.input), 2), axis=0)
    L = T.sum(T.pow(T.sub(recon_layer.output, self.input), 2),
              axis=(1, 2, 3))  # sum over channel, height, width
    cost = 0.5 * T.mean(L) + sparse_coeff * sparse_filter
    grads = T.grad(cost, self.params)
    # learning_rate = 0.1
    # updates = [(param_i, param_i - learning_rate * grad_i)
    #            for param_i, grad_i in zip(self.params, grads)]
    updates = adadelta_updates(self.params, grads, rho=0.95, eps=1e-6)

    # self.train = theano.function(
    #     [self.input],
    #     cost,
    #     updates=updates,
    #     name="train cae model")
    index = T.lscalar('index')
    batch_begin = index * self.batchsize
    batch_end = batch_begin + self.batchsize
    self.train = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={self.input: self.data[batch_begin:batch_end]},
        name="train cae model")

    self.activation = downsample.max_pool_2d(
        input=hidden_layer.output,
        ds=poolsize,
        ignore_border=True)
    # self.get_activation = theano.function(
    #     [self.input],
    #     self.activation,
    #     updates=None,
    #     name='get hidden activation')
    # num = T.bscalar
    self.get_activation = theano.function(
        inputs=[index],
        # outputs=self.activation,
        outputs=hidden_layer.output if do_max_pool else self.activation,
        updates=None,
        givens={self.input: self.data[batch_begin:batch_end]},
        name='get hidden activation')

    # self.get_reconstruction = theano.function(
    #     inputs=[self.input],
    #     outputs=recon_layer.output,
    #     updates=None,
    #     name='get reconstruction')
    self.get_reconstruction = theano.function(
        inputs=[index],
        outputs=recon_layer.output,
        updates=None,
        givens={self.input: self.data[batch_begin:batch_end]},
        name='get reconstruction')
activation_k = T.dmatrix('Layer3 outputs')
t = T.dmatrix('Actual output')
delta_w1 = T.dmatrix('Delta w1')
delta_w2 = T.dmatrix('Delta w2')
eta = 0.1

# equations
h = T.dot(x, w1)
activation_h = T.nnet.sigmoid(h)
k = T.dot(activation_h, w2)
activation_k = T.nnet.softmax(k)
cost = (T.sum(T.sub(activation_k, t) ** 2)) / (2 * X.shape[0])
delta_w1 = T.grad(cost, w1)
delta_w2 = T.grad(cost, w2)

# w1 = w1 - eta * delta w
update_w1 = (w1, w1 - eta * delta_w1)
update_w2 = (w2, w2 - eta * delta_w2)
updates = [update_w1, update_w2]

for i in range(Y.shape[0]):
    value = Y[i]
    target[i][value] = value
def MSE_tensor(y, y_pred):
    return T.mean(T.pow(T.sub(y, y_pred), 2))
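A hedged usage sketch: compiling MSE_tensor and evaluating it on two small vectors.

import numpy as np
import theano
import theano.tensor as T

y, y_pred = T.dvector('y'), T.dvector('y_pred')
mse = theano.function([y, y_pred], MSE_tensor(y, y_pred))
print(mse(np.array([1., 2., 3.]), np.array([1.5, 2., 2.5])))  # (0.25 + 0 + 0.25) / 3 = 0.1666...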
def __init__(self, signal_shape, filter_shape, poolsize, activation=None):
    rng = np.random.RandomState(None)
    dtensor5 = T.TensorType('float32', (False,) * 5)
    self.inputs = dtensor5(name='inputs')
    self.image_shape = signal_shape
    self.batchsize = signal_shape[0]
    self.in_channels = signal_shape[2]
    self.in_depth = signal_shape[1]
    self.in_width = signal_shape[4]
    self.in_height = signal_shape[3]
    self.flt_channels = filter_shape[0]
    self.flt_time = filter_shape[1]
    self.flt_width = filter_shape[4]
    self.flt_height = filter_shape[3]
    self.activation = activation

    self.hidden_layer = ConvolutionLayer3D(rng,
                                           input=self.inputs,
                                           signal_shape=signal_shape,
                                           filter_shape=filter_shape,
                                           act=activation,
                                           border_mode='full',
                                           if_hidden_pool=False)
    self.hidden_image_shape = (self.batchsize,
                               self.in_depth,
                               self.flt_channels,
                               self.in_height + self.flt_height - 1,
                               self.in_width + self.flt_width - 1)
    self.hidden_pooled_image_shape = (self.batchsize,
                                      self.in_depth / 2,
                                      self.flt_channels,
                                      (self.in_height + self.flt_height - 1) / 2,
                                      (self.in_width + self.flt_width - 1) / 2)
    self.hidden_filter_shape = (self.in_channels,
                                self.flt_time,
                                self.flt_channels,
                                self.flt_height,
                                self.flt_width)
    self.recon_layer = ConvolutionLayer3D(rng,
                                          input=self.hidden_layer.output,
                                          signal_shape=self.hidden_image_shape,
                                          filter_shape=self.hidden_filter_shape,
                                          act=activation,
                                          border_mode='valid')
    self.layers = [self.hidden_layer, self.recon_layer]
    self.params = sum([layer.params for layer in self.layers], [])

    L = T.sum(T.pow(T.sub(self.recon_layer.output, self.inputs), 2),
              axis=(1, 2, 3, 4))
    self.cost = 0.5 * T.mean(L)
    self.grads = T.grad(self.cost, self.params)
    self.updates = adadelta_updates(self.params, self.grads, rho=0.95, eps=1e-6)
    self.train = theano.function(
        [self.inputs],
        self.cost,
        updates=self.updates,
        name="train cae model")

    self.activation = pools.pool_3d(
        input=self.hidden_layer.output.dimshuffle(0, 2, 1, 3, 4),
        ds=poolsize,
        ignore_border=True)
    self.activation = self.activation.dimshuffle(0, 2, 1, 3, 4)
    self.get_activation = theano.function(
        [self.inputs],
        self.activation,
        updates=None,
        name='get hidden activation')
def main():
    # load the training and validation data sets
    # labels = int(0.7 * image.all_count)
    X = T.tensor4()

    # set up theano functions to generate output by feeding data through network
    output_layer_softmax, output_layer_triplet = lasagne_model()
    output_train = lasagne.layers.ReshapeLayer(output_layer_triplet, (-1, 3, [1]))
    output_0 = lasagne.layers.helper.get_output(lasagne.layers.SliceLayer(output_train, 0, 1), X)
    output_1 = lasagne.layers.helper.get_output(lasagne.layers.SliceLayer(output_train, 1, 1), X)
    output_2 = lasagne.layers.helper.get_output(lasagne.layers.SliceLayer(output_train, 2, 1), X)
    output = lasagne.layers.helper.get_output(output_layer_softmax, X)

    # set up the triplet loss that we aim to minimize
    eps = 1e-10
    dis_pos = T.sqrt(T.sum(T.square(T.sub(output_0, output_1)), 1) + eps)
    dis_neg = T.sqrt(T.sum(T.square(T.sub(output_0, output_2)), 1) + eps)
    dis = (dis_pos - dis_neg + alpha)
    # dis = (dis_pos - dis_neg)
    loss_train = T.mean((dis) * (dis > 0))
    # loss_train = T.sum(T.nnet.relu(dis))
    # loss_train = T.mean(dis)

    # prediction function for classification
    pred = T.argmax(output, axis=1)

    # get parameters from network and set up the optimiser to update parameters
    params = lasagne.layers.get_all_params(output_layer_triplet, trainable=True)
    # params = params[-4:]  # TODO: !!!!!!!!!!!!!!!!!!!
    grad = T.grad(loss_train, params)
    # updates = nesterov_momentum(loss_train, params, learning_rate=0.03, momentum=0.9)
    updates = lasagne.updates.rmsprop(loss_train, params, learning_rate=0.0002)
    # updates = lasagne.updates.get_or_compute_grads(loss_train, params)

    # set up training and prediction functions
    train = theano.function(inputs=[X],
                            outputs=[loss_train, pred, dis, dis_pos, dis_neg],
                            updates=updates,
                            allow_input_downcast=True)

    if load_params:
        pre_params = pickle.load(gzip.open(load_params))
        lasagne.layers.set_all_param_values(output_layer_softmax, pre_params)
        print('load Success.')

    for i in range(4500):
        aver_loss = 0
        for idx_batch in range(num_batches):
            train_X = np.zeros([BATCHSIZE, 3, PIXELS, PIXELS])
            for iii in range(BATCHSIZE):
                label = random.choice(train_files)
                num_slots = random.randint(1, 5)
                im_aim_list = random.sample(np.load(train_load_path + label), num_slots)
                tmp_sum = 0
                for iidx, shot in enumerate(im_aim_list):
                    tmp_sum += shot
                im_aim = tmp_sum / float(num_slots)
                # im_aim = tmp_sum
                im_aim_list = random.sample(np.load(train_load_path + label), num_slots)
                tmp_sum = 0
                for iidx, shot in enumerate(im_aim_list):
                    tmp_sum += shot
                im_pos = tmp_sum / float(num_slots)
                # im_pos = tmp_sum
                while True:
                    label_neg = random.choice(train_files)
                    if label != label_neg:
                        im_neg_list = random.sample(np.load(train_load_path + label_neg), num_slots)
                        tmp_sum = 0
                        for iidx, shot in enumerate(im_neg_list):
                            tmp_sum += shot
                        im_neg = tmp_sum / float(num_slots)
                        # im_neg = tmp_sum
                        break
                train_X[iii, 0] = im_aim
                train_X[iii, 1] = im_pos
                train_X[iii, 2] = im_neg
            train_X = train_X.reshape(BATCHSIZE * 3, 1, PIXELS, PIXELS)
            xx_batch = np.float32(train_X)
            # print(xx_batch.shape)
            # yy_batch = np.float32(train_y[idx_batch * BATCHSIZE:(idx_batch + 1) * BATCHSIZE])
            train_loss, pred, dis, dis1, dis2 = train(xx_batch)
            aver_loss += train_loss
            # count = np.count_nonzero(np.int32(pred == np.argmax(yy_batch, axis=1)))
            if idx_batch % 3 == 0:
                print(i, idx_batch, '| Tloss:', train_loss, pred,
                      '\ndis_pos:{}\ndis_neg:{}\ndis:{}'.format(dis1[:20], dis2[:20], dis[:20]))
                # print(pred)
                # print(np.argmax(yy_batch, axis=1))
                print("time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        # save weights
        if i % 1 == 0:
            aver_loss = aver_loss / num_batches
            all_params = helper.get_all_param_values(output_layer_softmax)
            f = gzip.open('speech_params/speech_{}_batchnorm_12345aver_{}_triplet_{}.pklz'.format(
                aver_loss, alpha, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), 'wb')
            pickle.dump(all_params, f)
            f.close()
def _common(self, X, Xs=None):
    X, Xs = self._slice(X, Xs)
    Xc = tt.sub(X, self.c)
    return X, Xc, Xs
def apply(self, inputs, time_step, states, cells, time_scale, time_offset, mask=None):
    """Apply the Long Short Term Memory transition.

    Parameters
    ----------
    states : :class:`~tensor.TensorVariable`
        The 2 dimensional matrix of current states in the shape
        (batch_size, features). Required for `one_step` usage.
    cells : :class:`~tensor.TensorVariable`
        The 2 dimensional matrix of current cells in the shape
        (batch_size, features). Required for `one_step` usage.
    inputs : :class:`~tensor.TensorVariable`
        The 2 dimensional matrix of inputs in the shape
        (batch_size, features * 4). The `inputs` needs to be four times the
        dimension of the LSTM brick to ensure each of the four gates receives
        a different transformation of the input. See [Grav13]_
        equations 7 to 10 for more details. The `inputs` are then split in
        this order: input gates, forget gates, cells and output gates.
    mask : :class:`~tensor.TensorVariable`
        A 1D binary array in the shape (batch,) which is 1 if there is data
        available, 0 if not. Assumed to be 1-s only if not given.

    .. [Grav13] Graves, Alex, *Generating sequences with recurrent
       neural networks*, arXiv preprint arXiv:1308.0850 (2013).

    Returns
    -------
    states : :class:`~tensor.TensorVariable`
        Next states of the network.
    cells : :class:`~tensor.TensorVariable`
        Next cell activations of the network.
    """
    def activate_lstm(self, inputs, states, cells, mask=None):
        def slice_last(x, no):
            return x[:, no * self.dim: (no + 1) * self.dim]

        activation = tensor.dot(states, self.W_state) + inputs
        in_gate = self.gate_activation.apply(
            slice_last(activation, 0) + cells * self.W_cell_to_in)
        forget_gate = self.gate_activation.apply(
            slice_last(activation, 1) + cells * self.W_cell_to_forget)
        next_cells = (
            forget_gate * cells +
            in_gate * self.activation.apply(slice_last(activation, 2)))
        out_gate = self.gate_activation.apply(
            slice_last(activation, 3) + next_cells * self.W_cell_to_out)
        next_states = out_gate * self.activation.apply(next_cells)

        if mask is not None:  # originally `if mask:`, which is unreliable for symbolic variables
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
            next_cells = (mask[:, None] * next_cells +
                          (1 - mask[:, None]) * cells)
        return next_states, next_cells

    def do_nothing(states, cells):
        return states, cells

    # only run the LSTM transition when (time_step - time_offset) is a
    # multiple of time_scale; otherwise carry states and cells through
    result = ifelse(tensor.eq(tensor.mod(tensor.sub(time_step, time_offset), time_scale), 0),
                    activate_lstm(self, inputs, states, cells, mask),
                    do_nothing(states, cells))
    return result
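A minimal illustration (not from the source) of why theano.ifelse.ifelse is used above rather than T.switch: ifelse is lazy and evaluates only the branch that is taken, which is what makes skipping the LSTM transition on off-scale time steps cheap.

import theano
import theano.tensor as T
from theano.ifelse import ifelse

t = T.iscalar('t')
a, b = T.dscalar('a'), T.dscalar('b')
f = theano.function([t, a, b], ifelse(T.eq(t % 3, 0), a * 2, b + 1))
print(f(6, 10.0, 99.0))  # 20.0: the first branch runs
print(f(7, 10.0, 99.0))  # 100.0: the second branch runs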
cv_size = T.fscalar("cv_size")
drop_input = lambda rand: T.reshape(
    bino_input[rand:rand + (batch_size * dim_visible)],
    (batch_size, dim_visible))
input_drop = drop_input(rdm.random_integers(low=0, high=sample_range_dropout))

h = T.nnet.sigmoid(T.add(T.dot(v, w_vh), w_h))
u_w_plus = function([], updates=[(wu_vh, g(T.add(wu_vh, T.dot(v.T, h)))),
                                 (wu_v, g(T.add(T.sum(v[:], axis=0), wu_v))),
                                 (wu_h, g(T.add(T.sum(h[:], axis=0), wu_h)))])
u_w_minus = function([], updates=[(wu_vh, g(T.sub(wu_vh, T.dot(v.T, h)))),
                                  (wu_v, g(T.sub(T.sum(v[:], axis=0), wu_v))),
                                  (wu_h, g(T.sub(T.sum(h[:], axis=0), wu_h)))])

sample = lambda rdm: T.reshape(
    uniform_sample[rdm:rdm + (dim_hidden * batch_size)],
    (batch_size, dim_hidden))
gibbs = T.cast(T.gt(h, sample(rdm.random_integers(low=0, high=sample_range))), 'float32')
update_v = function(
    [], outputs=[g(T.nnet.sigmoid(T.add(T.dot(gibbs, w_vh.T), w_v)))])
update_w = function([alpha], updates=[(w_vh, g(T.add(w_vh, T.mul(alpha, wu_vh)))),
def _def_cost_acc(self):
    l2_norm_squared = sum([(p ** 2).sum() for p in self.to_regularize])
    self.cost = T.sum((T.sub(self.outputs, self.y)) ** 2).mean() + \
        self.lmbd * l2_norm_squared
    diff = abs(T.argmax(self.outputs, axis=1) - T.argmax(self.y, axis=1))
    self.acc = T.sub(1, 1. * T.nonzero(diff)[0].shape[0] / self.y.shape[0])
def sub(x, y):
    z = T.sub(x, y)
    if isinstance(get_shape(x), (tuple, list)):
        output_shape = auto_infer_shape(T.sub, x, y)
        add_shape(z, output_shape)
    return z
def __init__(self, nnet, dataset=None, learning_rate=0.01, beta=0.0,
             sparsity=0.01, weight_decay=0.0, momentum=0.5):
    if len(dataset) < 2:
        print("Error: dataset must contain the tuple (train_data, train_target)")
    train_data, train_target = dataset

    target = T.matrix('y')
    square_error = T.mean(0.5 * T.sum(T.pow(target - nnet.output, 2), axis=1))
    avg_activate = T.mean(nnet.hiddenLayer[0].output, axis=0)
    sparsity_penalty = beta * T.sum(
        T.mul(T.log(sparsity / avg_activate), sparsity) +
        T.mul(T.log((1 - sparsity) / T.sub(1, avg_activate)), (1 - sparsity)))
    regularization = 0.5 * weight_decay * (T.sum(T.pow(nnet.params[0], 2)) +
                                           T.sum(T.pow(nnet.params[2], 2)))
    cost = square_error + sparsity_penalty + regularization

    gparams = [T.grad(cost, param) for param in nnet.params]
    w_deltas = []
    for param in nnet.params:
        w_deltas.append(theano.shared(value=param.get_value() * 0, borrow=True))
    new_params = [param - (learning_rate * gparam + momentum * w_delta)
                  for param, gparam, w_delta in zip(nnet.params, gparams, w_deltas)]
    updates = [(param, new_param) for param, new_param in zip(nnet.params, new_params)]
    updates += [(w_delta, learning_rate * gparam + momentum * w_delta)
                for w_delta, gparam in zip(w_deltas, gparams)]

    # `input` and `batch_size` are assumed to be defined at module level in
    # the original source (the network's input variable and the batch size)
    index = T.lscalar()
    self.train = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            input: train_data[index * batch_size: (index + 1) * batch_size],
            target: train_target[index * batch_size: (index + 1) * batch_size]
        }
    )
    self.cost = theano.function(
        inputs=[],
        outputs=cost,
        givens={
            input: train_data,
            target: train_target
        }
    )
f1([[4, 3], [1, 3, 3, 2], [1, 2, 2]], 4)
f2 = theano.function([x, max_x], o)
f2([[4, 3], [1, 3, 3, 2], [1, 2, 2]], 4)

x = T.imatrix('x')
x_vec = T.ivector('x_vec')
fe = theano.function([x_vec], E[:, x_vec])

shape_sub = shared(0)
a = T.sub(T.shape(x[0]), T.shape(x[1]))
# originally `f = func([x], a, updates={(shape_sub, a[0])})`; theano.function
# expects a list (or dict) of updates, not a set
f = theano.function([x], a, updates=[(shape_sub, a[0])])
f([[[4, 3], [3, 7]], 2])
f2 = T.zeros(shape_sub)
T.zeros(a[0]).eval({x: [[4, 3, 1, 6, 6, 7, 8], [5, 7, 6], [4, 7, 1, 1]]})
f([[3], [3, 1, 3]])[-1]
f([[3, 1, 4]])

x1 = T.ivector('x1')
x2 = T.ivector('x2')
shape_sub = T.sub(T.shape(x1), T.shape(x2))
vec = T.ivector('x1')
def crf_par_01_gpu(aryDpth, vecEmpX, strFunc='power', varNumIt=1000, varNumX=1000, varXmin=0.0, varXmax=1.0, varNumOp=10000): """ Parallelised bootstrapping of contrast response function, level 1. Parameters ---------- aryDpth : np.array Array with empirical response data, of the form aryDpth[idxRoi, idxSub, idxCon, idxDpt]. vecEmpX : np.array Empirical x-values at which model will be fitted (e.g. stimulus contrast levels at which stimuli were presented), of the form vecEmpX[idxCon]. strFunc : str Which contrast response function to fit. 'power' for power function, or 'hyper' for hyperbolic ratio function. varNumIt : int Number of bootstrapping iterations (i.e. how many times to sample). varPar : int Number of process to run in parallel. varNumX : int Number of x-values for which to solve the function when calculating model fit. varXmin : float Minimum x-value for which function will be fitted. varXmax : float Maximum x-value for which function will be fitted. varNumOp: int Number of optimisation steps for function fitting. Returns ------- aryMdlY : np.array Fitted y-values (predicted response based on CRF model), of the form aryMdlY[idxRoi, idxIteration, idxDpt, varNumX], where varNumX is the number of data points at which the fitted function is evaluated (e.g. 1000). aryHlfMax : np.array Predicted response at 50 percent contrast based on CRF model. Array of the form aryHlfMax[idxRoi, idxIteration, idxDpt]. arySemi : np.array Semisaturation contrast (predicted contrast needed to elicit 50 percent of the response amplitude that would be expected with a 100 percent contrast stimulus). Array of the form arySemi[idxRoi, idxIteration, idxDpt]. aryRes : np.array Residual variance at empirical contrast levels. Array of the form aryRes[idxRoi, idxIteration, idxCondition, idxDpt]. Notes ----- NOTE: HYPERBOLIC RATIO NOT YET IMPLEMENTED FOR THEANO. This function parallelises the contrast response function fitting by calling a second-level function using the multiprocessing module. Function of the depth sampling pipeline. """ # ------------------------------------------------------------------------ # *** Prepare bootstrapping print('---Preparing bootstrapping') # Check time: varTme01 = time.time() # Number of ROIs: varNumIn = aryDpth.shape[0] # Number of subjects: varNumSubs = aryDpth.shape[1] # Number of conditions: varNumCon = aryDpth.shape[2] # Number of depth levels: varNumDpth = aryDpth.shape[3] # Random array with subject indicies for bootstrapping of the form # aryRnd[varNumIt, varNumSmp]. Each row includes the indicies of the # subjects to the sampled on that iteration. 
aryRnd = np.random.randint(0, high=varNumSubs, size=(varNumIt, varNumSubs)) ## Initialise array for random samples: #aryDpthRnd = np.zeros((varNumIt, varNumIn, varNumSubs, varNumCon, varNumDpth)) # ## Fill array with resampled samples: #for idxIt in range(varNumIt): # aryDpthRnd[idxIt, :, :, :, :] = aryDpth[:, aryRnd[idxIt, :], :, :] # ## Take mean within random samples: #aryDpthRnd = np.mean(aryDpthRnd, axis=2) # ## Total number of CRF models to fit: #varNumTtl = (varNumIn * varNumDpth * varNumIt) # ## Reshape: #aryDpthRnd = np.reshape(aryDpthRnd, (varNumTtl, varNumCon)) # Total number of CRF models to fit: varNumTtl = (varNumIn * varNumDpth * varNumIt) # Array for resampled samples: aryDpthRnd = np.zeros((varNumTtl, varNumSubs, varNumCon)) # Fill array with resampled samples: varCnt = 0 for idxIn in range(varNumIn): for idxIt in range(varNumIt): for idxDpth in range(varNumDpth): aryDpthRnd[varCnt, :, :] = aryDpth[idxIn, aryRnd[idxIt, :], :, idxDpth] varCnt += 1 # Take mean within random samples: aryDpthRnd = np.mean(aryDpthRnd, axis=1) # Check time: varTme02 = time.time() # Report time: varTme03 = np.around((varTme02 - varTme01), decimals=3) print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl) + ' iterations.')) # ------------------------------------------------------------------------ # *** Theano CRF fitting print('---Theano CRF fitting') # Check time: varTme01 = time.time() # Boradcast array with X data, and change data type to float 32: aryEmpX = np.broadcast_to(vecEmpX, (varNumTtl, vecEmpX.shape[0])) aryEmpX = aryEmpX.astype(th.config.floatX) aryDpthRnd = aryDpthRnd.astype(th.config.floatX) # The CRF: # varR = varA * np.power(varC, varB) def model(aryC, vecA, vecB): return T.mul(T.pow(aryC, vecB[:, None]), vecA[:, None]) # Initialise theano arrays for emprical X and Y data: TaryEmpX = T.matrix() TaryDpthRnd = T.matrix() # Initialise model parameters: vecA = np.ones((varNumTtl)) vecB = np.ones((varNumTtl)) vecA = np.multiply(vecA, 0.5) vecB = np.multiply(vecB, 0.5) vecA = vecA.astype(dtype=th.config.floatX) vecB = vecB.astype(dtype=th.config.floatX) # Create shared theano object for model parameters: TvecA = th.shared(vecA) TvecB = th.shared(vecB) # Model prediction for theano: TobjMdlPre = model(TaryEmpX, TvecA, TvecB) # Learning rate: varLrnRt = np.float32(0.0001) # Cost function: # cost = T.mean(T.sqr(y - Y)) TobjCst = T.sum(T.sqr(T.sub(TobjMdlPre, TaryDpthRnd))) # Gradients for cost function TobGrd01 = T.grad(cost=TobjCst, wrt=TvecA) TobGrd02 = T.grad(cost=TobjCst, wrt=TvecB) # How to update the cost function: lstUp = [(TvecA, (TvecA - TobGrd01 * varLrnRt)), (TvecB, (TvecB - TobGrd02 * varLrnRt))] # Define the theano function that will be optimised: TcrfPwOp = th.function(inputs=[TaryEmpX, TaryDpthRnd], outputs=TobjCst, updates=lstUp) # allow_input_downcast=True) # Do not check input data type: # train.trust_input = True ## Array for theano model parameters, of the form ## aryMdlParT[idxTotalIterations, freeModelParameters]: aryMdlParT = np.zeros((varNumTtl, 2)).astype(th.config.floatX) # Optimise function: for idxThn in range(varNumOp): TcrfPwOp(aryEmpX, aryDpthRnd) # Save model parameter A: aryMdlParT[:, 0] = TvecA.get_value() # Save model parameter B: aryMdlParT[:, 1] = TvecB.get_value() # Check time: varTme02 = time.time() # Report time: varTme03 = np.around((varTme02 - varTme01), decimals=3) print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl) + ' iterations.')) # ------------------------------------------------------------------------ # 
# *** Apply CRF

print('---Theano CRF evaluation')

# Check time:
varTme01 = time.time()

# Vector for which the fitted function will be evaluated:
vecMdlX = np.linspace(varXmin, varXmax, num=varNumX, endpoint=True)

# Broadcast array with X data, and change data type to float32:
aryMdlX = np.broadcast_to(vecMdlX, (varNumTtl, varNumX))
aryMdlX = aryMdlX.astype(th.config.floatX)

# Change data type to float32:
aryMdlParT = aryMdlParT.astype(th.config.floatX)

# Initialise theano array for model X data:
TaryMdlX = T.matrix()

# Create shared theano objects for the fitted model parameters:
TvecMdlParA = th.shared(aryMdlParT[:, 0])
TvecMdlParB = th.shared(aryMdlParT[:, 1])

# Model to evaluate, like before (i.e. similar to the model that was
# optimised, but this time with the fitted parameter values as input):
TobjMdlEval = model(TaryMdlX, TvecMdlParA, TvecMdlParB)

# Function definition for evaluation:
TcrfPwEval = th.function([TaryMdlX], TobjMdlEval)

# Evaluate function (get predicted y values of the CRF for all resampling
# iterations). Returns an array with the y-values of the fitted function
# (for each iteration & depth level), of the form
# aryMdlY[varNumTtl, varNumX].
aryMdlY = TcrfPwEval(aryMdlX)

# Check time:
varTme02 = time.time()

# Report time:
varTme03 = np.around((varTme02 - varTme01), decimals=3)
print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl)
       + ' iterations.'))

# ------------------------------------------------------------------------
# *** Calculate predicted response at 50% contrast

# Vector for which the function will be evaluated (contrast = 0.5):
vecMdl50 = np.ones((varNumTtl, 1))
vecMdl50 = np.multiply(vecMdl50, 0.5)
vecMdl50 = vecMdl50.astype(dtype=th.config.floatX)

# Evaluate function. Returns an array with the responses at half maximum
# contrast, of the form aryHlfMax[varNumTtl, 1].
aryHlfMax = TcrfPwEval(vecMdl50)

# ------------------------------------------------------------------------
# *** Calculate predicted response at empirical contrast levels

# We calculate the predicted response at the actually tested empirical
# contrast levels in order to subsequently calculate the residuals of the
# model fit at those contrast levels.

# Evaluate function (get predicted y values of the CRF for the empirically
# measured contrast values):
aryMdlEmpX = TcrfPwEval(aryEmpX)

# ------------------------------------------------------------------------
# *** Calculate semisaturation contrast

print('---Calculating semisaturation contrast')

# Check time:
varTme01 = time.time()

# We first need to calculate the response at 100% contrast.

# Vector for which the function will be evaluated (contrast = 1.0):
vecMdl100 = np.ones((varNumTtl, 1))
vecMdl100 = vecMdl100.astype(dtype=th.config.floatX)

# Evaluate function. Returns an array with the responses at full (100%)
# contrast, of the form aryResp100[varNumTtl, 1].
aryResp100 = TcrfPwEval(vecMdl100)

# Half maximum response:
aryResp50 = np.multiply(aryResp100, 0.5)
aryResp50 = aryResp50.astype(dtype=th.config.floatX)

# Initialise theano array for the half maximum response:
TaryResp50 = T.matrix()

# Initialise vector for the semisaturation constant:
arySemi = np.ones((varNumTtl, 1))
arySemi = np.multiply(arySemi, 0.2)
arySemi = arySemi.astype(dtype=th.config.floatX)

# Create shared theano object for the semisaturation contrast (the
# parameter to be optimised):
TarySemi = th.shared(arySemi)

# Model for finding the semisaturation contrast:
TobjMdlSemi = model(TarySemi, TvecMdlParA, TvecMdlParB)

# Cost function for finding the semisaturation contrast (squared distance
# between the model response at TarySemi and the half maximum response):
TobjCst = T.sum(T.sqr(T.sub(TobjMdlSemi[:], TaryResp50[:])))

# Gradient of the cost function:
TobGrdSemi = T.grad(cost=TobjCst, wrt=TarySemi)

# Learning rate:
varLrnRt = np.float32(0.0001)

# How to update the shared variable:
lstUp = [(TarySemi, (TarySemi - TobGrdSemi * varLrnRt))]

# Define the theano function that will be optimised:
TcrfPwSemi = th.function(inputs=[TaryResp50],
                         outputs=TobjCst,
                         updates=lstUp)
#                        allow_input_downcast=True)

# Optimise function (gradient descent on the semisaturation contrast):
for idxThn in range(varNumOp):
    TcrfPwSemi(aryResp50)

# Save semisaturation contrast:
arySemi = TarySemi.get_value()

# Check time:
varTme02 = time.time()

# Report time:
varTme03 = np.around((varTme02 - varTme01), decimals=3)
print(('---Elapsed time: ' + str(varTme03) + ' s for ' + str(varNumTtl)
       + ' iterations.'))

# ------------------------------------------------------------------------
# *** Reshape

# Reshape array with fitted y-values, from
#     aryMdlY[varNumTtl, varNumX]
# to
#     aryMdlY[idxRoi, idxIteration, idxDpt, varNumX]
aryMdlYRs = np.zeros((varNumIn, varNumIt, varNumDpth, varNumX),
                     dtype=th.config.floatX)
varCnt = 0
for idxIn in range(varNumIn):
    for idxIt in range(varNumIt):
        for idxDpth in range(varNumDpth):
            aryMdlYRs[idxIn, idxIt, idxDpth, :] = aryMdlY[varCnt, :]
            varCnt += 1
del aryMdlY
aryMdlY = np.copy(aryMdlYRs)
del aryMdlYRs

# Reshape array with predicted response at 50 percent contrast, from
#     aryHlfMax[varNumTtl, 1]
# to
#     aryHlfMax[idxRoi, idxIteration, idxDpt]
aryHlfMaxRs = np.zeros((varNumIn, varNumIt, varNumDpth),
                       dtype=th.config.floatX)
varCnt = 0
for idxIn in range(varNumIn):
    for idxIt in range(varNumIt):
        for idxDpth in range(varNumDpth):
            aryHlfMaxRs[idxIn, idxIt, idxDpth] = aryHlfMax[varCnt, :]
            varCnt += 1
del aryHlfMax
aryHlfMax = np.copy(aryHlfMaxRs)
del aryHlfMaxRs

# Reshape array with semisaturation contrast, from
#     arySemi[varNumTtl, 1]
# to
#     arySemi[idxRoi, idxIteration, idxDpt]
arySemiRs = np.zeros((varNumIn, varNumIt, varNumDpth),
                     dtype=th.config.floatX)
varCnt = 0
for idxIn in range(varNumIn):
    for idxIt in range(varNumIt):
        for idxDpth in range(varNumDpth):
            arySemiRs[idxIn, idxIt, idxDpth] = arySemi[varCnt, :]
            varCnt += 1
del arySemi
arySemi = np.copy(arySemiRs)
del arySemiRs

# Reshape array with predicted response at the empirical contrast values,
# from
#     aryMdlEmpX[varNumTtl, varNumCon]
# to
#     aryMdlEmpX[idxRoi, idxIteration, idxCondition, idxDpt]
aryMdlEmpXRs = np.zeros((varNumIn, varNumIt, varNumCon, varNumDpth),
                        dtype=th.config.floatX)
varCnt = 0
for idxIn in range(varNumIn):
    for idxIt in range(varNumIt):
        for idxDpth in range(varNumDpth):
            aryMdlEmpXRs[idxIn, idxIt, :, idxDpth] = aryMdlEmpX[varCnt, :]
            varCnt += 1
del aryMdlEmpX
aryMdlEmpX = np.copy(aryMdlEmpXRs)
del aryMdlEmpXRs

# ------------------------------------------------------------------------
# *** Calculate residuals

# Mean across subjects of the (full) empirical dataset:
aryEmpYMne = np.mean(aryDpth, axis=1)
aryEmpYMne = aryEmpYMne.astype(dtype=th.config.floatX)

# Absolute residuals of the model fit at the empirical contrast levels
# (note: absolute deviations, not variances). Array of the form
# aryRes[idxRoi, idxIteration, idxCondition, idxDpt].
aryRes = np.absolute(np.subtract(aryMdlEmpX, aryEmpYMne[:, None, :, :]))

# ------------------------------------------------------------------------
# *** Return

return aryMdlY, aryHlfMax, arySemi, aryRes
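Side note: since varCnt advances in exactly the C (row-major) order that the nested loops traverse (ROI, then iteration, then depth), each of the four reshape loop blocks above should reduce to a single call, assuming varNumTtl == varNumIn * varNumIt * varNumDpth as the loops imply. A sketch, not from the source:

# Sketch only: single-call equivalents of the reshape loops above, assuming
# the rows are ordered ROI -> iteration -> depth (C order).
aryMdlY = aryMdlY.reshape((varNumIn, varNumIt, varNumDpth, varNumX))
aryHlfMax = aryHlfMax.reshape((varNumIn, varNumIt, varNumDpth))
arySemi = arySemi.reshape((varNumIn, varNumIt, varNumDpth))
# For aryMdlEmpX the condition axis has to be swapped with the depth axis:
aryMdlEmpX = (aryMdlEmpX
              .reshape((varNumIn, varNumIt, varNumDpth, varNumCon))
              .transpose(0, 1, 3, 2))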
Wlk = shared(np.random.rand(numOutputNeurons, numHiddenNeurons2) * 0.01)

# input values
X = T.dmatrix('X')
# output values
t = T.dmatrix('t')

# output of first hidden layer
Aji = T.nnet.sigmoid(T.dot(Wji, X.T))
# output of second hidden layer (prediction)
# Akj = T.nnet.softmax(T.dot(Wo1, Aji))
Akj = T.dot(Wo1, Aji)

# error function of the first pre-training stage
# E = T.mean(T.nnet.categorical_crossentropy(Akj.T, t))
E = T.sum(T.sub(Akj.T, t)**2) / (2 * trainX.shape[0])

# gradient of the error with respect to the weights of the first hidden layer
gradji = T.grad(E, Wji)
# gradient of the error with respect to the weights of the output layer in
# pre-training, when one hidden layer is present
grado1 = T.grad(E, Wo1)

updates = [(Wji, Wji - pre_eta * gradji),
           (Wo1, Wo1 - pre_eta * grado1)]

# pre-training function
pre_training_first_stack = function(inputs=[X, t], outputs=[E],
                                    updates=updates)

# output of first hidden layer
# Aji = T.nnet.sigmoid(T.dot(Wji, X.T))
Aji = T.tanh(T.dot(Wji, X.T))
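The excerpt above uses Wji, Wo1, pre_eta and trainX without defining them. A minimal sketch of the setup it seems to assume, mirroring the initialisation style of the Wlk line; the layer sizes and the learning rate here are purely illustrative guesses:

import numpy as np
from theano import shared

# Hypothetical dimensions; only Wlk's shape is given in the excerpt.
numInputs = 784
numHiddenNeurons1 = 256
numOutputNeurons = 10

# First hidden layer and pre-training output layer weights, initialised
# like Wlk above:
Wji = shared(np.random.rand(numHiddenNeurons1, numInputs) * 0.01)
Wo1 = shared(np.random.rand(numOutputNeurons, numHiddenNeurons1) * 0.01)

# Pre-training learning rate (illustrative value):
pre_eta = 0.1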
def R2loss(y_true, y_pred):
    y_pred = y_pred.flatten()
    y_true = y_true.flatten()
    # Total sum of squares (spread of the targets around their mean):
    tot = T.sum(T.sqr(T.sub(y_true, T.mean(y_true))))
    # Residual sum of squares:
    res = T.sum(T.sqr(T.sub(y_true, y_pred)))
    # res / tot equals 1 - R^2, so minimising this loss maximises R^2.
    return T.true_div(res, tot)
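A quick sanity check of the loss, compiling it into a callable (toy data, not from the source):

import numpy as np
import theano
import theano.tensor as T

yt = T.dvector('y_true')
yp = T.dvector('y_pred')
r2_loss = theano.function([yt, yp], R2loss(yt, yp))

# Perfect prediction gives 0; predicting the mean gives 1.
print(r2_loss(np.array([1., 2., 3.]), np.array([1., 2., 3.])))  # ~0.0
print(r2_loss(np.array([1., 2., 3.]), np.array([2., 2., 2.])))  # ~1.0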
def l2_norm(self, y):
    # Pick, for each sample, the prediction at the column given by its
    # label y, and take the mean squared difference to the label itself.
    return T.mean(T.sub(self.predict_y[T.arange(y.shape[0]), y], y) ** 2)
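The self.predict_y[T.arange(y.shape[0]), y] pattern selects one prediction per row, at the column given by that row's label. The same advanced indexing in plain NumPy (toy arrays, not from the source):

import numpy as np

pred = np.array([[0.1, 0.7, 0.2],
                 [0.3, 0.3, 0.4]])
y = np.array([1, 2])

# One entry per row, at the column given by y:
print(pred[np.arange(y.shape[0]), y])  # [0.7 0.4]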
def theanoMatMatSub(In1, In2):
    var1 = T.dmatrix('var1')
    var2 = T.dmatrix('var2')
    var3 = T.sub(var1, var2)
    SubMat = function([var1, var2], var3)
    return SubMat(In1, In2)
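Usage is straightforward (toy inputs below). One caveat worth noting: the theano function is recompiled on every call, so for repeated use it would be cheaper to compile SubMat once, outside the wrapper.

import numpy as np

A = np.array([[3., 4.], [5., 6.]])
B = np.array([[1., 1.], [2., 2.]])
print(theanoMatMatSub(A, B))
# [[2. 3.]
#  [3. 4.]]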
def quadratic_loss(self, y):
    # Mean Euclidean distance between predictions and targets (one norm
    # per sample, summed over the feature axis before the square root).
    return T.mean(T.sqrt(T.sum(T.sqr(T.sub(self.p_y_given_x, y)), axis=1)))
def theanoVecVecSub(In1, In2):
    var1 = T.dvector('var1')
    var2 = T.dvector('var2')
    var3 = T.sub(var1, var2)
    SubVec = function([var1, var2], var3)
    return SubVec(In1, In2)
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 08 23:10:24 2013

@author: Nikolay
"""
import theano
import theano.tensor as T

a, b, c = T.vector(), T.vector(), T.vector()
W = T.matrix()

x = T.dot(a, W) + b
c = T.sqr(T.sub(x, a))
# c = T.sqrt(T.sum(T.sqr(T.sub(x, a))))
# -T.mean(T.sqrt(T.sum(T.sqr(T.sub(self.p_y_given_x, y)))))
s = T.sum(W, axis=1)

calc = theano.function(inputs=[a, b, W], outputs=[c])
sss = theano.function(inputs=[W], outputs=[s])

# print calc((1, 2), (5, 6), ((3, 4), (-7, 8)))
print sss(((1, 2, 3), (4, 5, 6), (7, 8, 9)))
def get_output_for(self, grids, **kwargs):
    height = width = depth = self.grid_side
    # np.indices() returns 3 grids, each exactly as big as the original
    # one. The first grid contains the X coordinate of each point at the
    # location of the point, the second the Y coordinate, and the third
    # the Z coordinate.
    indices_grids = T.as_tensor_variable(
        np.indices((width, height, depth), dtype=floatX),
        name="grid_indices")

    # Translate:
    # the translation vector is broadcast, so t_x is added to all values
    # in the first indices grid, t_y to the second, and t_z to the third,
    # resulting in a translation in the direction of translation_vector.
    indices_grids = T.add(indices_grids, self._translation_vector())

    # Rotate:
    # the origin is just the center point of the grid
    origin = T.as_tensor_variable(
        np.array((width // 2, height // 2, depth // 2),
                 dtype=floatX).reshape((3, 1, 1, 1)),
        name='origin')
    # We first center all indices, just as in the translation above.
    indices_grids = T.sub(indices_grids, origin)
    # T.tensordot is a generalized version of the dot product. The axes
    # parameter has length 2 and gives, for each of the two tensors, the
    # axis over which the summation occurs; those two axes must have the
    # same dimension. Here we have a (3 x 3) matrix <dot product> a
    # (3, width, height, depth) grid, with summation over the first axis
    # (index 0). The result has shape (3, width, height, depth) and again
    # contains 3 grids, this time of **rotated** indices for the
    # dimensions X, Y, Z respectively.
    indices_grids = T.tensordot(self._rotation_matrix(), indices_grids,
                                axes=(0, 0))
    # Decenter
    indices_grids = T.add(indices_grids, origin)

    # Since indices_grids was transformed, we might now have indices that
    # are out of the range of the original grid, so we need to clip them
    # to valid values: the first grid to [0, width - 1], the second to
    # [0, height - 1], the third to [0, depth - 1]. Note that the index
    # grids may now contain real numbers, not only integers.
    x_indices = T.clip(indices_grids[0], 0, width - 1 - .001)
    y_indices = T.clip(indices_grids[1], 0, height - 1 - .001)
    z_indices = T.clip(indices_grids[2], 0, depth - 1 - .001)

    if self.interpolation == "nearest":
        # Round the indices in each spatial dimension to the closest
        # integer, then index the original input grid with the 3 index
        # grids (numpy-style indexing with arrays) to obtain the final
        # result. Everything is intentionally flattened before indexing,
        # so that Theano can use AdvancedSubtensor1 instead of
        # AdvancedSubtensor, because only the former can run on the GPU.
        # https://groups.google.com/forum/#!topic/theano-users/XkPJP6on50Y
        flat_grids = grids.reshape((grids.shape[0], grids.shape[1], -1))
        flat_indices = (width * height * T.iround(x_indices).flatten()
                        + height * T.iround(y_indices).flatten()
                        + T.iround(z_indices).flatten())
        output = flat_grids[:, :, flat_indices]
        output = output.reshape(grids.shape)
    else:
        flat_grids = grids.reshape((grids.shape[0], grids.shape[1], -1))
        # For linear interpolation, the transformed indices x_indices,
        # y_indices and z_indices are used to linearly interpolate the
        # desired value at each original index position. Again, everything
        # is flattened so that Theano can put it on the GPU, just as in
        # the other branch of this if block.
        # https://groups.google.com/forum/#!topic/theano-users/XkPJP6on50Y
        top = T.cast(y_indices, 'int32').flatten()
        left = T.cast(x_indices, 'int32').flatten()
        forward = T.cast(z_indices, 'int32').flatten()
        x_indices = x_indices.flatten()
        y_indices = y_indices.flatten()
        z_indices = z_indices.flatten()
        # The fractions are the shifts in each direction away from the
        # integer corner position:
        fraction_y = T.cast(y_indices - top, theano.config.floatX)
        fraction_x = T.cast(x_indices - left, theano.config.floatX)
        fraction_z = T.cast(z_indices - forward, theano.config.floatX)
        # The new value is then a linear combination over the 8
        # neighbouring corners in 3D, each weighted by the product of its
        # fraction / (1 - fraction) factors:
        output = (
            flat_grids[:, :, self.grid_side ** 2 * top
                       + self.grid_side * left + forward]
            * (1 - fraction_y) * (1 - fraction_x) * (1 - fraction_z)
            + flat_grids[:, :, self.grid_side ** 2 * top
                         + self.grid_side * left + (forward + 1)]
            * (1 - fraction_y) * (1 - fraction_x) * fraction_z
            + flat_grids[:, :, self.grid_side ** 2 * top
                         + self.grid_side * (left + 1) + forward]
            * (1 - fraction_y) * fraction_x * (1 - fraction_z)
            + flat_grids[:, :, self.grid_side ** 2 * top
                         + self.grid_side * (left + 1) + (forward + 1)]
            * (1 - fraction_y) * fraction_x * fraction_z
            + flat_grids[:, :, self.grid_side ** 2 * (top + 1)
                         + self.grid_side * left + forward]
            * fraction_y * (1 - fraction_x) * (1 - fraction_z)
            + flat_grids[:, :, self.grid_side ** 2 * (top + 1)
                         + self.grid_side * left + (forward + 1)]
            * fraction_y * (1 - fraction_x) * fraction_z
            + flat_grids[:, :, self.grid_side ** 2 * (top + 1)
                         + self.grid_side * (left + 1) + forward]
            * fraction_y * fraction_x * (1 - fraction_z)
            + flat_grids[:, :, self.grid_side ** 2 * (top + 1)
                         + self.grid_side * (left + 1) + (forward + 1)]
            * fraction_y * fraction_x * fraction_z
        )
        output = output.reshape(grids.shape)
    return output
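The 8-term sum in the linear branch is the standard trilinear interpolation formula: each of the 8 surrounding voxels is weighted by the product of its fraction or (1 - fraction) factors along the three axes. A minimal NumPy sketch for a single query point, independent of the class above:

import numpy as np

def trilinear(volume, x, y, z):
    # Interpolate volume at a real-valued coordinate (x, y, z).
    left, top, fwd = int(x), int(y), int(z)
    fx, fy, fz = x - left, y - top, z - fwd
    val = 0.0
    for dx in (0, 1):
        for dy in (0, 1):
            for dz in (0, 1):
                # The weight is large when the query point is close to
                # this corner of the surrounding cell.
                w = ((fx if dx else 1 - fx)
                     * (fy if dy else 1 - fy)
                     * (fz if dz else 1 - fz))
                val += w * volume[left + dx, top + dy, fwd + dz]
    return val

vol = np.arange(27, dtype=float).reshape(3, 3, 3)
print(trilinear(vol, 0.5, 0.5, 0.5))  # 6.5, the mean of the 8 corner voxels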
def mse(self, y):
    return T.mean(T.sqr(T.sub(self.output, y)))
def RMSE_tensor(y, y_pred):
    # Root-mean-square error, scaled by a fixed factor of 48.
    return 48 * T.pow(T.mean(T.pow(T.sub(y, y_pred), 2)), 0.5)
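The factor of 48 is a fixed rescaling whose purpose the snippet does not state (it may map normalised outputs back to a pixel range). Compiling the expression shows the scaling directly (toy data, not from the source):

import numpy as np
import theano
import theano.tensor as T

y = T.dmatrix('y')
yp = T.dmatrix('y_pred')
rmse = theano.function([y, yp], RMSE_tensor(y, yp))

# All errors equal to 1 -> RMSE 1 -> scaled result 48.
print(rmse(np.zeros((2, 2)), np.ones((2, 2))))  # 48.0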