def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) weighted_loss = weighted_objective(objectives.get(loss)) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.y_train = self.get_output(train=True) self.y_test = self.get_output(train=False) # target of model self.y = T.zeros_like(self.y_train) self.weights = T.ones_like(self.y_train) train_loss = weighted_loss(self.y, self.y_train, self.weights) test_loss = weighted_loss(self.y, self.y_test, self.weights) train_loss.name = 'train_loss' test_loss.name = 'test_loss' self.y.name = 'y' if class_mode == "categorical": train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode self.theano_mode = theano_mode for r in self.regularizers: train_loss = r(train_loss) updates = self.optimizer.get_updates(self.params, self.constraints, train_loss) if type(self.X_train) == list: train_ins = self.X_train + [self.y, self.weights] test_ins = self.X_test + [self.y, self.weights] predict_ins = self.X_test else: train_ins = [self.X_train, self.y, self.weights] test_ins = [self.X_test, self.y, self.weights] predict_ins = [self.X_test] self._train = theano.function(train_ins, train_loss, updates=updates, allow_input_downcast=True, mode=theano_mode) self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates, allow_input_downcast=True, mode=theano_mode) self._predict = theano.function(predict_ins, self.y_test, allow_input_downcast=True, mode=theano_mode) self._test = theano.function(test_ins, test_loss, allow_input_downcast=True, mode=theano_mode) self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy], allow_input_downcast=True, mode=theano_mode)
def build(self, output_type):
    #### set up parameters
    self.params += [self.W_hy, self.b_hy]
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape,
                           dtype=theano.config.floatX),
            name='updates')

    ### set up regularizers
    self.L1 += T.sum(abs(self.W_hy))
    self.L2_sqr += T.sum(self.W_hy ** 2)

    ### final prediction formula
    self.y_pred = T.dot(self.get_output(), self.W_hy) + self.b_hy

    self.output_type = output_type
    if self.output_type == 'real':
        self.y = T.matrix(name='y', dtype=theano.config.floatX)
        self.loss = lambda y: Loss.mse(self.y_pred, y)  # y is the target; the mse against it is the loss
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=self.y_pred,
                                       mode=mode)
    elif self.output_type == 'binary':
        self.y = T.matrix(name='y', dtype='int32')
        self.p_y_given_x = T.nnet.sigmoid(self.y_pred)
        self.y_out = T.round(self.p_y_given_x)  # round to {0, 1}
        self.loss = lambda y: Loss.nll_binary(self.p_y_given_x, y)
        self.predict_proba = theano.function(inputs=[self.x, ],
                                             outputs=self.p_y_given_x,
                                             mode=mode)
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=T.round(self.p_y_given_x),
                                       mode=mode)
    elif self.output_type == 'softmax':
        self.y = T.vector(name='y', dtype='int32')
        self.p_y_given_x = T.nnet.softmax(self.y_pred)
        self.y_out = T.argmax(self.p_y_given_x, axis=-1)
        self.loss = lambda y: Loss.nll_multiclass(self.p_y_given_x, y)
        self.predict_proba = theano.function(inputs=[self.x, ],
                                             outputs=self.p_y_given_x,
                                             mode=mode)
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=self.y_out,  # y_out is the argmax prediction
                                       mode=mode)
    else:
        raise NotImplementedError
def my_activation(input):
    # softsign x / (1 + |x|), with both the input and the output
    # quantized to d = 2 decimal places
    d = 2
    input = input * T.power(10, d)
    input = T.round(input)
    x = input / T.power(10, d)
    abs_x = abs(x)
    ret = x / (1. + abs_x)
    ret = T.round(ret * T.power(10, d)) / T.power(10, d)
    return ret
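# Usage sketch (an assumption, not part of the original snippet): compiling
# my_activation shows the quantized-softsign behaviour numerically.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], my_activation(x), allow_input_downcast=True)
print(f(np.array([0.4567, -1.2345])))  # approximately [0.32, -0.55]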
def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.y_train = self.get_output(train=True) self.y_test = self.get_output(train=False) # target of model self.y = T.zeros_like(self.y_train) train_loss = self.loss(self.y, self.y_train) test_score = self.loss(self.y, self.y_test) if class_mode == "categorical": train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode if hasattr(self, 'cost_updates'): for u in self.loss_updates: train_loss = u.update_loss(train_loss) updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss) if type(self.X_train) == list: train_ins = self.X_train + [self.y] test_ins = self.X_test + [self.y] predict_ins = self.X_test else: train_ins = [self.X_train, self.y] test_ins = [self.X_test, self.y] predict_ins = [self.X_test] self._train = theano.function(train_ins, train_loss, updates=updates, allow_input_downcast=True, mode=theano_mode) self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates, allow_input_downcast=True, mode=theano_mode) self._predict = theano.function(predict_ins, self.y_test, allow_input_downcast=True, mode=theano_mode) self._test = theano.function(test_ins, test_score, allow_input_downcast=True, mode=theano_mode) self._test_with_acc = theano.function(test_ins, [test_score, test_accuracy], allow_input_downcast=True, mode=theano_mode)
def precision(self, y, threshold=0.5):
    # Earlier experiments thresholded self.y_out explicitly, e.g.:
    # conditional_output = T.switch(T.lt(threshold, self.y_out), 1.0, 0.0)
    # The current version simply rounds, so `threshold` is effectively 0.5.
    predicted_positives = T.sum(T.round(self.y_out))                   # denominator
    true_positives = T.sum(T.eq(T.round(self.y_out), 1) * T.eq(y, 1))  # numerator
    return true_positives / predicted_positives
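# Numeric check of the precision expression above (illustrative values,
# plain numpy): two rounded-positive predictions, one of which matches
# a positive label, give precision 0.5.
import numpy as np

y_out = np.array([0.9, 0.8, 0.1])
y = np.array([1, 0, 1])
predicted_positives = np.round(y_out).sum()                  # 2
true_positives = ((np.round(y_out) == 1) & (y == 1)).sum()   # 1
print(true_positives / predicted_positives)                  # 0.5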
def __init__(self, actual_probability, groundtruth_label, bias):
    # binary cross-entropy, stabilized by adding `bias` inside the logs
    self.cost = -T.mean(
        T.sum(groundtruth_label * T.log(actual_probability + bias) +
              (1 - groundtruth_label) * T.log(1 - actual_probability + bias),
              axis=1))
    # error rate and hard predictions come from rounding the probabilities
    self.error = T.mean(T.neq(T.round(actual_probability), groundtruth_label))
    self.prediction = T.round(actual_probability)
def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.y_train = self.get_output(train=True) self.y_test = self.get_output(train=False) # target of model self.y = T.zeros_like(self.y_train) train_loss = self.loss(self.y, self.y_train) test_score = self.loss(self.y, self.y_test) if class_mode == "categorical": #just compare whether the most probable is or not train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": #after make prediction [0,0,1,0] like with round function, compare each class of each sample then accumulate and divide by n*k train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss) if type(self.X_train) == list: train_ins = self.X_train + [self.y] test_ins = self.X_test + [self.y] predict_ins = self.X_test else: train_ins = [self.X_train, self.y] test_ins = [self.X_test, self.y] predict_ins = [self.X_test] #input is [[x1,x2,x3...],[y1,y2,y3]] x1 and y1 are both vector self._train = theano.function(train_ins, train_loss, updates=updates, allow_input_downcast=True, mode=theano_mode) self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates, allow_input_downcast=True, mode=theano_mode) self._predict = theano.function(predict_ins, self.y_test, allow_input_downcast=True, mode=theano_mode) self._test = theano.function(test_ins, test_score, allow_input_downcast=True, mode=theano_mode) self._test_with_acc = theano.function(test_ins, [test_score, test_accuracy], allow_input_downcast=True, mode=theano_mode)
def compile(self, optimizer, loss, class_mode="categorical"): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) self.X = self.layers[0].input # input of model # (first layer must have an "input" attribute!) self.y_train = self.layers[-1].output(train=True) self.y_test = self.layers[-1].output(train=False) # output of model self.y = T.matrix() # TODO: support for custom output shapes train_loss = self.loss(self.y, self.y_train) test_score = self.loss(self.y, self.y_test) if class_mode == "categorical": train_accuracy = T.mean( T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean( T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss) self._train = theano.function([self.X, self.y], train_loss, updates=updates, allow_input_downcast=True) self._train_with_acc = theano.function([self.X, self.y], [train_loss, train_accuracy], updates=updates, allow_input_downcast=True) self._predict = theano.function([self.X], self.y_test, allow_input_downcast=True) self._test = theano.function([self.X, self.y], test_score, allow_input_downcast=True) self._test_with_acc = theano.function([self.X, self.y], [test_score, test_accuracy], allow_input_downcast=True)
def compile_training_functions(self):
    # print parameter info
    all_params = lasagne.layers.get_all_params(self.network['l_profile'],
                                               trainable=True)  # l_out
    total_params = sum([p.get_value().size for p in all_params])
    print(" Total Model Parameters:", str(total_params))
    print(" Trainable Model Parameters")
    print("-" * 40)
    for param in all_params:
        print('', str(param), str(param.get_value().shape))
    print("-" * 40)
    print("\n")
    sys.stdout.flush()

    # train cost
    train_preds = lasagne.layers.get_output(self.network['l_out'], deterministic=False)
    cost_train = T.mean(lasagne.objectives.binary_crossentropy(train_preds, self.sym_target))
    L1_n_L2 = lasagne.regularization.regularize_network_params(
        self.network['l_out'], lasagne.regularization.l2, {'regularizable': True})
    cost_train += L1_n_L2 * self.options["L2"]
    eq_train = T.eq(T.round(train_preds), self.sym_target)
    train_acc = T.mean(eq_train, dtype=theano.config.floatX)

    # validation cost and accuracy
    val_preds = lasagne.layers.get_output(self.network['l_out'], deterministic=True)
    cost_val = T.mean(lasagne.objectives.binary_crossentropy(val_preds, self.sym_target))
    eq_val = T.eq(T.round(val_preds), self.sym_target)
    val_acc = T.mean(eq_val, dtype=theano.config.floatX)

    print(" Making update function...", end='')
    sys.stdout.flush()
    updates = lasagne.updates.adam(cost_train, all_params,
                                   learning_rate=self.options["ETA"],
                                   beta1=0.9, beta2=0.999, epsilon=1e-08)
    print("done")

    print(" Making training function - slow step...", end='')
    sys.stdout.flush()
    start_time = time.time()
    self.train_fn = theano.function([self.sym_input, self.sym_target],
                                    [cost_train, train_acc, train_preds],
                                    updates=updates, allow_input_downcast=True)
    ctime = time.time() - start_time
    print("finished in {:.3f}s".format(ctime))

    print(" Making validation function - slow step...", end='')
    sys.stdout.flush()
    start_time = time.time()
    self.val_fn = theano.function([self.sym_input, self.sym_target],
                                  [cost_val, val_acc, val_preds, eq_val],
                                  allow_input_downcast=True)
    ctime = time.time() - start_time
    print("finished in {:.3f}s".format(ctime))
    sys.stdout.flush()
def __init__(self, input, n_in, n_out, W=None, b=None):
    """ Initialize the parameters of the logistic regression

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie

    :type W: theano.tensor.TensorType, optional
    :param W: weight matrix of shape (n_in, n_out); initialized to zeros
              if not given

    :type b: theano.tensor.TensorType, optional
    :param b: bias vector of length n_out; initialized to zeros if not
              given
    """
    if W is None:
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='W',
            borrow=True
        )
    else:
        self.W = W

    if b is None:
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=np.zeros((n_out,), dtype=theano.config.floatX),
            name='b',
            borrow=True
        )
    else:
        self.b = b

    # output
    self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)  # batch_size x 1024
    self.thresh = T.round(self.output)

    # parameters of the model
    self.params = [self.W, self.b]

    # keep track of model input
    self.input = input
def quantizeWeights(self, X):
    # [-1,1] -> [0,1]
    Xa = hard_sigmoid(X / self.scale)
    Xb = T.round(Xa)
    # 0 or 1 -> -1 or 1
    return T.cast(T.switch(Xb, self.scale, -self.scale), theano.config.floatX)
def gated_loss(self, y):
    # channel 0 of the output acts as a gate; channel 1 carries the regression target
    gates = T.nnet.sigmoid(self.output[:, 0:1, :, :])
    gated_square_loss = T.mean(
        T.round(gates) * (self.output[:, 1:2, :, :] - y[:, 1:2, :, :]) ** 2)
    logistic_loss = -T.mean(T.log(1 + T.exp(-y[:, 0:1, :, :] * gates)))
    return gated_square_loss, logistic_loss, gated_square_loss + logistic_loss
def create_validator(self):
    """
    Generate theano function to check error and accuracy of the network.

    Returns: theano function that takes input (train_x, train_y)
             and returns error and accuracy
    """
    print("Creating {} Validator...".format(self.name))
    # create prediction
    val_prediction = lasagne.layers.get_output(self.network, deterministic=True)
    # check how much error in prediction
    if self.val_cost is None:
        if self.num_classes is None or self.num_classes == 0:
            self.val_cost = self.mse_loss(val_prediction, self.y)
            val_acc = T.constant(0)
        else:
            self.val_cost = self.cross_entropy_loss(val_prediction, self.y)
            # check the accuracy of the prediction
            if self.num_classes > 1:
                val_acc = T.mean(T.eq(T.argmax(val_prediction, axis=1),
                                      T.argmax(self.y, axis=1)),
                                 dtype=theano.config.floatX)
            elif self.num_classes == 1:
                val_acc = T.mean(T.eq(T.round(val_prediction,
                                              mode='half_away_from_zero'),
                                      self.y),
                                 dtype=theano.config.floatX)
    return theano.function([self.input_var, self.y], [self.val_cost, val_acc])
def sigmoid_readout(operators, v_in, h_L, external):
    """Sigmoid readout layer. Cost is the binary crossentropy and
    monitor is RMSE.
    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
                      and (n_visible, )
    :param h_L: shape (timesteps, n_hidden)
    :return: shape (timesteps, n_visible)
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = sigmoid(T.dot(h_L, weight) + bias)  # broadcastable bias??
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, eps, 1.0 - eps)

    # Cost:
    # cost = 1000 * ((v_pred[:-1] - v_in[1:]) ** 2).mean()
    # cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
    #        T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = crossent(v_pred_c[:-1], v_in_c[1:])  # TODO: v_sample_c !!!
    cost = cost.mean()

    # Monitor:
    # monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
    #           T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = crossent(v_sample_c[:-1], v_in_c[1:])
    monitor = monitor.mean()
    return v_sample, cost, monitor, None
def sigmoid_readout_old(operators, v_in, h_L, g):
    """Sigmoid readout layer. Cost is the binary crossentropy and
    monitor is RMSE.
    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
                      and (n_visible, )
    :param h_L: shape (timesteps, n_hidden)
    :return: shape (timesteps, n_visible)
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = g(T.dot(h_L, weight) + bias)  # broadcastable bias??
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Cost:
    cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
           T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = cost.sum() / v_in.shape[0]

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, 1.0e-7, 1.0 - 1.0e-7)

    # Monitor (needs to return something... for now):
    monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
              T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = monitor.sum() / v_in.shape[0]
    return v_sample, cost, monitor, None
def compute_activations(self, input_data, do_round=True):
    layer_input = input_data
    layer_signals = []
    for i, (w, b, k) in enumerate(zip(self.ws, self.bs, self.get_scales())):
        scaled_input = layer_input * k
        if not do_round:
            eta = None
            spikes = scaled_input
        else:
            # straight-through rounding: the forward pass sees
            # round(scaled_input), while the backward pass sees the
            # identity (eta carries no gradient)
            eta = tt.round(scaled_input) - scaled_input
            spikes = scaled_input + disconnected_grad(eta)
        nonlinearity = get_named_activation_function(
            self.hidden_activations if i < len(self.ws) - 1 else self.output_activation)
        output = nonlinearity((spikes / k).dot(w) + b)
        layer_signals.append({
            'input': layer_input,
            'scaled_input': scaled_input,
            'eta': eta,
            'spikes': spikes,
            'output': output
        })
        layer_input = output
    return layer_signals
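# A minimal check of the straight-through rounding used above (an
# illustrative sketch, not part of the original class): the forward
# value is round(x), yet the gradient is that of the identity.
import numpy as np
import theano
import theano.tensor as tt
from theano.gradient import disconnected_grad

x = tt.vector('x')
spikes = x + disconnected_grad(tt.round(x) - x)
g = theano.grad(spikes.sum(), x)
f = theano.function([x], [spikes, g], allow_input_downcast=True)
print(f(np.array([0.3, 1.7])))  # values [0., 2.], gradients [1., 1.]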
def __init__(self, input, n_in, n_out):
    # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
    self.W = theano.shared(
        value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
        name='W',
        borrow=True
    )
    # initialize the biases b as a vector of n_out 0s
    self.b = theano.shared(
        value=numpy.zeros((n_out,), dtype=theano.config.floatX),
        name='b',
        borrow=True
    )
    # linear scores; note there is no squashing nonlinearity before rounding
    self.p_y_given_x = T.dot(input, self.W) + self.b
    self.y_pred = T.round(self.p_y_given_x)
    self.params = [self.W, self.b]
    self.input = input
def create_objectives(self, deterministic=False):
    """Stochastic approximation to the pseudo-likelihood"""
    X = self.inputs[0]
    X = X.reshape((-1, self.n_visible))

    # index of bit i in expression p(x_i | x_{\i})
    bit_i_idx = self.bit_i_idx
    # bit_i_idx = theano.shared(value=0, name='bit_i_idx')

    # binarize the input image by rounding to nearest integer
    xi = T.round(X)

    # calculate free energy for the given bit configuration
    fe_xi = self.free_energy(xi)

    # flip bit x_i of matrix xi and preserve all other bits x_{\i}
    # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
    # the result to xi_flip, instead of working in place on xi.
    xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

    # calculate free energy with bit flipped
    fe_xi_flip = self.free_energy(xi_flip)

    # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
    cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

    return cost, cost
def compute_output(self):
    label_results = self.process_label_results(self.semantic_prediction)
    # tensor.round(self.semantic_prediction)
    print(label_results)
    print(tensor.round(self.semantic_prediction))
    label_specific_Ws = tensor.tensordot(label_results, self.Ws, axes=[1, 0])
    label_specific_Vs = tensor.tensordot(label_results, self.Vs, axes=[1, 0])
    label_specific_W = th.dot(label_specific_Ws, self.W)
    label_specific_V = th.dot(label_specific_Vs, self.V)

    # compute output
    self.output = getFunction('softmax')(
        tensor.batched_dot(self.input, label_specific_W) +
        tensor.batched_dot(self.extra_input, label_specific_V) +
        self.b)

    for i in range(len(self.semantic_label_map.keys()) + 1):
        ho = self.get_output(i)
        self.output_hybrids.append(ho)
def to_fixed_point_theano(input, no_bits, no_int_bits):
    # quantize to a fixed-point grid with `no_bits` total bits, of which
    # `no_int_bits` are integer bits; saturate at +/- max_val / scale
    scale = T.cast(2. ** (no_bits - no_int_bits), theano.config.floatX)
    max_val = T.cast((2. ** no_bits) - 1, theano.config.floatX)
    scaled = input * scale
    scaled = T.round(scaled)
    scaled = T.clip(scaled, -max_val, max_val)
    return scaled / scale
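# Usage sketch (illustrative values): with no_bits=8 and no_int_bits=3
# the scale is 2**5 = 32, so values are rounded to the nearest 1/32 and
# saturated at +/- 255/32 = 7.96875.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
q = theano.function([x], to_fixed_point_theano(x, 8, 3), allow_input_downcast=True)
print(q(np.array([0.1, 3.14159, 100.0])))  # ~[0.09375, 3.15625, 7.96875]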
def getHidden(self, v):
    v = T.round(v)
    h = T.dot(v, self.w) + self.c
    h_sigmoid = T.nnet.sigmoid(h)
    h_bin = self.theano_rng.binomial(size=h.shape, n=1, p=h_sigmoid,
                                     dtype=theano.config.floatX)
    return [h, h_sigmoid, h_bin]
def get_pseudo_likelihood_cost(self, updates):
    """ Stochastic approximation to the pseudo-likelihood
    I have no idea why to do this.
    """
    # index of bit i in expression p(x_i | x_{\i})
    bit_i_idx = theano.shared(value=0, name='bit_i_idx')

    # binarize the input image by rounding to nearest integer
    # (input? It seems that the sample result has nothing to do with the cost...)
    xi = T.round(self.input)

    # calculate free energy for the given bit configuration
    fe_xi = self.free_energy(xi)

    # flip bit x_i of matrix xi and preserve all other bits x_{\i}
    xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

    # calculate free energy with bit flipped
    fe_xi_flip = self.free_energy(xi_flip)

    cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

    updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

    return cost
def getVisible(self, h):
    h = T.round(h)
    v = T.dot(h, self.w.T) + self.b
    v_sigmoid = T.nnet.sigmoid(v)
    v_bin = self.theano_rng.binomial(size=v.shape, n=1, p=v_sigmoid,
                                     dtype=theano.config.floatX)
    return [v, v_sigmoid, v_bin]
def compile(self, optimizer, loss, class_mode='categorical'):
    self.optimizer = optimizer
    self.loss = objectives.get(loss)

    self.X_train = self.get_input()      # symbolic variable
    self.y_train = self.get_output()     # symbolic variable
    self.y = T.zeros_like(self.y_train)  # symbolic variable

    train_loss = self.loss(self.y, self.y_train)

    if class_mode == 'categorical':
        train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1),
                                     T.argmax(self.y_train, axis=-1)))
    elif class_mode == 'binary':
        train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
    else:
        raise Exception("Invalid class mode: " + str(class_mode))
    self.class_mode = class_mode

    # updates = self.optimizer.get_updates(train_loss, self.params)
    self.grad = T.grad(cost=train_loss, wrt=self.params,
                       disconnected_inputs='raise')
    updates = []
    for p, g in zip(self.params, self.grad):
        # random perturbation in place of a gradient step
        updates.append((p, p - random.uniform(-0.3, 1)))

    if type(self.X_train) == list:
        train_ins = self.X_train + [self.y]
    else:
        train_ins = [self.X_train, self.y]

    self._train = theano.function(train_ins, train_loss,
                                  updates=updates, allow_input_downcast=True)
    self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
                                           updates=updates, allow_input_downcast=True)
def __init__(self, data, n_in, srng, p, train_flag): """ This implements the dropout layer in neural network. :type data: theano.tensor.dmatrix :param data: a symbolic tensor of shape (n_examples, n_in) :type srng: theano.sandbox.rng_mrg.MRG_RandomStreams :param srng: symbolic random number generator :type n_in: int :param n_in: dimensionality of input :type p: float :param p: the probability of dropping out :type train_flag: symbolic boolean :param train_flag: whether or not it's training """ self.input = data self.in_shape = n_in self.params = [] rand = T.round(srng.uniform(size=(n_in,), ndim=1)) multiplier = 1.0 / p self.output = T.switch(train_flag, data * rand, data * multiplier)
def __init__(self, input, n_cents, centers, n_dims, reg):
    bias_init = randn(n_dims)
    cents_init = centers
    sigmas_init = np.abs(randn(n_cents).reshape((n_cents,)))
    weights_init = randn(n_cents * n_dims).reshape((n_cents, n_dims))

    # regularization
    self.reg = reg

    self.b = theano.shared(bias_init, name='b', borrow=True)  # bias
    self.c = theano.shared(cents_init, name='c', borrow=True)
    self.s = theano.shared(sigmas_init, name='s', borrow=True)
    self.w = theano.shared(weights_init, name='w', borrow=True)

    # thanks to comments by Pascal on the theano-users group,
    # the idea is to use 3d tensors
    C = self.c[np.newaxis, :, :]
    X = input[:, np.newaxis, :]

    difnorm = T.sum((C - X) ** 2, axis=-1)

    a = T.exp(-difnorm * (self.s ** 2))

    self.prob = T.nnet.sigmoid(T.dot(a, self.w) + self.b)
    self.pred = T.round(self.prob)

    self.pred_func = theano.function([input], outputs=self.pred)
    self.prob_func = theano.function([input], outputs=self.prob)
def _glimpse_sensor(self, x_t, l_p):
    """
    Parameters:
        x_t - 28x28 image
        l_p - 2x1 focus vector
    Returns:
        12x4 matrix (three 4x4 glimpses stacked along the first axis)
    """
    # Turn l_p to the left-top point of rectangle
    l_p = l_p * 14 + 14 - 2
    l_p = T.cast(T.round(l_p), "int32")
    # clamp the corner at each scale so the crop stays inside the image
    l_p = l_p * (l_p >= 0)
    l_p = l_p * (l_p < 24) + (l_p >= 24) * 23
    l_p2 = l_p - 2
    l_p2 = l_p2 * (l_p2 >= 0)
    l_p2 = l_p2 * (l_p2 < 20) + (l_p2 >= 20) * 19
    l_p3 = l_p - 6
    l_p3 = l_p3 * (l_p3 >= 0)
    l_p3 = l_p3 * (l_p3 < 16) + (l_p3 >= 16) * 15
    # crop at three scales and pool the larger crops down to 4x4
    glimpse_1 = x_t[l_p[0]: l_p[0] + 4][:, l_p[1]: l_p[1] + 4]
    glimpse_2 = x_t[l_p2[0]: l_p2[0] + 8][:, l_p2[1]: l_p2[1] + 8]
    glimpse_2 = theano.tensor.signal.downsample.max_pool_2d(glimpse_2, (2, 2))
    glimpse_3 = x_t[l_p3[0]: l_p3[0] + 16][:, l_p3[1]: l_p3[1] + 16]
    glimpse_3 = theano.tensor.signal.downsample.max_pool_2d(glimpse_3, (4, 4))
    return T.concatenate([glimpse_1, glimpse_2, glimpse_3])
def __init__(self, n, p, *args, **kwargs):
    super(Multinomial, self).__init__(*args, **kwargs)

    p = p / tt.sum(p, axis=-1, keepdims=True)
    n = np.squeeze(n)  # works also if n is a tensor

    if len(self.shape) > 1:
        m = self.shape[-2]
        try:
            assert n.shape == (m,)
        except (AttributeError, AssertionError):
            n = n * tt.ones(m)
        self.n = tt.shape_padright(n)
        self.p = p if p.ndim > 1 else tt.shape_padleft(p)
    elif n.ndim == 1:
        self.n = tt.shape_padright(n)
        self.p = p if p.ndim > 1 else tt.shape_padleft(p)
    else:
        # n is a scalar, p is a 1d array
        self.n = tt.as_tensor_variable(n)
        self.p = tt.as_tensor_variable(p)

    self.mean = self.n * self.p
    # the mode starts from rounding n*p; the rounding residual is then
    # redistributed so the counts sum back to n
    mode = tt.cast(tt.round(self.mean), 'int32')
    diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
    inc_bool_arr = tt.abs_(diff) > 0
    mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                            diff[inc_bool_arr.nonzero()])
    self.mode = mode
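# A minimal numpy illustration of the mode correction above (values are
# made up; numpy rounds half to even, unlike Theano's old default, but
# the redistribution logic is the same):
import numpy as np

n, p = 10, np.array([0.25, 0.25, 0.5])
mode = np.round(n * p).astype('int32')  # [2, 2, 5] -> sums to 9, not 10
diff = n - mode.sum()                   # 1 count missing
if diff != 0:
    mode[0] += diff                     # absorb the residual
print(mode, mode.sum())                 # [3 2 5] 10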
def simple_RNN(nh):
    Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (1, nh)).astype(theano.config.floatX))
    Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))
    Wy = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, 1)).astype(theano.config.floatX))
    bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
    by = theano.shared(numpy.zeros(1, dtype=theano.config.floatX))
    h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
    p = [Wx, Wh, Wy, bh, by, h0]

    x = T.matrix()

    def recurrence(x_t, h_tm1):
        ha_t = T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh
        h_t = T.tanh(ha_t)
        s_t = T.dot(h_t, Wy) + by
        return [ha_t, h_t, s_t]

    ([ha, h, activations], updates) = theano.scan(fn=recurrence,
                                                  sequences=x,
                                                  outputs_info=[dict(), h0, dict()])

    h = T.tanh(ha)  # so it is differentiable with respect to ha
    t = x[0, 0]
    s = activations[-1, 0]
    y = T.nnet.sigmoid(s)
    loss = -t * T.log(y + 1e-14) - (1 - t) * T.log((1 - y) + 1e-14)
    acc = T.neq(T.round(y), t)  # 0/1 misclassification indicator
    return p, [x], s, [loss, acc], h, ha
def computeOutput(self, y_pred):
    if self.otype == Connection.Output_Type_Binary:
        self.dst.output = T.round(y_pred)
    elif self.otype == Connection.Output_Type_SoftMax:
        self.dst.output = T.argmax(y_pred, axis=1)
def hamming_loss(y_true, y_predicted):
    """
    note - works on n-dim arrays, means across the final axis

    note - we round predicted because float probabilities would not work
    """
    return T.neq(y_true, T.round(y_predicted)).astype(theano.config.floatX).mean(axis=-1)
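# Usage sketch (illustrative values): the loss is the fraction of labels
# whose rounded prediction disagrees with the target, per sample.
import numpy as np
import theano
import theano.tensor as T

yt, yp = T.matrix('yt'), T.matrix('yp')
f = theano.function([yt, yp], hamming_loss(yt, yp), allow_input_downcast=True)
print(f(np.array([[1., 0., 1.]]), np.array([[0.9, 0.2, 0.3]])))  # [0.333...]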
def tround(*args, **kwargs):
    """
    Temporary function to silence round warning in Theano. Please
    remove when the warning disappears.
    """
    kwargs['mode'] = 'half_to_even'
    return tt.round(*args, **kwargs)
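# A quick behavioural check (an illustrative sketch): half_to_even is
# banker's rounding, so 2.5 rounds down to 2 rather than up to 3.
import theano
import theano.tensor as tt

f = theano.function([], tround(tt.constant(2.5)))
print(f())  # 2.0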
def binarization(W, H, binary=True, deterministic=False, stochastic=False, srng=None):
    # (deterministic == True) <-> test-time <-> inference-time
    if not binary or (deterministic and stochastic):
        # print("not binary")
        Wb = W
    else:
        # [-1,1] -> [0,1]
        Wb = hard_sigmoid(W / H)

        # Stochastic BinaryConnect
        if stochastic:
            # print("stoch")
            Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)),
                        theano.config.floatX)
        # Deterministic BinaryConnect (round to nearest)
        else:
            # print("det")
            Wb = T.round(Wb)

        # 0 or 1 -> -1 or 1
        Wb = T.cast(T.switch(Wb, H, -H), theano.config.floatX)

    return Wb
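# Minimal sketch of the deterministic path above (assumptions: H = 1 and
# hard_sigmoid is BinaryConnect's clipped linear unit, defined inline here):
import numpy as np
import theano
import theano.tensor as T

def hard_sigmoid(x):
    return T.clip((x + 1.) / 2., 0., 1.)

W = T.matrix('W')
Wb = T.cast(T.switch(T.round(hard_sigmoid(W / 1.0)), 1.0, -1.0),
            theano.config.floatX)
f = theano.function([W], Wb, allow_input_downcast=True)
print(f(np.array([[0.3, -0.7]])))  # [[ 1. -1.]]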
def simple_RNN(nh):
    Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (1, nh)).astype(theano.config.floatX))
    Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))
    Wy = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, 1)).astype(theano.config.floatX))
    bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
    by = theano.shared(numpy.zeros(1, dtype=theano.config.floatX))
    h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
    p = [Wx, Wh, Wy, bh, by, h0]

    x = T.matrix()

    def recurrence(x_t, h_tm1):
        h_t = T.tanh(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
        s_t = T.dot(h_t, Wy) + by
        return [h_t, s_t]

    ([h, activations], updates) = theano.scan(fn=recurrence,
                                              sequences=x,
                                              outputs_info=[h0, dict()])

    t = x[0, 0]
    s = activations[-1, 0]
    y = T.nnet.sigmoid(s)
    loss = -t * T.log(y + 1e-14) - (1 - t) * T.log((1 - y) + 1e-14)
    acc = T.neq(T.round(y), t)  # 0/1 misclassification indicator
    return p, [x], s, [loss, acc], h
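# Usage sketch for the variant above (an assumption about the intended
# task: the first entry of the (timesteps, 1) input doubles as the
# binary target):
import numpy
import theano

params, [x], s, [loss, acc], h = simple_RNN(8)
step = theano.function([x], [s, loss, acc], allow_input_downcast=True)
print(step(numpy.ones((5, 1))))  # score, cross-entropy loss, 0/1 error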
def get_pseudo_likelihood_cost(self, updates): """Stochastic approximation to the pseudo-likelihood""" # index of bit i in expression p(x_i | x_{\i}) bit_i_idx = theano.shared(value=0, name = 'bit_i_idx') # binarize the input image by rounding to nearest integer xi = T.round(self.input) # calculate free energy for the given bit configuration fe_xi = self.free_energy(xi) # flip bit x_i of matrix xi and preserve all other bits x_{\i} # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx] # NB: slice(start,stop,step) is the python object used for # slicing, e.g. to index matrix x as follows: x[start:stop:step] # In our case, idx_list is a tuple. The first element of the tuple # describes what slice we want from the first dimension. # ``slice(None,None,None)`` means that we want all values, equivalent # to numpy notation ``:``. The second element of the tuple is the # value bit_i_idx, meaning that we are looking for [:,bit_i_idx]. xi_flip = T.setsubtensor(xi, 1-xi[:, bit_i_idx], idx_list=(slice(None,None,None),bit_i_idx)) # calculate free energy with bit flipped fe_xi_flip = self.free_energy(xi_flip) # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))) # increment bit_i_idx % number as part of updates updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible return cost
def __init__(self, input, n_in, n_out):
    self.W = theano.shared(
        value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
        name='W',
        borrow=True
    )
    self.b = theano.shared(
        value=numpy.zeros((n_out,), dtype=theano.config.floatX),
        name='b',
        borrow=True
    )
    self.output = T.nnet.relu(T.tanh(T.dot(input, self.W) + self.b))
    # self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
    self.y_pred_given_x = T.round(self.output)
    # alternatives: T.dot(input, self.W) + self.b
    #               T.argmax(self.p_y_given_x, axis=1)
    self.params = [self.W, self.b]
    self.input = input
def get_pseudo_likelihood_cost(self, updates): """Stochastic approximation to the pseudo-likelihood""" # index of bit i in expression p(x_i | x_{\i}) bit_i_idx = theano.shared(value=0, name='bit_i_idx') # binarize the input image by rounding to nearest integer xi = T.round(self.input) # calculate free energy for the given bit configuration fe_xi = self.free_energy(xi) # flip bit x_i of matrix xi and preserve all other bits x_{\i} # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns # the result to xi_flip, instead of working in place on xi. xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx]) # calculate free energy with bit flipped fe_xi_flip = self.free_energy(xi_flip) # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))) # increment bit_i_idx % number as part of updates updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible return cost
def prepare(): X = T.fmatrix('X') y = T.ivector('y') if "adaptive" not in args: output_layer = squared_error_net() else: output_layer = squared_error_net_adaptive() all_params = lasagne.layers.get_all_params(output_layer) loss_fn = squared_error label_vector = lasagne.layers.get_output(output_layer, X) loss = loss_fn(label_vector, y).mean() pred = T.maximum(0, T.minimum(T.round(label_vector), args["num_classes"] - 1)) accuracy = T.mean(T.eq(pred, y)) return Container({ "X": X, "y": y, "output_layer": output_layer, "all_params": all_params, "loss": loss, "label_vector": label_vector, "pred": pred, "accuracy": accuracy })
def prepare():
    X = T.fmatrix('X')
    y = T.ivector('y')

    assert not ("regression" in args and "logistic" in args)

    if "regression" in args:
        output_layer = squared_error_net_adaptive()
    else:
        output_layer = logistic()

    all_params = lasagne.layers.get_all_params(output_layer)

    if "regression" in args:
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = squared_error(prob_vector, y).mean()
        pred = T.maximum(0, T.minimum(T.round(prob_vector), args["num_classes"] - 1))
        accuracy = T.mean(T.eq(pred, y))
    else:
        a = args["a"]
        b = args["b"]
        loss_fn = get_hybrid_loss(a, b)
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = loss_fn(prob_vector, y).mean()
        pred = T.argmax(prob_vector, axis=1)
        accuracy = T.mean(T.eq(pred, y))

    return Container({
        "X": X,
        "y": y,
        "output_layer": output_layer,
        "all_params": all_params,
        "loss": loss,
        "pred": pred,
        "accuracy": accuracy,
        "prob_vector": prob_vector
    })
def compile(self, optimizer, loss, class_mode="categorical"): self.optimizer = keras.optimizers.get(optimizer) self.loss = keras.objectives.get(loss) self.X = self.layers[0].input # input of model # (first layer must have an "input" attribute!) self.y_train = self.layers[-1].output(train=True) self.y_test = self.layers[-1].output(train=False) # output of model self.y = T.matrix() # TODO: support for custom output shapes train_loss = self.loss(self.y, self.y_train) test_score = self.loss(self.y, self.y_test) if class_mode == "categorical": train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) elif class_mode == "regression": train_accuracy = T.mean(self.y - self.y_train) test_accuracy = T.mean(self.y - self.y_test) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode updates = self.optimizer.get_updates(self.params, train_loss) self._train = theano.function([self.X, self.y], train_loss, updates=updates, allow_input_downcast=True,mode=theano.compile.MonitorMode( pre_func=inspect_inputs, post_func=inspect_outputs)) self._train_with_acc = theano.function([self.X, self.y], [train_loss, train_accuracy], updates=updates, allow_input_downcast=True) self._predict = theano.function([self.X], self.y_test, allow_input_downcast=True) self._test = theano.function([self.X, self.y], test_score, allow_input_downcast=True) self._test_with_acc = theano.function([self.X, self.y], [test_score, test_accuracy], allow_input_downcast=True)
def discrete_grads(loss, network, LR):
    # th is a parameter that controls the nonlinearity of the state transfer probability
    global update_type, best_params, H, N, th

    W_params = lasagne.layers.get_all_params(network, discrete=True)  # get all the weight parameters
    layers = lasagne.layers.get_all_layers(network)

    W_grads = []
    for layer in layers:
        params = layer.get_params(discrete=True)
        if params:
            W_grads.append(theano.grad(loss, wrt=layer.W))  # here layer.W = weight_tune(param)
    updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W_params, learning_rate=LR)

    for param, parambest in izip(W_params, best_params):

        L = 2 * H / pow(2, N)  # state step length in Z_N

        a = random.random()  # c is a random variable with binary value
        if a < 0.85:
            c = 1
        else:
            c = 0

        b = random.random()
        state_rand = T.round(b * pow(2, N)) * L - H  # a random state in the discrete weight space Z_N

        # parambest transfers to state_rand with probability a, or stays unmoved with probability 1-a
        delta_W1 = c * (state_rand - parambest)
        delta_W1_direction = T.cast(T.sgn(delta_W1), theano.config.floatX)
        dis1 = T.abs_(delta_W1)                      # the absolute distance
        k1 = delta_W1_direction * T.floor(dis1 / L)  # the integer part
        v1 = delta_W1 - k1 * L                       # the decimal part
        Prob1 = T.abs_(v1 / L)                       # the transfer probability
        Prob1 = T.tanh(th * Prob1)                   # the nonlinear tanh() accelerates the state transfer

        delta_W2 = updates[param] - param
        delta_W2_direction = T.cast(T.sgn(delta_W2), theano.config.floatX)
        dis2 = T.abs_(delta_W2)                      # the absolute distance
        k2 = delta_W2_direction * T.floor(dis2 / L)  # the integer part
        v2 = delta_W2 - k2 * L                       # the decimal part
        Prob2 = T.abs_(v2 / L)                       # the transfer probability
        Prob2 = T.tanh(th * Prob2)                   # the nonlinear tanh() accelerates the state transfer

        srng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
        # Gate1/Gate2 are binary variables that are 1 with probability Prob1/Prob2
        Gate1 = T.cast(srng.binomial(n=1, p=Prob1, size=T.shape(Prob1)), theano.config.floatX)
        Gate2 = T.cast(srng.binomial(n=1, p=Prob2, size=T.shape(Prob2)), theano.config.floatX)

        delta_W1_new = (k1 + delta_W1_direction * Gate1) * L  # delta_W1_new = k*L where k is an integer
        updates_param1 = T.clip(parambest + delta_W1_new, -H, H)
        updates_param1 = weight_tune(updates_param1, -H, H)  # fine tuning: keep each element strictly inside the discrete space

        delta_W2_new = (k2 + delta_W2_direction * Gate2) * L  # delta_W2_new = k*L where k is an integer
        updates_param2 = T.clip(param + delta_W2_new, -H, H)
        updates_param2 = weight_tune(updates_param2, -H, H)  # fine tuning: keep each element strictly inside the discrete space

        # if update_type < 100, the weight probabilistically transfers from parambest
        # to state_rand, which helps to search the global minimum; else it
        # probabilistically transfers from param to a state nearest to updates[param]
        updates[param] = T.switch(T.lt(update_type, 100), updates_param1, updates_param2)

    return updates
def build_model(tparams, options):
    opt_ret = dict()

    trng = RandomStreams(1234)

    p = 0.5
    retain_prob = 1. - p
    print('dropout: {0}'.format(p))

    # description string: #words x #samples
    # text: text sentence
    # hypothesis: hypothesis sentence
    text_embedding = tensor.tensor3('text_embedding', dtype='float32')
    # text = tensor.matrix('text', dtype='int64')
    text_mask = tensor.matrix('text_mask', dtype='float32')
    hypothesis_embedding = tensor.tensor3('hypothesis_embedding', dtype='float32')
    # hypothesis = tensor.matrix('hypothesis', dtype='int64')
    hypothesis_mask = tensor.matrix('hypothesis_mask', dtype='float32')

    label = tensor.vector('label', dtype='int64')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, text_embedding, None, options,
                                            prefix='encoder', mask=text_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx

    # dropout
    dec_ctx_dropped = dec_ctx
    dec_ctx_dropped *= trng.binomial(dec_ctx_dropped.shape, p=retain_prob,
                                     dtype=dec_ctx_dropped.dtype)
    dec_ctx_dropped /= retain_prob

    # decoder (hypothesis)
    proj_hypo = get_layer(options['decoder'])[1](tparams, hypothesis_embedding, dec_ctx,
                                                 options, prefix='h_decode_t',
                                                 mask=hypothesis_mask)
    proj_hypo_dropped = get_layer(options['decoder'])[1](tparams, hypothesis_embedding,
                                                         dec_ctx_dropped, options,
                                                         prefix='h_decode_t',
                                                         mask=hypothesis_mask)
    hypo_ctx = proj_hypo[0][-1]
    hypo_ctx_dropped = proj_hypo_dropped[0][-1]

    # dropout
    hypo_ctx_dropped *= trng.binomial(hypo_ctx_dropped.shape, p=retain_prob,
                                      dtype=hypo_ctx_dropped.dtype)
    hypo_ctx_dropped /= retain_prob

    # cost (cross entropy)
    logit = get_layer('ff')[1](tparams, hypo_ctx, options, prefix='ff_logit',
                               activ='tensor.nnet.sigmoid')
    logit_dropped = get_layer('ff')[1](tparams, hypo_ctx_dropped, options,
                                       prefix='ff_logit', activ='tensor.nnet.sigmoid')

    # flatten logit
    logit = logit.flatten()
    logit_dropped = logit_dropped.flatten()
    cost = binary_crossentropy(logit_dropped, label)
    cost = tensor.mean(cost)
    acc = tensor.mean(tensor.eq(tensor.round(logit), label))

    return text_embedding, text_mask, hypothesis_embedding, hypothesis_mask, label, cost, acc
def ready(self, hiddenWeights=None):
    # input (where first dimension is time)
    self.x = T.matrix()
    # target (where first dimension is time)
    if self.output_type == 'real':
        self.y = T.matrix(name='y', dtype=theano.config.floatX)
    elif self.output_type == 'binary':
        self.y = T.matrix(name='y', dtype='int32')
    elif self.output_type == 'softmax':  # only vector labels supported
        self.y = T.vector(name='y', dtype='int32')
    else:
        raise NotImplementedError
    # initial hidden state of the RNN
    self.h0 = T.vector()
    # learning rate
    self.lr = T.scalar()

    if self.activation == 'lin':
        activation = lambda x: x
    elif self.activation == 'tanh':
        activation = T.tanh
    elif self.activation == 'sigmoid':
        activation = T.nnet.sigmoid
    elif self.activation == 'relu':
        activation = lambda x: x * (x > 0)
    elif self.activation == 'cappedrelu':
        activation = lambda x: T.minimum(x * (x > 0), 6)
    else:
        raise NotImplementedError

    self.rnn = RNN(input=self.x, n_in=self.n_in,
                   n_hidden=nHidden, n_out=self.n_out,
                   activation=activation, output_type=self.output_type,
                   use_symbolic_softmax=self.use_symbolic_softmax,
                   W_ih=A, W_hh=B, W_hy=hiddenWeights)

    if self.output_type == 'real':
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=[self.rnn.y_pred, self.rnn.h],
                                       mode=mode)
    elif self.output_type == 'binary':
        self.predict_proba = theano.function(inputs=[self.x, ],
                                             outputs=self.rnn.p_y_given_x,
                                             mode=mode)
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=[T.round(self.rnn.p_y_given_x),
                                                self.rnn.h],
                                       mode=mode)
    elif self.output_type == 'softmax':
        self.predict_proba = theano.function(inputs=[self.x, ],
                                             outputs=self.rnn.p_y_given_x,
                                             mode=mode)
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=self.rnn.y_out,
                                       mode=mode)
    else:
        raise NotImplementedError
def compile(self, optimizer, loss, class_mode="categorical", y_dim_components=1): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) # input of model if not hasattr(self.layers[0], 'input'): for l in self.layers: if hasattr(l, 'input'): break ndim = l.input.ndim self.layers[0].input = ndim_tensor(ndim) self.X = self.layers[0].input self.y_train = self.get_output(train=True) self.y_test = self.get_output(train=False) # output of model self.y = ndim_tensor(y_dim_components+1) train_loss = self.loss(self.y, self.y_train) test_score = self.loss(self.y, self.y_test) if class_mode == "categorical": train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss) self._train = theano.function([self.X, self.y], train_loss, updates=updates, allow_input_downcast=True) self._train_with_acc = theano.function([self.X, self.y], [train_loss, train_accuracy], updates=updates, allow_input_downcast=True) self._predict = theano.function([self.X], self.y_test, allow_input_downcast=True) self._test = theano.function([self.X, self.y], test_score, allow_input_downcast=True) self._test_with_acc = theano.function([self.X, self.y], [test_score, test_accuracy], allow_input_downcast=True)
def getPseudoLikeLiHoodCost(self, updates):
    # index of bit i in expression p(x_i | x_{\i})
    bit_i_idx = theano.shared(value=0, name='bit_i_idx')

    # binarize the input by rounding to nearest integer
    xi = T.round(self.input)

    # free energy for the given bit configuration
    fe_xi = self.freeEnergy(xi)

    # flip bit x_i and preserve all other bits
    xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

    # free energy with bit flipped
    fe_xi_flip = self.freeEnergy(xi_flip)

    cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

    # cycle through the bits across calls
    updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

    return cost
def fixed_point(X, NOB, NOIB):
    power = T.cast(2. ** (NOB - NOIB), theano.config.floatX)  # float!
    max_val = T.cast((2. ** NOB) - 1, theano.config.floatX)
    value = X * power
    value = T.round(value)                    # rounding
    value = T.clip(value, -max_val, max_val)  # saturation arithmetic
    value = value / power
    return value
def binarize(W, mode='stochastic'):
    assert mode in ['deterministic', 'stochastic'], \
        '`mode` must be either "deterministic" or "stochastic"'
    H = T.sqrt(1.5 / T.sum(W.shape))
    Wb = (hard_sigmoid(W / H) + 1) / 2
    if mode == 'deterministic':
        Wb = T.round(Wb)
    else:
        Wb = T.cast(rng.binomial(n=1, p=Wb, size=T.shape(W)), theano.config.floatX)
    # 0 or 1 -> -H or H
    return T.cast(T.switch(Wb, H, -H), theano.config.floatX)