def get_model(input_var, target_var, multiply_var):
    # input layer with unspecified batch size
    layer = InputLayer(shape=(None, 12, 64, 64), input_var=input_var)
    #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer = DimshuffleLayer(layer, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
    layer = Conv3DDNNLayer(incoming=layer, num_filters=1, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=sigmoid)

    layer_prediction = layer

    # Loss
    prediction = get_output(layer_prediction)
    loss = binary_crossentropy(prediction[:, 0, :, :, :], target_var).mean()

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True)
    test_loss = binary_crossentropy(test_prediction[:, 0, :, :, :], target_var).mean()

    return test_prediction, prediction, loss, params
def get_model(input_images, input_position, input_mult, target_var):
    # number of SAX and distance between SAX slices
    #indexes = []
    #for i in range(input_position.shape[0]):
    #    indexes.append(numpy.where(input_position[i][:, 0] == 0.)[0][0])

    # input layer with unspecified batch size
    layer = InputLayer(shape=(None, 22, 30, 64, 64), input_var=input_images)
    #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))

    # seven identical residual blocks (the original repeated this block verbatim seven times)
    for _ in range(7):
        shortcut = layer
        layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
        layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
        layer = ElemwiseSumLayer([layer, shortcut])

    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
    layer = Conv3DDNNLayer(incoming=layer, num_filters=22, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=sigmoid)

    layer_max = ExpressionLayer(layer, lambda X: X.max(1), output_shape='auto')
    layer_min = ExpressionLayer(layer, lambda X: X.min(1), output_shape='auto')

    layer_prediction = layer

    # image prediction
    prediction = get_output(layer_prediction)
    loss = binary_crossentropy(prediction, target_var).mean()

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True)
    test_loss = binary_crossentropy(test_prediction, target_var).mean()

    return test_prediction, prediction, loss, params
def get_model(input_var, target_var, multiply_var):
    # input layer with unspecified batch size
    layer = InputLayer(shape=(None, 12, 64, 64), input_var=input_var)
    #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer = DimshuffleLayer(layer, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
    layer = batch_norm(Conv3DDNNLayer(incoming=layer, num_filters=16, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=rectify))
    layer = Conv3DDNNLayer(incoming=layer, num_filters=1, filter_size=(3, 3, 3), stride=(1, 1, 1), pad='same', nonlinearity=sigmoid)

    layer_prediction = layer

    # Loss
    prediction = get_output(layer_prediction)
    loss = binary_crossentropy(prediction[:, 0, :, :, :], target_var).mean()

    # Updates: Stochastic Gradient Descent (SGD) with Nesterov momentum
    params = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(layer_prediction, deterministic=True)
    test_loss = binary_crossentropy(test_prediction[:, 0, :, :, :], target_var).mean()

    return test_prediction, prediction, loss, params
def calc_loss_multi(prediction, targets):
    # we need to clip predictions when calculating the log-loss
    prediction = T.clip(prediction, 0.0000001, 0.9999999)
    # binary crossentropy is the best choice for a multi-class sigmoid output
    loss = T.mean(objectives.binary_crossentropy(prediction, targets))
    return loss
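# A minimal numeric sketch (not from the original sources; all names below are
# illustrative) of why the clipping in calc_loss_multi above matters: a
# prediction of exactly 0 or 1 puts log(0) into the crossentropy, so the
# unclipped loss goes to inf while the clipped loss stays finite.
import numpy as np
import theano
import theano.tensor as T
from lasagne import objectives

p = T.vector('p')
t = T.vector('t')
raw = T.mean(objectives.binary_crossentropy(p, t))
clipped = T.mean(objectives.binary_crossentropy(T.clip(p, 0.0000001, 0.9999999), t))
f = theano.function([p, t], [raw, clipped])

floatX = theano.config.floatX
preds = np.array([0.0, 1.0, 0.5], dtype=floatX)
targs = np.array([1.0, 0.0, 1.0], dtype=floatX)
print(f(preds, targs))  # first value is inf, second stays finite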
def compile_model(input_var, target_var, net):
    prediction = layers.get_output(net['out'])
    loss = binary_crossentropy(prediction, target_var)
    loss = lasagne.objectives.aggregate(loss)

    params = layers.get_all_params(net['out'], trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=1e-5)

    test_prediction = layers.get_output(net['out'], deterministic=True)
    test_loss = binary_crossentropy(test_prediction, target_var)
    test_loss = lasagne.objectives.aggregate(test_loss)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], test_loss)
    prob_fn = theano.function([input_var], test_prediction)

    return train_fn, val_fn, prob_fn
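# A hedged usage sketch for compile_model above; the tiny one-layer net and the
# shapes are assumptions for illustration (the function only requires a dict
# with an 'out' layer), and it assumes the module-level imports compile_model
# relies on (theano, lasagne, layers, binary_crossentropy) are in scope.
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers

input_var = T.matrix('inputs')
target_var = T.matrix('targets')
net = {'in': layers.InputLayer((None, 10), input_var=input_var)}
net['out'] = layers.DenseLayer(net['in'], num_units=1,
                               nonlinearity=lasagne.nonlinearities.sigmoid)
train_fn, val_fn, prob_fn = compile_model(input_var, target_var, net)

floatX = theano.config.floatX
X = np.random.rand(4, 10).astype(floatX)
y = (np.random.rand(4, 1) > 0.5).astype(floatX)
print(train_fn(X, y), val_fn(X, y), prob_fn(X).shape)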
def test_binary_crossentropy(colvect):
    # symbolic version
    from lasagne.objectives import binary_crossentropy
    if not colvect:
        p, t = theano.tensor.matrices('pt')
        c = binary_crossentropy(p, t)
    else:
        # check that for convenience, comparing a prediction column vector
        # against a 1D target vector does not lead to broadcasting
        p, t = theano.tensor.vectors('pt')
        c = binary_crossentropy(p.dimshuffle(0, 'x'), t)[:, 0]
    # numeric version
    floatX = theano.config.floatX
    shape = (10, 20) if not colvect else (10,)
    predictions = np.random.rand(*shape).astype(floatX)
    targets = np.random.rand(*shape).astype(floatX)
    crossent = (-targets * np.log(predictions) -
                (1 - targets) * np.log(1 - predictions))
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
def build_loss(pred_var, target_var, resp_dur, t_ind):
    if t_ind in (0, 1, 4):
        loss = T.mean(T.mod(T.abs_(pred_var[:, -resp_dur:, :] - target_var[:, -resp_dur:, :]), np.pi))
    elif t_ind in (2, 6, 8):
        loss = T.mean(binary_crossentropy(pred_var[:, -resp_dur:, -1], target_var[:, -resp_dur:, -1]))
    return loss
def binary_crossentropy_void(y_pred, y_true, y_mask):
    # Flatten y_true
    y_true = T.reshape(y_true, y_pred.shape)
    y_mask = T.reshape(y_mask, y_pred.shape)

    eps = 1e-12
    y_pred = y_pred.clip(0 + eps, 1 - eps)

    error = y_mask * binary_crossentropy(y_pred, y_true)
    return T.mean(error)
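# A hedged usage sketch for binary_crossentropy_void above (variable names and
# shapes are illustrative assumptions; assumes `binary_crossentropy` is imported
# from lasagne.objectives as in the snippet). One design point worth noting:
# T.mean divides by *all* elements, masked ones included; dividing by
# y_mask.sum() instead would average only over the valid pixels.
import numpy as np
import theano
import theano.tensor as T

y_pred = T.matrix('y_pred')
y_true = T.matrix('y_true')
y_mask = T.matrix('y_mask')
f = theano.function([y_pred, y_true, y_mask],
                    binary_crossentropy_void(y_pred, y_true, y_mask))

floatX = theano.config.floatX
pred = np.random.rand(4, 8).astype(floatX)
true = (np.random.rand(4, 8) > 0.5).astype(floatX)
mask = np.ones((4, 8), dtype=floatX)
mask[:, :2] = 0.0  # first two columns are "void" and contribute zero loss
print(f(pred, true, mask))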
def test_binary_crossentropy():
    # symbolic version
    from lasagne.objectives import binary_crossentropy
    p, t = theano.tensor.matrices('pt')
    c = binary_crossentropy(p, t)
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    targets = np.random.rand(10, 20).astype(floatX)
    crossent = (-targets * np.log(predictions) -
                (1 - targets) * np.log(1 - predictions))
    # compare
    assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
def run(get_model, model_name):
    train_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10)
    valid_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10, port=5558)

    input_var = tensor.tensor4('image_features')
    target_var = tensor.tensor4('image_targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    test_prediction, prediction, params = get_model(input_var, target_var, multiply_var)

    loss = binary_crossentropy(prediction, target_var).mean()
    loss.name = 'loss'

    valid_error = T.neq((test_prediction > 0.5) * 1., target_var).mean()
    valid_error.name = 'error'

    scale = Scale(0.1)
    algorithm = GradientDescent(
        cost=loss,
        parameters=params,
        step_rule=scale,
        #step_rule=Adam(),
        on_unused_sources='ignore'
    )

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[loss, valid_error], data_stream=valid_stream, prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        # Checkpoint('train'),
        FinishAfter(after_n_epochs=10)
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)
    cg = ComputationGraph(test_prediction)

    while True:
        main_loop.run()
        scale.learning_rate.set_value(numpy.float32(scale.learning_rate.get_value() * 0.7))
        numpy.savez('best_weights.npz', [param.get_value() for param in cg.shared_variables])
def similarity_iter(output_layer, match_layer, update_params, match_layer_w=0):
    X1 = T.tensor4()
    X2 = T.tensor4()
    y = T.ivector()

    # find the input layers
    # TODO this better
    all_layers = ll.get_all_layers(match_layer)
    # make image of all layers
    imwrite_architecture(all_layers, './layer_rep.png')
    input_1 = filter(lambda x: x.name == 'input1', all_layers)[0]
    input_2 = filter(lambda x: x.name == 'input2', all_layers)[0]

    descriptors_train, match_prob_train = ll.get_output([output_layer, match_layer], {input_1: X1, input_2: X2})
    descriptors_eval, match_prob_eval = ll.get_output([output_layer, match_layer], {input_1: X1, input_2: X2}, deterministic=True)
    #descriptor_shape = ll.get_output_shape(output_layer, {input_1: X1, input_2: X2})
    #print("Network output shape: %r" % (descriptor_shape,))

    # distance minimization
    distance = lambda x: (x[:, 0, :] - x[:, 1, :] + 1e-7).norm(2, axis=1)
    #distance_eval = (descriptors_eval[:, 0, :] - descriptors_eval[:, 1, :] + 1e-7).norm(2, axis=1)
    # 9/21 squaring the loss seems to prevent it from getting to 0.5 really quickly (i.e. w/in 3 epochs)
    # let's see if it will learn something good
    margin = 1
    decay = 0
    reg = regularize_network_params(match_layer, l2) * decay
    loss = lambda x, z: ((1 - match_layer_w) * T.mean(y * (distance(x)) + (1 - y) * (T.maximum(0, margin - distance(x)))) / 2  # contrastive loss
                         + match_layer_w * T.mean(binary_crossentropy(z.T + 1e-7, y)))  # matching loss
    loss_reg = lambda x, z: (loss(x, z) + reg)
    # this loss doesn't work since it just pushes all the descriptors near each other and then predicts 0 all the time for the matching
    #jason_loss = lambda x, z: T.mean(distance(x) * y + (1 - y) * binary_crossentropy(z.T + 1e-7, y))
    #loss_eval = T.mean(y * (distance_eval ** 2) + (1 - y) * (T.maximum(0, 1 - distance_eval) ** 2))

    all_params = ll.get_all_params(match_layer)  # unsure how I would do this if there were truly two trainable branches...
    loss_train = loss_reg(descriptors_train, match_prob_train)
    loss_train.name = 'combined_loss'  # for the names
    grads = T.grad(loss_train, all_params, add_names=True)
    #updates = adam(grads, all_params, **update_params)
    updates = nesterov_momentum(grads, all_params, **update_params)
    train_iter = theano.function([X1, X2, y], [loss_train, loss(descriptors_train, match_prob_train)] + grads, updates=updates)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png', var_with_name_simple=True)
    valid_iter = theano.function([X1, X2, y], loss(descriptors_eval, match_prob_eval))

    return {'train': train_iter, 'valid': valid_iter, 'gradnames': [g.name for g in grads]}
def get_cost_L(self, inputs):
    # make it clear which get_output_for is used
    print('getting_cost_L')
    # inputs must obey the order.
    image_input, label_input = inputs
    encoder = self.encoder.get_output_for(self.concat_xy.get_output_for([image_input, label_input]))
    mu_z = self.encoder_mu.get_output_for(encoder)
    log_var_z = self.encoder_log_var.get_output_for(encoder)
    z = self.sampler.get_output_for([mu_z, log_var_z])
    decoder = self.decoder.get_output_for(self.concat_yz.get_output_for([label_input, z]))
    reconstruct = self.decoder_x.get_output_for(decoder)
    l_x = objectives.binary_crossentropy(reconstruct, image_input).sum(1)
    l_z = ((mu_z ** 2 + T.exp(log_var_z) - 1 - log_var_z) * 0.5).sum(1)
    cost_L = l_x + l_z
    return cost_L
def build_functions(self, deterministic=False):
    l_out = self.layer_output
    x_sym = T.lmatrix()
    y_sym = T.lvector()

    output = lasagne.layers.get_output(l_out, x_sym, deterministic=deterministic)
    pred = output.argmax(-1)

    #loss = objectives.categorical_crossentropy(output, y_sym).mean()
    loss = objectives.binary_crossentropy(output, y_sym).mean()
    params = lasagne.layers.get_all_params(l_out)
    acc = T.mean(T.eq(pred, y_sym))  # compare argmax predictions to labels (the original compared raw outputs)

    #grad = T.grad(loss, params)
    #updates = lasagne.updates.sgd(grad, params, learning_rate=0.01)
    #updates = lasagne.updates.adam()
    updates = lasagne.updates.adam(loss, params)

    f_train = theano.function([x_sym, y_sym], [loss, acc], updates=updates)
    f_train_pred = theano.function([x_sym, y_sym], [loss, acc, output], updates=updates)
    f_val = theano.function([x_sym, y_sym], [loss, acc])
    f_predict = theano.function([x_sym], pred)
    f_test_predict = theano.function([x_sym], output)

    self.functions = Chibi_atlas({
        'train': f_train,
        'train_predict': f_train_pred,
        'val': f_val,
        'predict': f_predict,
        'test_predict': f_test_predict,
    })
    return self.functions
def __init__(self, input_size, layers_config=[(64, 8, 2, 'valid'), (128, 3, 2, 'same')],
             code_layer_size=2, batch_norm=True, nonlinearity=rectify):
    """This class is made to support a variable number of layers.

    :type input_size: tuple of int
    :param input_size: Shape of the input, i.e. (None, 1, 28, 28) means that it
        will have an amount of examples defined at runtime, with one channel
        and of size 28 x 28.

    :type layers_config: list of tuples of ints
    :param layers_config: Configuration of the net, i.e.
        [(64, 5, 2, 'valid'), (32, 3, None, 'same')] means the first layer will
        output 64 feature maps, use filters of size 5 and be followed by a
        max-pooling layer with a pool size of 2. The second layer will output
        32 feature maps, use filters of size 3 and will not be followed by a
        pooling layer. The 4th param is the padding, see:
        http://lasagne.readthedocs.org/en/latest/modules/layers/conv.html#lasagne.layers.Conv2DLayer

    :type code_layer_size: int
    :param code_layer_size: Determines the size of the code layer.

    :type batch_norm: bool
    :param batch_norm: If True, batch-normalization will be used. Otherwise, bias will be used.

    :type nonlinearity: Lasagne.nonlinearities
    :param nonlinearity: Defines the activation function to use.
    """
    def bias_plus_nonlinearity(l, bias, nl):
        l = bias(l)
        l = NonlinearityLayer(l, nonlinearity=nl)
        return l

    self.x = T.tensor4('inputs')  # the data is presented as rasterized images
    self.normalization_layer = BatchNormLayer if batch_norm else BiasLayer
    self.nonlinearity = nonlinearity
    self.code_layer_size = code_layer_size
    self.network_config_string = ""

    l = InputLayer(input_var=self.x, shape=input_size)
    invertible_layers = []  # Used to keep track of layers that will be inverted in the decoding phase

    """ Encoding """
    for layer in layers_config:
        l = Conv2DLayer(l, num_filters=layer[0], filter_size=layer[1], nonlinearity=None,
                        b=None, W=lasagne.init.GlorotUniform(), pad=layer[3])
        invertible_layers.append(l)
        self.network_config_string += "(" + str(layer[0]) + ")" + str(layer[1]) + "c"
        print(l.output_shape)
        l = bias_plus_nonlinearity(l, self.normalization_layer, self.nonlinearity)  # assign the result (the original discarded it)
        if layer[2] is not None:  # then we add a pooling layer
            l = MaxPool2DLayer(l, layer[2])
            invertible_layers.append(l)
            self.network_config_string += "-" + str(layer[2]) + "p"
            print(l.output_shape)
        self.network_config_string += "-"

    # l = DenseLayer(l, num_units=l.output_shape[1], nonlinearity=None, b=None)
    # invertible_layers.append(l)
    # self.network_config_string += str(l.output_shape[1]) + "fc"
    # print(l.output_shape)

    l = DenseLayer(l, num_units=self.code_layer_size, nonlinearity=None, b=None)
    invertible_layers.append(l)
    self.network_config_string += str(self.code_layer_size) + "fc"
    print(l.output_shape)
    # Inspired by Hinton (2006) paper, the code layer is linear, which allows to retain
    # more info, especially with code layers of small dimension
    l = bias_plus_nonlinearity(l, self.normalization_layer, linear)
    self.code_layer = get_output(l)

    """ Decoding """
    # l = InverseLayer(l, invertible_layers.pop())  # Inverses the fully connected layer
    # print(l.output_shape)
    # l = bias_plus_nonlinearity(l, self.normalization_layer, self.nonlinearity)
    l = InverseLayer(l, invertible_layers.pop())  # Inverses the fully connected layer
    print(l.output_shape)
    l = bias_plus_nonlinearity(l, self.normalization_layer, self.nonlinearity)
    for i, layer in enumerate(layers_config[::-1]):
        if layer[2] is not None:
            l = InverseLayer(l, invertible_layers.pop())  # Inverses a max-pooling layer
            print(l.output_shape)
        l = InverseLayer(l, invertible_layers.pop())  # Inverses the convolutional layer
        print(l.output_shape)
        # last layer is a sigmoid because it's a reconstruction and pixel values are between 0 and 1
        nl = sigmoid if i == len(layers_config) - 1 else self.nonlinearity
        l = bias_plus_nonlinearity(l, self.normalization_layer, nl)  # its own bias_nonlinearity

    self.network = l
    self.reconstruction = get_output(self.network)
    self.params = get_all_params(self.network, trainable=True)

    # Sum on axes 1-2-3 as they represent the image (channels, height, width). This means that we
    # obtain the binary cross-entropy for every image of the mini-batch, of which we then take the mean.
    self.fine_tune_cost = T.sum(binary_crossentropy(self.reconstruction, self.x), axis=(1, 2, 3)).mean()
    self.test_cost = T.sum(binary_crossentropy(get_output(self.network), self.x), axis=(1, 2, 3)).mean()
def loss(x, t):
    return aggregate(binary_crossentropy(x, t))
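# A minimal sketch (assumed context: `aggregate` and `binary_crossentropy` come
# from lasagne.objectives) showing that the loss helper above is just the mean
# elementwise crossentropy, since aggregate() defaults to mode='mean'.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('predictions')
t = T.matrix('targets')
f = theano.function([x, t], loss(x, t))

floatX = theano.config.floatX
print(f(np.full((2, 3), 0.5, dtype=floatX),
        np.ones((2, 3), dtype=floatX)))  # -log(0.5), about 0.693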
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):
    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1

    # important context words as channels
    # CNN_sentence config
    filter_size = wordDim
    pool_size = seqlen - filter_size + 1

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable')

    #(batchsize, seqlen, wordDim)
    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1,
                         nonlinearity=tanh, W=GlorotUniform())
    #nOutputFrame = num_filters,
    #nOutputFrameSize = (num_feats*wordDim - filter_size)/stride + 1
    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0, 2, 1))
    #print get_output_shape(conv1d)

    pool_size = num_filters
    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size)
    #print get_output_shape(maxpool)

    #forward = FlattenLayer(maxpool)
    #print get_output_shape(forward)

    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(binary_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {emb: lambda_val, conv1d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")  # original raised a bare string, which is invalid

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
def distance_capped_binary_crossentropy(preds, targets, distance):
    loss = binary_crossentropy(preds, targets)
    mask = T.gt(T.abs_(preds - targets), distance)
    return loss * mask
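# A hedged numeric check (names are illustrative) for
# distance_capped_binary_crossentropy above: predictions within `distance` of
# the target are masked to zero loss, so only errors larger than the cap are
# penalized.
import numpy as np
import theano
import theano.tensor as T

p = T.vector('p')
t = T.vector('t')
f = theano.function([p, t], distance_capped_binary_crossentropy(p, t, 0.1))

floatX = theano.config.floatX
preds = np.array([0.95, 0.60], dtype=floatX)
targs = np.array([1.00, 1.00], dtype=floatX)
print(f(preds, targs))  # first entry zeroed (|0.95 - 1.00| <= 0.1), second kept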
                              W=GlorotUniform(), nonlinearity=rectify)
    print(get_output_shape(lae_deconv7))
    lae_unpool7 = Upscale2DLayer(lae_deconv7, (2, 2))
    print(get_output_shape(lae_unpool7))
    convae = Conv2DLayerFast(lae_unpool7, 1, (5, 5), pad=(2, 2), W=GlorotUniform(), nonlinearity=sigmoid)
    print(get_output_shape(convae))

    print('[ConvAE] define loss, optimizer, and compile')
    Yae_pred_ = get_output(convae)
    loss_ = binary_crossentropy(Yae_pred_, Yae_)
    loss_ = loss_.mean()
    params_ = lasagne.layers.get_all_params(convae, trainable=True)
    updates_ = rmsprop(loss_, params_, learning_rate=confae['lr'])
    train_ae_fn = theano.function([Xae_, Yae_], loss_, updates=updates_)
    pred_ae_fn = theano.function([Xae_], Yae_pred_)

    ##################
    if confnet['is_aug']:
        ddatagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
def get_options(batchsize, nepochs, plotevery, learningrate, normalizegrads,
                clipgrads, enabledebug, optimizer, yzeromean, yunitvar,
                noshuffle, nobatchnorm, remove5koutliers, coulombdim,
                datadir, outputdir):
    global batch_size
    batch_size = batchsize
    global epochs
    epochs = nepochs

    print("Changing pwd to {}".format(outputdir))
    os.chdir(outputdir)

    mydir = os.path.join(os.getcwd(), datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(mydir)
    os.chdir(mydir)

    app_name = sys.argv[0]
    global logger
    logger = get_logger(app_name=app_name, logfolder=mydir)

    # Load dataset
    w, h = coulombdim
    X, Y = load_data(datadir + os.sep + "coulomb.txt",
                     datadir + os.sep + "energies.txt", w=w, h=h)

    if remove5koutliers:
        from get_idxs_to_keep import get_idxs_to_keep
        idxs = get_idxs_to_keep(datadir + os.sep + "energies.txt")
        X = X[idxs, :]
        Y = Y[idxs, :]
        logger.info("REMOVING 5k outliers.")

    Y, Y_mean, Y_std, Y_binarized = preprocess_targets(Y, zero_mean=yzeromean, unit_var=yunitvar)
    [X_train, X_test], [Y_train, Y_test], splits = get_data_splits(X, Y, splits=[90, 10])
    [Y_binarized_train, Y_binarized_test] = np.split(Y_binarized, splits)[:-1]

    np.savez('Y_vals.npz', Y_train=Y_train, Y_test=Y_test,
             Y_binarized_test=Y_binarized_test, Y_binarized_train=Y_binarized_train,
             Y_mean=Y_mean, Y_std=Y_std)
    np.savez('X_vals.npz', X_train=X_train, X_test=X_test)

    dataDim = X.shape[1:]
    assert dataDim == (w, h), \
        "The dimensions of data you have passed {} and the ones after loading datafile {} don't match!".format((w, h), dataDim)
    outputDim = Y.shape[1]
    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    # # making the datapoints shared variables
    # X_train = make_shared(X_train)
    # X_test = make_shared(X_test)
    # Y_train = make_shared(Y_train)
    # Y_test = make_shared(Y_test)
    # Y_binarized_train = make_shared(Y_binarized_train)
    # Y_binarized_test = make_shared(Y_binarized_test)

    # TODO !!!! I am here
    # print("Train set size {}, Train set (labelled) size {}, Test set size {}," +
    #       "Validation set size {}".format(
    #           train_set[0].size, train_set_labeled[0].size,
    #           test_set[0].size, valid_set[0].size))

    eigen_value_count = outputDim

    # Defining the model now.
    th_coulomb = T.ftensor3()
    th_energies = T.fmatrix()
    th_energies_bin = T.fmatrix()
    th_learningrate = T.fscalar()

    l_input = InputLayer(shape=(None, dataDim[0], dataDim[1]), input_var=th_coulomb, name="Input")
    l_input = FlattenLayer(l_input, name="FlattenInput")
    l_pseudo_bin = DenseLayer(l_input, num_units=2000, nonlinearity=sigmoid, name="PseudoBinarized")

    if not nobatchnorm:
        l_pseudo_bin = batch_norm(l_pseudo_bin)

    l_h1 = []
    l_h2 = []
    l_realOut = []
    l_binOut = []
    for branch_num in range(eigen_value_count):
        l_h1.append(DenseLayer(l_pseudo_bin, num_units=1000, nonlinearity=rectify, name="hidden_1_%d" % branch_num))
        l_h2.append(DenseLayer(l_h1[-1], num_units=400, nonlinearity=rectify, name="hidden_2_%d" % branch_num))
        l_realOut.append(DenseLayer(l_h2[-1], num_units=1, nonlinearity=linear, name="realOut_%d" % branch_num))
        l_binOut.append(DenseLayer(l_h2[-1], num_units=1, nonlinearity=sigmoid, name="binOut"))

    l_realOut_cat = ConcatLayer(l_realOut, name="real_concat")
    l_binOut_cat = ConcatLayer(l_binOut, name="bin_concat")
    l_output = ElemwiseMergeLayer([l_binOut_cat, l_realOut_cat], T.mul, name="final_output")

    energy_output = get_output(l_output, deterministic=False)
    binary_output = get_output(l_binOut_cat, deterministic=False)

    # get deterministic output for validation
    energy_output_det = get_output(l_output, deterministic=True)
    binary_output_det = get_output(l_binOut_cat, deterministic=True)

    loss_real = T.mean(abs(energy_output - th_energies))
    loss_binary = T.mean(binary_crossentropy(binary_output, th_energies_bin))
    loss = loss_real + loss_binary

    # get loss output for validation
    loss_real_det = T.mean(abs(energy_output_det - th_energies))
    loss_binary_det = T.mean(binary_crossentropy(binary_output_det, th_energies_bin))
    loss_det = loss_real_det + loss_binary_det

    params = get_all_params(l_output, trainable=True)
    grad = T.grad(loss, params)

    if normalizegrads is not None:
        grad = lasagne.updates.total_norm_constraint(grad, max_norm=normalizegrads)

    if clipgrads is not None:
        grad = [T.clip(g, -clipgrads, clipgrads) for g in grad]

    optimization_algo = get_optimizer[optimizer]
    # updates = optimization_algo(grad, params, learning_rate=learningrate)
    updates = optimization_algo(grad, params, learning_rate=th_learningrate)

    train_fn = theano.function([th_coulomb, th_energies, th_energies_bin, th_learningrate],
                               [loss, energy_output], updates=updates, allow_input_downcast=True)
    get_grad = theano.function([th_coulomb, th_energies, th_energies_bin], grad)
    # get_updates = theano.function([th_data, th_labl], [updates.values()])
    # val_fn = theano.function([th_coulomb, th_energies, th_energies_bin], [loss, energy_output], updates=updates, allow_input_downcast=True)
    val_fn = theano.function([th_coulomb, th_energies, th_energies_bin],
                             [loss_det, energy_output_det], allow_input_downcast=True)

    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    with open(os.path.join(mydir, "data.txt"), "w") as f:
        script = app_name
        for elem in ["meta_seed", "dataDim", "batch_size", "epochs", "learningrate",
                     "normalizegrads", "clipgrads", "enabledebug", "optimizer",
                     "plotevery", "noshuffle", "nobatchnorm", "remove5koutliers",
                     "coulombdim", "script", "datadir"]:
            f.write("{} : {}\n".format(elem, eval(elem)))

    train_loss_lowest = np.inf
    test_loss_lowest = np.inf
    row_norms = np.linalg.norm(X_train, axis=-1)

    for epoch in range(epochs):
        batch_start = 0
        train_loss = []

        if learningrate is None:
            if epoch < 50:
                learning_rate = 0.0001
            elif epoch < 100:
                learning_rate = 0.00001
            elif epoch < 500:
                learning_rate = 0.000001
            else:
                learning_rate = 0.0000001
        else:
            learning_rate = eval(learningrate)
            if isinstance(learning_rate, float):
                pass
            elif isinstance(learning_rate, list):
                for epch, lrate in learning_rate:
                    # ensure that last epoch is float("inf")
                    if epoch <= epch:
                        learning_rate = lrate
                        break
            else:
                raise RuntimeError("Invalid learning rate. Either\n 1) Float or 2) List [[epch, lrate], ..., [float('inf'), lrate]]")
        logger.debug("learning rate {}".format(learning_rate))

        indices = np.random.permutation(datapoints)
        minibatches = int(datapoints / batch_size)

        if not noshuffle:
            logger.debug("Shuffling Started.")
            X_train = coulomb_shuffle(X_train, row_norms)
            logger.debug("Shuffling complete.")

        for minibatch in range(minibatches):
            train_idxs = indices[batch_start:batch_start + batch_size]
            X_train_batch = X_train[train_idxs, :]
            Yr_train_batch = Y_train[train_idxs, :]
            Yb_train_batch = Y_binarized_train[train_idxs, :]

            train_output = train_fn(X_train_batch, Yr_train_batch, Yb_train_batch, learning_rate)
            batch_start = batch_start + batch_size
            train_loss.append(train_output[0])

            if enabledebug:
                # Debugging information
                batchIdx = epoch * minibatches + minibatch
                fn = 'params_{:>010d}'.format(batchIdx)  # saving params (the original had an empty .format() call)
                param_values = get_all_param_values(l_output)
                param_norm = np.linalg.norm(np.hstack([param.flatten() for param in param_values]))
                gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
                gradient_norm = np.linalg.norm(np.hstack([gradient.flatten() for gradient in gradients]))
                logger.debug("Epoch : {:0>4} minibatch {:0>3} Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} (Values from Prev. Minibatch) Train loss {}".format(
                    epoch, minibatch, gradient_norm, param_norm, gradient_norm / param_norm, train_loss[-1]))
                param_names = [param.__str__() for param in get_all_params(l_output)]
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(batchIdx), Y_train_pred=train_output[1])
                if train_loss[-1] < train_loss_lowest:
                    train_loss_lowest = train_loss[-1]
                    np.savez('Y_train_pred_best.npz', Y_train_pred=train_output[1])
                    logger.debug("Found the best training prediction (Y_train_pred_best) at %d epoch %d minibatch" % (epoch, minibatch))
                if np.isnan(gradient_norm):
                    pdb.set_trace()

        if (epoch % plotevery == 0):
            logger.info("Epoch {} of {}".format(epoch, epochs))
            fn = 'params_{:>03d}'.format(epoch)  # saving params
            param_values = get_all_param_values(l_output)
            param_norm = np.linalg.norm(np.hstack([param.flatten() for param in param_values]))
            param_names = [param.__str__() for param in get_all_params(l_output)]
            if not enabledebug:
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(epoch), Y_train_pred=train_output[1])
            mean_train_loss = np.mean(train_loss)
            if mean_train_loss < train_loss_lowest:
                train_loss_lowest = mean_train_loss
                np.savez('Y_train_pred_best.npz', Y_train_pred=train_output[1])
                logger.info("Found the best training prediction (Y_train_pred_best) at %d epoch" % epoch)
            gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
            gradient_norm = np.linalg.norm(np.hstack([gradient.flatten() for gradient in gradients]))
            logger.info(" Gradient Norm : {:>0.4}, Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} ".format(gradient_norm, param_norm, gradient_norm / param_norm))
            logger.info(" Train loss {:>0.4}".format(np.mean(train_loss)))

            test_loss, test_prediction = val_fn(X_test, Y_test, Y_binarized_test)
            np.savez('Y_test_pred_{}.npz'.format(epoch), Y_test_pred=test_prediction)
            logger.info(" Test loss {}".format(test_loss))
            if test_loss < test_loss_lowest:
                test_loss_lowest = test_loss
                np.savez('Y_test_pred_best.npz', Y_test_pred=test_prediction)
                logger.info("Found the best test prediction (Y_test_pred_best) at %d epoch" % epoch)
def load_weights():
    model_name = 'model_weights.npz'
    with np.load(model_name) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    set_all_param_values([RoadSegment], param_values)


def save_weights():
    np.savez('model_weights.npz', *get_all_param_values([RoadSegment]))


road_segment = theano.function(inputs=[X], outputs=frs, allow_input_downcast=True)

loss = binary_crossentropy(rs, Y)
loss = loss.mean()
IoU = T.and_(T.ge(rs, 0.5), T.ge(Y, 0.5)).sum(axis=(1, 2)) * \
    T.minimum(T.inv(T.or_(T.ge(rs, 0.5), T.ge(Y, 0.5)).sum(axis=(1, 2))), 128.0 * 256.0)
IoU = IoU.mean()

test_loss = binary_crossentropy(frs, Y)
test_loss = test_loss.mean()
test_IoU = T.and_(T.ge(frs, 0.5), T.ge(Y, 0.5)).sum(axis=(1, 2)) * \
    T.minimum(T.inv(T.or_(T.ge(frs, 0.5), T.ge(Y, 0.5)).sum(axis=(1, 2))), 128.0 * 256.0)
test_IoU = test_IoU.mean()

loss_function = theano.function(inputs=[X, Y, P], outputs=[loss, IoU],
def saliency_map(input, output, pred, X):
    score = -binary_crossentropy(output[:, pred], np.array([1])).sum()
    return np.abs(T.grad(score, input).eval({input: X}))
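# A hedged usage sketch for saliency_map above. The tiny sigmoid model is an
# illustrative assumption, not the original network; it assumes
# binary_crossentropy, np and T are in scope as in the snippet. The gradient of
# the class score with respect to the input shows which input entries drive the
# prediction.
import numpy as np
import theano
import theano.tensor as T

inp = T.matrix('inp')
W = np.random.randn(5, 2).astype(theano.config.floatX)
out = T.nnet.sigmoid(inp.dot(W))  # (batch, 2) independent sigmoid outputs
Xval = np.random.rand(1, 5).astype(theano.config.floatX)
print(saliency_map(inp, out, 1, Xval))  # saliency of output unit 1 w.r.t. the input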
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):
    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    GRAD_CLIP = wordDim
    args.lstmDim = 150

    input = InputLayer((None, seqlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen), input_var=input_mask_var)

    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb_1.W].remove('trainable')

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask,
                     grad_clipping=GRAD_CLIP, nonlinearity=tanh)
    lstm_back = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask,
                          grad_clipping=GRAD_CLIP, nonlinearity=tanh, backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1)  # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1)  # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(binary_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {emb: lambda_val, lstm: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")  # original raised a bare string, which is invalid

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, input_mask_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, input_mask_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    return train_fn, val_fn, network
def train(options):
    # -------- setup options and data ------------------
    np.random.seed(options['seed'])

    # Load options
    host = socket.gethostname()  # get computer hostname
    start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")

    model = importlib.import_module(options['model_file'])

    # ---------- build model and compile ---------------
    input_batch = T.tensor4()  # input image sequences
    target = T.tensor4()  # target image

    print('Build model...')
    model = model.Model(**options['modelOptions'])

    print('Compile ...')
    net, outputs, filters = model.build_model(input_batch)

    # compute loss
    outputs = get_output(outputs + [filters])
    output_frames = outputs[:-1]
    output_filter = outputs[-1]

    train_losses = []
    for i in range(options['modelOptions']['target_seqlen']):
        output_frame = output_frames[i]

        if options['loss'] == 'squared_error':
            frame_loss = squared_error(output_frame, target[:, [i], :, :])
        elif options['loss'] == 'binary_crossentropy':
            # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436
            output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1 - np.finfo(np.float32).eps)
            frame_loss = binary_crossentropy(output_frame, target[:, [i], :, :])
        else:
            assert False

        train_losses.append(aggregate(frame_loss))

    train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']

    # update
    sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate']))  # to allow dynamic learning rate
    layers = get_all_layers(net)
    all_params = get_all_params(layers, trainable=True)
    updates = adam(train_loss, all_params, learning_rate=sh_lr)
    _train = theano.function([input_batch, target], train_loss, updates=updates, allow_input_downcast=True)
    _test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True)

    # ------------ data setup ----------------
    print('Prepare data...')
    dataset = importlib.import_module(options['dataset_file'])
    dh = dataset.DataHandler(**options['datasetOptions'])

    # ------------ training setup ----------------
    if options['pretrained_model_path'] is not None:
        checkpoint = pickle.load(open(options['pretrained_model_path'], 'rb'))
        model_values = checkpoint['model_values']  # overwrite the values of model parameters
        lasagne.layers.set_all_param_values(layers, model_values)

        history_train = checkpoint['history_train']
        start_epoch = checkpoint['epoch'] + 1
        options['batch_size'] = checkpoint['options']['batch_size']
        sh_lr.set_value(floatX(checkpoint['options']['learning_rate']))
    else:
        start_epoch = 0
        history_train = []

    # ------------ actual training ----------------
    print('Start training ...')
    input_seqlen = options['modelOptions']['input_seqlen']
    for epoch in range(start_epoch, start_epoch + options['num_epochs']):
        epoch_start_time = time.time()
        history_batch = []
        for batch_index in range(0, options['batches_per_epoch']):

            batch = dh.GetBatch()  # generate data on the fly
            if options['dataset_file'] == 'datasets.stereoCarsColor':
                batch_input = batch[..., :input_seqlen].squeeze(axis=4)  # first frames
                batch_target = batch[..., input_seqlen:].squeeze(axis=4)  # last frame
            else:
                batch_input = batch[..., :input_seqlen].transpose(0, 4, 2, 3, 1).squeeze(axis=4)  # first frames
                batch_target = batch[..., input_seqlen:].transpose(0, 4, 2, 3, 1).squeeze(axis=4)  # last frame

            # train
            loss_train = _train(batch_input, batch_target)
            history_batch.append(loss_train)
            print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], batch_index + 1, options['batches_per_epoch'], time.time() - epoch_start_time))
            print("  training loss:\t{:.6f}".format(loss_train.item()))

        # clear the screen
        display.clear_output(wait=True)

        # print statistics
        history_train.append(np.mean(history_batch))
        history_batch = []
        print("Epoch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], time.time() - epoch_start_time))
        print("  training loss:\t{:.6f}".format(history_train[epoch].item()))

        # set new learning rate (maybe this is unnecessary with adam updates)
        if (epoch + 1) % options['decay_after'] == 0:
            options['learning_rate'] = sh_lr.get_value() * 0.5
            print("New LR:", options['learning_rate'])
            sh_lr.set_value(floatX(options['learning_rate']))

        # save the model
        if (epoch + 1) % options['save_after'] == 0:
            save_model(layers, epoch, history_train, start_time, host, options)
            print("Model saved")
def saliency_map(input, output, pred, Xb):
    import theano.tensor as T
    from lasagne.objectives import binary_crossentropy

    score = -binary_crossentropy(output[:, pred], np.array([1])).sum()
    heat_map_ = np.abs(T.grad(score, input).eval({input: Xb}))
    return heat_map_
    allow_incomplete=True,
    include_all=True,
    skip_probability=0.25,
    offset_probability=0,
    n_rectangular_segments=N_SEGMENTS,
    rectangular_kwargs={'format': 'changepoints [0,1]'}
)

net_dict = dict(
    save_plot_interval=SAVE_PLOT_INTERVAL,
    # loss_function=partial(ignore_inactive, loss_func=mdn_nll, seq_length=SEQ_LENGTH),
    # loss_function=lambda x, t: mdn_nll(x, t).mean(),
    # loss_function=lambda x, t: (mse(x, t) * MASK).mean(),
    # loss_function=lambda x, t: mse(x, t).mean(),
    loss_function=lambda x, t: binary_crossentropy(x, t).mean(),
    # loss_function=partial(scaled_cost, loss_func=mse),
    # loss_function=ignore_inactive,
    # loss_function=partial(scaled_cost3, ignore_inactive=False),
    # updates_func=momentum,
    updates_func=clipped_nesterov_momentum,
    updates_kwargs={'clip_range': (0, 10)},
    learning_rate=1e-2,
    learning_rate_changes_by_iteration={
        1000: 1e-3,
        5000: 1e-4
    },
    do_save_activations=True,
    auto_reshape=False,
    # plotter=CentralOutputPlotter
    # plotter=Plotter(n_seq_to_plot=32)
def load_weights():
    model_name = 'model/' + dataset[5:] + '_model.npz'
    with np.load(model_name) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    set_all_param_values([FloorSegment, EdgeRegion], param_values)


def save_weights():
    np.savez('model_weights.npz', *get_all_param_values([RoadSegment]))


road_segment = theano.function(inputs=[X], outputs=frs, allow_input_downcast=True)

loss = binary_crossentropy(rs, Y)
loss = loss.mean()
IoU = T.and_(T.ge(rs, 0.5), T.ge(Y, 0.5)).sum(axis=(1, 2)) * \
    T.minimum(T.inv(T.or_(T.ge(rs, 0.5), T.ge(Y, 0.5)).sum(axis=(1, 2))), 120.0 * 160.0)
IoU = IoU.mean()

loss_function = theano.function(inputs=[X, Y, P], outputs=[loss, IoU], allow_input_downcast=True)

params = get_all_params(RoadSegment, trainable=True)
updates = adam(loss, params, learning_rate=lr)
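# A hedged numeric sketch (standalone; names are illustrative, and a float cast
# is added here that the expression above leaves implicit) of the IoU term used
# above: intersection over union of the thresholded masks, where T.minimum caps
# T.inv so an empty union (inv(0) = inf) cannot produce inf/NaN.
import numpy as np
import theano
import theano.tensor as T

a = T.tensor3('a')
b = T.tensor3('b')
inter = T.and_(T.ge(a, 0.5), T.ge(b, 0.5)).sum(axis=(1, 2))
union = T.or_(T.ge(a, 0.5), T.ge(b, 0.5)).sum(axis=(1, 2))
iou = inter * T.minimum(T.inv(T.cast(union, theano.config.floatX)), 120.0 * 160.0)
f = theano.function([a, b], iou)

floatX = theano.config.floatX
x = np.zeros((1, 4, 4), dtype=floatX)
y = np.zeros((1, 4, 4), dtype=floatX)
x[0, :2, :2] = 1.0   # a 2x2 square
y[0, 1:3, :2] = 1.0  # the same square shifted down one row
print(f(x, y))  # intersection 2, union 6 -> about [0.3333]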
def compile_update_softmax(nnet, inputs, targets):
    """
    create a softmax loss for network given in argument
    """

    floatX = Cfg.floatX
    C = Cfg.C

    final_layer = nnet.all_layers[-1]
    trainable_params = lasagne.layers.get_all_params(final_layer, trainable=True)

    # Regularization
    if Cfg.weight_decay:
        l2_penalty = (floatX(0.5) / C) * get_l2_penalty(nnet)
    else:
        l2_penalty = T.cast(0, dtype='float32')

    # Backpropagation
    prediction = lasagne.layers.get_output(final_layer, inputs=inputs, deterministic=False)

    if Cfg.ad_experiment:
        train_loss = T.mean(l_objectives.binary_crossentropy(prediction.flatten(), targets), dtype='float32')
        train_acc = T.mean(l_objectives.binary_accuracy(prediction.flatten(), targets), dtype='float32')
    else:
        train_loss = T.mean(l_objectives.categorical_crossentropy(prediction, targets), dtype='float32')
        train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets), dtype='float32')

    train_obj = T.cast(train_loss + l2_penalty, dtype='float32')
    updates = get_updates(nnet, train_obj, trainable_params, solver=nnet.solver)
    nnet.backprop = theano.function([inputs, targets], [train_obj, train_acc], updates=updates)

    # Forwardpropagation
    test_prediction = lasagne.layers.get_output(final_layer, inputs=inputs, deterministic=True)

    if Cfg.ad_experiment:
        test_loss = T.mean(l_objectives.binary_crossentropy(test_prediction.flatten(), targets), dtype='float32')
        test_acc = T.mean(l_objectives.binary_accuracy(test_prediction.flatten(), targets), dtype='float32')
    else:
        test_loss = T.mean(l_objectives.categorical_crossentropy(test_prediction, targets), dtype='float32')
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), targets), dtype='float32')

    test_obj = T.cast(test_loss + l2_penalty, dtype='float32')
    nnet.forward = theano.function([inputs, targets],
                                   [test_obj, test_acc, test_prediction, l2_penalty, test_loss])
    # target_stats={
    #     'mean': np.array([0.04066789, 0.01881946,
    #                       0.24639061, 0.17608672, 0.10273963],
    #                      dtype=np.float32),
    #     'std': np.array([0.11449792, 0.07338708,
    #                      0.26608968, 0.33463112, 0.21250485],
    #                     dtype=np.float32)}
)

N = 50
net_dict = dict(
    save_plot_interval=SAVE_PLOT_INTERVAL,
    # loss_function=partial(ignore_inactive, loss_func=mdn_nll, seq_length=SEQ_LENGTH),
    # loss_function=lambda x, t: mdn_nll(x, t).mean(),
    # loss_function=lambda x, t: mse(x, t).mean(),
    loss_function=lambda x, t: binary_crossentropy(x, t).mean(),
    # loss_function=partial(scaled_cost, loss_func=mse),
    # loss_function=ignore_inactive,
    # loss_function=partial(scaled_cost3, ignore_inactive=False),
    updates_func=momentum,
    learning_rate=1e-4,
    learning_rate_changes_by_iteration={
        # 200: 1e-2,
        # 400: 1e-3,
        # 800: 1e-4,
        # 500: 1e-3,
        # 4000: 1e-03,
        # 6000: 5e-06,
        # 7000: 1e-06,
        # 2000: 5e-06,
        # 3000: 1e-05
def build_network_2dconv(args, input_var, target_var, wordEmbeddings, maxlen=60):
    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    input = InputLayer((None, maxlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb.params[emb.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape = ReshapeLayer(emb, (batchsize, 1, maxlen, wordDim))

    conv2d = Conv2DLayer(reshape, num_filters=num_filters, filter_size=filter_size,
                         stride=stride, nonlinearity=rectify, W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 100, 1, 1)

    forward = FlattenLayer(maxpool)  # (None, 100)  #(None, 50400)

    hid = DenseLayer(forward, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(binary_crossentropy(prediction, target_var))

    lambda_val = 0.5 * 1e-4
    layers = {conv2d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise ValueError("Optimizer not set correctly")  # original raised a bare string, which is invalid

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
def safe_binary_crossentropy(predictions, targets, eps=1e-4):
    # add eps for predictions that are smaller than eps
    predictions = predictions + T.le(predictions, eps) * eps
    # remove eps for predictions that are larger than 1 - eps
    predictions = predictions - T.ge(predictions, 1 - eps) * eps
    return binary_crossentropy(predictions, targets)
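# A small numeric sketch (names illustrative) showing that safe_binary_crossentropy
# above stays finite at the endpoints where the raw crossentropy diverges:
# p = 0 against t = 1 and p = 1 against t = 0 both evaluate to about -log(eps).
import numpy as np
import theano
import theano.tensor as T
from lasagne.objectives import binary_crossentropy

p = T.vector('p')
t = T.vector('t')
f = theano.function([p, t], [binary_crossentropy(p, t),
                             safe_binary_crossentropy(p, t)])

floatX = theano.config.floatX
raw, safe = f(np.array([0.0, 1.0], dtype=floatX), np.array([1.0, 0.0], dtype=floatX))
print(raw)   # [inf, inf]
print(safe)  # both about 9.21 == -log(1e-4)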
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen,
                          num_feats, lambda_val=0.5 * 1e-4):
    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape

    # Span
    emb1 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim))
    conv1d_1 = DimshuffleLayer(
        Conv1DLayer(reshape1, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)

    """
    # DocTimeRel
    emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats * wordDim))
    conv1d_2 = DimshuffleLayer(
        Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax)
    """

    # Type
    emb3 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim))
    conv1d_3 = DimshuffleLayer(
        Conv1DLayer(reshape3, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax)

    # Degree
    emb4 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim))
    conv1d_4 = DimshuffleLayer(
        Conv1DLayer(reshape4, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax)

    # Polarity
    emb5 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim))
    conv1d_5 = DimshuffleLayer(
        Conv1DLayer(reshape5, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax)

    # ContextualModality
    emb6 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim))
    conv1d_6 = DimshuffleLayer(
        Conv1DLayer(reshape6, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax)

    """
    # ContextualAspect
    emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats * wordDim))
    conv1d_7 = DimshuffleLayer(
        Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax)
    """

    """
    # Permanence
    emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats * wordDim))
    conv1d_8 = DimshuffleLayer(
        Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax)
    """

    # Is this important?
    """
    network_1_out, network_2_out, network_3_out, network_4_out, \
        network_5_out, network_6_out, network_7_out, network_8_out = \
        get_output([network_1, network_2, network_3, network_4,
                    network_5, network_6, network_7, network_8])
    """
    network_1_out = get_output(network_1)
    network_3_out = get_output(network_3)
    network_4_out = get_output(network_4)
    network_5_out = get_output(network_5)
    network_6_out = get_output(network_6)

    loss_1 = (T.mean(binary_crossentropy(network_1_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb1: lambda_val, conv1d_1: lambda_val,
                   hid_1: lambda_val, network_1: lambda_val}, l2))
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True),
                        learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1,
                                 updates=updates_1, allow_input_downcast=True)
    val_acc_1 = T.mean(binary_accuracy(get_output(network_1, deterministic=True),
                                       target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1,
                               allow_input_downcast=True)

    """
    loss_2 = (T.mean(categorical_crossentropy(network_2_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb2: lambda_val, conv1d_2: lambda_val,
                   hid_2: lambda_val, network_2: lambda_val}, l2))
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True),
                        learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2,
                                 updates=updates_2, allow_input_downcast=True)
    val_acc_2 = T.mean(categorical_accuracy(get_output(network_2, deterministic=True),
                                            target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2,
                               allow_input_downcast=True)
    """

    loss_3 = (T.mean(categorical_crossentropy(network_3_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb3: lambda_val, conv1d_3: lambda_val,
                   hid_3: lambda_val, network_3: lambda_val}, l2))
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True),
                        learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3,
                                 updates=updates_3, allow_input_downcast=True)
    val_acc_3 = T.mean(categorical_accuracy(get_output(network_3, deterministic=True),
                                            target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3,
                               allow_input_downcast=True)

    loss_4 = (T.mean(categorical_crossentropy(network_4_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb4: lambda_val, conv1d_4: lambda_val,
                   hid_4: lambda_val, network_4: lambda_val}, l2))
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True),
                        learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4,
                                 updates=updates_4, allow_input_downcast=True)
    val_acc_4 = T.mean(categorical_accuracy(get_output(network_4, deterministic=True),
                                            target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4,
                               allow_input_downcast=True)

    loss_5 = (T.mean(categorical_crossentropy(network_5_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb5: lambda_val, conv1d_5: lambda_val,
                   hid_5: lambda_val, network_5: lambda_val}, l2))
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True),
                        learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5,
                                 updates=updates_5, allow_input_downcast=True)
    val_acc_5 = T.mean(categorical_accuracy(get_output(network_5, deterministic=True),
                                            target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5,
                               allow_input_downcast=True)

    loss_6 = (T.mean(categorical_crossentropy(network_6_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb6: lambda_val, conv1d_6: lambda_val,
                   hid_6: lambda_val, network_6: lambda_val}, l2))
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True),
                        learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6,
                                 updates=updates_6, allow_input_downcast=True)
    val_acc_6 = T.mean(categorical_accuracy(get_output(network_6, deterministic=True),
                                            target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6,
                               allow_input_downcast=True)

    """
    loss_7 = (T.mean(categorical_crossentropy(network_7_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb7: lambda_val, conv1d_7: lambda_val,
                   hid_7: lambda_val, network_7: lambda_val}, l2))
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True),
                        learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7,
                                 updates=updates_7, allow_input_downcast=True)
    val_acc_7 = T.mean(categorical_accuracy(get_output(network_7, deterministic=True),
                                            target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7,
                               allow_input_downcast=True)

    loss_8 = (T.mean(categorical_crossentropy(network_8_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb8: lambda_val, conv1d_8: lambda_val,
                   hid_8: lambda_val, network_8: lambda_val}, l2))
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True),
                        learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8,
                                 updates=updates_8, allow_input_downcast=True)
    val_acc_8 = T.mean(categorical_accuracy(get_output(network_8, deterministic=True),
                                            target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8,
                               allow_input_downcast=True)
    """

    """
    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, \
        train_fn_3, val_fn_3, network_3, train_fn_4, val_fn_4, network_4, \
        train_fn_5, val_fn_5, network_5, train_fn_6, val_fn_6, network_6, \
        train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
    """
    return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \
        network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
        train_fn_6, val_fn_6, network_6
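# A minimal sketch of driving the task-specific functions returned above;
# `x_batch` (int indices, shape (batch, seqlen, num_feats)) and the per-task
# target arrays are illustrative names, not defined in this file:
#
#   (train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, network_3,
#    train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5,
#    train_fn_6, val_fn_6, network_6) = multi_task_classifier(
#        args, input_var, target_var, wordEmbeddings, seqlen, num_feats)
#   span_loss = train_fn_1(x_batch, y_span)   # one AdaGrad step on the Span task
#   span_acc = val_fn_1(x_val, y_span_val)    # deterministic validation accuracy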
## Discriminator (D)
D_layers, layer_X, layer_Y = bin_mnist.makeDiscriminator(
    MINIBATCH_SIZE, X, (MINIBATCH_SIZE, nc, npx, npx), Y, NUM_Y)

# D output for real data
p_real = ll.get_output(D_layers, inputs={layer_X: X})
# D output for generated data
p_gen = ll.get_output(D_layers, inputs={layer_X: gX})

print('getDisParams:')
discrim_params, discrim_sp_params = bin_mnist.getDisParams()

## Costs
# Cost function of D for real data = average BCE (binary cross-entropy) against ones
d_cost_real = lo.binary_crossentropy(p_real, T.ones(p_real.shape)).mean()
# Cost function of D for generated data = average BCE against zeros
d_cost_gen = lo.binary_crossentropy(p_gen, T.zeros(p_gen.shape)).mean()
# Cost function of G = average BCE of D's output on generated data against ones
g_cost_d = lo.binary_crossentropy(p_gen, T.ones(p_gen.shape)).mean()

# total cost of D
d_cost = d_cost_real + d_cost_gen
# total cost of G
g_cost = g_cost_d
# total costs
cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]
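# A minimal sketch of compiling training functions for the costs above,
# assuming `lu` aliases lasagne.updates (as in the next snippet) and that
# `gen_params` and the noise variable `Z` behind gX exist in this scope --
# all assumptions, not shown above. Since the discriminator is conditioned
# on Y, the input lists may also need Y in practice:
d_updates = lu.adam(d_cost, discrim_params, learning_rate=0.0002, beta1=0.5)
g_updates = lu.adam(g_cost, gen_params, learning_rate=0.0002, beta1=0.5)
train_d = theano.function([X, Z], d_cost, updates=d_updates,
                          allow_input_downcast=True)
train_g = theano.function([Z], g_cost, updates=g_updates,
                          allow_input_downcast=True)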
#disc.append(ll.dropout(ll.DenseLayer(disc[-1], num_units=1000), p=0.3))
#disc.append(ll.dropout(ll.DenseLayer(disc[-1], num_units=500), p=0.2))
#disc.append(ll.dropout(ll.DenseLayer(disc[-1], num_units=250), p=0.3))
#disc.append(ll.GaussianNoiseLayer(disc[-1], sigma=0.01))
disc.append(ll.dropout(
    ll.DenseLayer(disc[-1], num_units=512,
                  nonlinearity=nonlin.very_leaky_rectify), p=0.0))
disc.append(ll.DenseLayer(disc[-1], num_units=1, nonlinearity=nonlin.sigmoid))

disc_data = ll.get_output(disc[-1], inputs=Xvar)
disc_gen = ll.get_output(disc[-1], gen_out)
disc_params = ll.get_all_params(disc)

# data_obj = T.mean(T.log(disc_data))  # objective function for data
data_obj = lo.binary_crossentropy(disc_data, T.ones(batch_size)).mean()
data_train = theano.function(
    inputs=[Xvar],
    outputs=data_obj,
    updates=lu.adam(data_obj, disc_params, learning_rate=lr),
    allow_input_downcast=True
)

# gen_obj = T.mean(T.log(T.ones(batch_size) - disc_gen))
gen_obj = lo.binary_crossentropy(disc_gen, T.ones(batch_size)).mean()
b = theano.function(inputs=[noisevar], outputs=disc_gen,
                    allow_input_downcast=True)
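# The snippet builds gen_obj but never compiles a generator update; a minimal
# sketch of that step, assuming the generator layers live in a list `gen`
# analogous to `disc` (an assumption -- the generator code is not shown):
gen_params = ll.get_all_params(gen[-1])
gen_train = theano.function(
    inputs=[noisevar],
    outputs=gen_obj,
    updates=lu.adam(gen_obj, gen_params, learning_rate=lr),
    allow_input_downcast=True
)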
def __init__(self, load_weights=True, is_training=True,
             model_name='dronet_weights.npz'):
    self.model_name = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), model_name)

    def network(image):
        input_image = InputLayer(input_var=image, shape=(None, 1, 120, 160))
        conv1 = Conv2DLayer(input_image, num_filters=32, filter_size=(5, 5),
                            stride=(2, 2), nonlinearity=rectify, pad='same')
        pool1 = MaxPool2DLayer(conv1, pool_size=(3, 3), stride=(2, 2), pad=1)

        conv2 = batch_norm(
            Conv2DLayer(pool1, num_filters=32, filter_size=(3, 3),
                        stride=(2, 2), nonlinearity=rectify, pad='same'))
        conv2 = batch_norm(
            Conv2DLayer(conv2, num_filters=32, filter_size=(3, 3),
                        stride=(1, 1), nonlinearity=rectify, pad='same'))
        downsample1 = Conv2DLayer(pool1, num_filters=32, filter_size=(1, 1),
                                  stride=(2, 2), nonlinearity=rectify,
                                  pad='same')
        input3 = ElemwiseSumLayer([downsample1, conv2])

        conv3 = batch_norm(
            Conv2DLayer(input3, num_filters=64, filter_size=(3, 3),
                        stride=(2, 2), nonlinearity=rectify, pad='same'))
        conv3 = batch_norm(
            Conv2DLayer(conv3, num_filters=64, filter_size=(3, 3),
                        stride=(1, 1), nonlinearity=rectify, pad='same'))
        downsample2 = Conv2DLayer(input3, num_filters=64, filter_size=(1, 1),
                                  stride=(2, 2), nonlinearity=rectify,
                                  pad='same')
        input4 = ElemwiseSumLayer([downsample2, conv3])

        conv4 = batch_norm(
            Conv2DLayer(input4, num_filters=128, filter_size=(3, 3),
                        stride=(2, 2), nonlinearity=rectify, pad='same'))
        conv4 = batch_norm(
            Conv2DLayer(conv4, num_filters=128, filter_size=(3, 3),
                        stride=(1, 1), nonlinearity=rectify, pad='same'))
        downsample3 = Conv2DLayer(input4, num_filters=128, filter_size=(1, 1),
                                  stride=(2, 2), nonlinearity=rectify,
                                  pad='same')
        input5 = ElemwiseSumLayer([downsample3, conv4])

        flatten = DropoutLayer(FlattenLayer(input5), 0.5)
        prob_out = DenseLayer(flatten, num_units=1, nonlinearity=sigmoid)
        turn_angle = DenseLayer(flatten, num_units=1, nonlinearity=tanh)
        return prob_out, turn_angle

    # declare the variables used in the network
    self.X = T.ftensor4()
    self.Y = T.fmatrix()
    self.Z = T.fmatrix()

    # Lasagne object for the network
    self.CollisionProbability, self.TurnAngle = network(self.X)

    if is_training:
        # collision probability for training and testing.
        # Output is a theano object.
        self.collision_prob = get_output(self.CollisionProbability)
        self.collision_prob_test = get_output(self.CollisionProbability,
                                              deterministic=True)

        # turn angle for training and testing.
        # Output is a theano object.
        self.turn_angle = get_output(self.TurnAngle)
        self.turn_angle_test = get_output(self.TurnAngle, deterministic=True)

        # Loss for the network.
        self.collision_loss = binary_crossentropy(self.collision_prob,
                                                  self.Y).mean()
        self.turn_loss = squared_error(self.turn_angle, self.Z).mean()

        # Loss to call for testing and validation.
        self.test_collision_loss = binary_crossentropy(
            self.collision_prob_test, self.Y).mean()
        self.test_turn_loss = squared_error(self.turn_angle_test,
                                            self.Z).mean()

        # network parameters for training.
        self.collision_params = get_all_params(self.CollisionProbability,
                                               trainable=True)
        self.turn_params = get_all_params(self.TurnAngle, trainable=True)

        # network updates
        self.collision_updates = adam(self.collision_loss,
                                      self.collision_params,
                                      learning_rate=0.001)
        self.turn_updates = adam(self.turn_loss, self.turn_params,
                                 learning_rate=0.00005)

        # get test loss
        self.test_collision = theano.function(
            inputs=[self.X, self.Y],
            outputs=self.test_collision_loss,
            allow_input_downcast=True)
        self.test_turn = theano.function(inputs=[self.X, self.Z],
                                         outputs=self.test_turn_loss,
                                         allow_input_downcast=True)

        # training functions
        self.train_collision = theano.function(
            inputs=[self.X, self.Y],
            outputs=self.collision_loss,
            updates=self.collision_updates,
            allow_input_downcast=True)
        self.train_turn = theano.function(inputs=[self.X, self.Z],
                                          outputs=self.turn_loss,
                                          updates=self.turn_updates,
                                          allow_input_downcast=True)
    else:
        # collision probability for testing. Output is a theano object.
        self.collision_prob_test = get_output(self.CollisionProbability,
                                              deterministic=True)
        # turn angle for testing. Output is a theano object.
        self.turn_angle_test = get_output(self.TurnAngle, deterministic=True)

    # run the network to calculate collision probability
    # and turn angle given an input.
    self.dronet = theano.function(
        inputs=[self.X],
        outputs=[self.turn_angle_test, self.collision_prob_test],
        allow_input_downcast=True)

    def load():
        with np.load(self.model_name) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            set_all_param_values(
                [self.CollisionProbability, self.TurnAngle], param_values)

    if load_weights:
        load()
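# A minimal usage sketch for the class this __init__ belongs to. The class
# name `DroNet` is an assumption (the class statement is not shown), and the
# arrays below are illustrative:
#
#   net = DroNet(load_weights=False, is_training=True)
#   collision_loss = net.train_collision(images, collision_labels)
#   turn_loss = net.train_turn(images, turn_angles)
#   angles, probs = net.dronet(images)   # inference on a batch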
def get_options(batchsize, nepochs, plotevery, learningrate, normalizegrads,
                clipgrads, enabledebug, optimizer, yzeromean, yunitvar,
                datadir, outputdir):
    global batch_size
    batch_size = batchsize
    global epochs
    epochs = nepochs

    print("Changing pwd to {}".format(outputdir))
    os.chdir(outputdir)
    mydir = os.path.join(os.getcwd(),
                         datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(mydir)
    os.chdir(mydir)

    app_name = sys.argv[0]
    global logger
    logger = get_logger(app_name=app_name, logfolder=mydir)

    # Load dataset
    X, Y = load_data(datadir + os.sep + "coulomb.txt",
                     datadir + os.sep + "spectra_-30_0_300.txt")
    Y, Y_mean, Y_std, Y_binarized = preprocess_targets(Y, zero_mean=yzeromean,
                                                       unit_var=yunitvar)
    [X_train, X_test], [Y_train, Y_test], splits = get_data_splits(
        X, Y, splits=[90, 10])
    [Y_binarized_train, Y_binarized_test] = np.split(Y_binarized, splits)[:-1]

    np.savez('Y_vals.npz', Y_train=Y_train, Y_test=Y_test,
             Y_binarized_test=Y_binarized_test,
             Y_binarized_train=Y_binarized_train, Y_mean=Y_mean, Y_std=Y_std)
    np.savez('X_vals.npz', X_train=X_train, X_test=X_test)

    dataDim = X.shape[1:]
    outputDim = Y.shape[1]
    datapoints = len(X_train)
    print("datapoints = %d" % datapoints)

    # making the datapoints shared variables
    X_train = make_shared(X_train)
    X_test = make_shared(X_test)
    Y_train = make_shared(Y_train)
    Y_test = make_shared(Y_test)
    Y_binarized_train = make_shared(Y_binarized_train)
    Y_binarized_test = make_shared(Y_binarized_test)

    # TODO !!!! I am here
    # print("Train set size {}, Train set (labelled) size {}, Test set size {},"
    #       "Validation set size {}".format(
    #           train_set[0].size, train_set_labeled[0].size,
    #           test_set[0].size, valid_set[0].size))

    # Defining the model now.
    th_coulomb = T.ftensor4()
    th_energies = T.fmatrix()
    th_energies_bin = T.fmatrix()
    indices = T.ivector()

    l_input = InputLayer(shape=(None, 1, 29, 29), input_var=th_coulomb,
                         name="Input")
    l_conv1 = Conv2DLayer(l_input, 5, 3, pad="same", name="conv1")
    l_conv2 = Conv2DLayer(l_conv1, 5, 3, pad="same", name="conv2")
    l_maxpool1 = MaxPool2DLayer(l_conv2, (2, 2), name="maxpool1")
    l_conv3 = Conv2DLayer(l_maxpool1, 5, 2, pad="same", name="conv3")
    l_maxpool2 = MaxPool2DLayer(l_conv3, (2, 2), name="maxpool2")
    l_conv4 = Conv2DLayer(l_maxpool2, 5, 2, pad="same", name="conv4")
    l_flatten = FlattenLayer(l_conv4, name="flatten")
    l_realOut = DenseLayer(l_flatten, num_units=outputDim,
                           nonlinearity=linear, name="realOut")
    l_binOut = DenseLayer(l_flatten, num_units=outputDim,
                          nonlinearity=sigmoid, name="binOut")
    l_output = ElemwiseMergeLayer([l_binOut, l_realOut], T.mul)

    energy_output = get_output(l_output)
    binary_output = get_output(l_binOut)

    # loss_real = T.sum(abs(energy_output - th_energies))
    loss_real = T.mean((energy_output - th_energies) ** 2)
    loss_binary = T.sum(binary_crossentropy(binary_output, th_energies_bin))
    loss = loss_real + loss_binary

    params = get_all_params(l_output)
    grad = T.grad(loss, params)
    if normalizegrads is not None:
        grad = lasagne.updates.total_norm_constraint(grad,
                                                     max_norm=normalizegrads)
    if clipgrads is not None:
        grad = [T.clip(g, -clipgrads, clipgrads) for g in grad]

    optimization_algo = get_optimizer[optimizer]
    updates = optimization_algo(grad, params, learning_rate=learningrate)

    # train_fn = theano.function([th_coulomb, th_energies, th_energies_bin],
    #                            [loss, energy_output], updates=updates,
    #                            allow_input_downcast=True)
    train_fn = theano.function(
        [indices], [loss, energy_output], updates=updates,
        allow_input_downcast=True,
        givens={
            th_coulomb: X_train[indices, :],
            th_energies: Y_train[indices, :],
            th_energies_bin: Y_binarized_train[indices, :]
        })

    # get_grad = theano.function([th_coulomb, th_energies, th_energies_bin], grad)
    get_grad = theano.function(
        [indices], grad, allow_input_downcast=True,
        givens={
            th_coulomb: X_train[indices, :],
            th_energies: Y_train[indices, :],
            th_energies_bin: Y_binarized_train[indices, :]
        })
    get_convOutput = theano.function(
        [indices], [get_output(l_conv1), get_output(l_conv2)],
        allow_input_downcast=True,
        givens={
            th_coulomb: X_train[indices, :],
            th_energies: Y_train[indices, :],
            th_energies_bin: Y_binarized_train[indices, :]
        })

    # get_updates = theano.function([th_data, th_labl], [updates.values()])
    # NOTE: no `updates` here -- validation must not modify the parameters.
    val_fn = theano.function(
        [], [loss, energy_output], allow_input_downcast=True,
        givens={
            th_coulomb: X_test,
            th_energies: Y_test,
            th_energies_bin: Y_binarized_test
        })

    with open(os.path.join(mydir, "data.txt"), "w") as f:
        script = app_name
        for elem in ["meta_seed", "dataDim", "batch_size", "epochs",
                     "learningrate", "normalizegrads", "clipgrads",
                     "enabledebug", "optimizer", "script"]:
            f.write("{} : {}\n".format(elem, eval(elem)))

    train_loss_lowest = np.inf
    test_loss_lowest = np.inf
    for epoch in range(epochs):
        batch_start = 0
        train_loss = []
        indices = np.random.permutation(datapoints)
        minibatches = int(datapoints / batch_size)
        for minibatch in range(minibatches):
            train_idxs = indices[batch_start:batch_start + batch_size]
            # X_train_batch = X_train[train_idxs, :]
            # Yr_train_batch = Y_train[train_idxs, :]
            # Yb_train_batch = Y_binarized_train[train_idxs, :]
            # train_output = train_fn(X_train_batch, Yr_train_batch, Yb_train_batch)
            train_output = train_fn(train_idxs)
            batch_start = batch_start + batch_size
            train_loss.append(train_output[0])

            if enabledebug:
                # Debugging information
                batchIdx = epoch * minibatches + minibatch
                fn = 'params_{:>010d}'.format(batchIdx)  # saving params
                param_values = get_all_param_values(l_output)
                param_norm = np.linalg.norm(
                    np.hstack([np.asarray(param).flatten()
                               for param in param_values]))
                # gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
                gradients = get_grad(train_idxs)
                gradient_norm = np.linalg.norm(
                    np.hstack([np.asarray(gradient).flatten()
                               for gradient in gradients]))
                logger.debug(
                    "Epoch : {:0>4} minibatch {:0>3} Gradient Norm : {:>0.4}, "
                    "Param Norm : {:>0.4} GradNorm/ParamNorm : {:>0.4} "
                    "(Values from Prev. Minibatch) Train loss {}".format(
                        epoch, minibatch, gradient_norm, param_norm,
                        gradient_norm / param_norm, train_loss[-1]))
                param_names = [param.__str__()
                               for param in get_all_params(l_output)]
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(batchIdx),
                         Y_train_pred=train_output[1])
                if train_loss[-1] < train_loss_lowest:
                    train_loss_lowest = train_loss[-1]
                    np.savez('Y_train_pred_best.npz',
                             Y_train_pred=train_output[1])
                if np.isnan(gradient_norm):
                    pdb.set_trace()

        if (epoch % plotevery == 0):
            logger.info("Epoch {} of {}".format(epoch, epochs))
            fn = 'params_{:>03d}'.format(epoch)  # saving params
            param_values = get_all_param_values(l_output)
            param_norm = np.linalg.norm(
                np.hstack([np.asarray(param).flatten()
                           for param in param_values]))
            param_names = [param.__str__()
                           for param in get_all_params(l_output)]
            if not enabledebug:
                np.savez(fn + '.npz', **dict(zip(param_names, param_values)))
                np.savez('Y_train_pred_{}.npz'.format(epoch),
                         Y_train_pred=train_output[1])
            mean_train_loss = np.mean(train_loss)
            if mean_train_loss < train_loss_lowest:
                train_loss_lowest = mean_train_loss
                np.savez('Y_train_pred_best.npz',
                         Y_train_pred=train_output[1])

            # gradients = get_grad(X_train_batch, Yr_train_batch, Yb_train_batch)
            gradients = get_grad(train_idxs)
            gradient_norm = np.linalg.norm(
                np.hstack([np.asarray(gradient).flatten()
                           for gradient in gradients]))
            logger.info(" Gradient Norm : {}, Param Norm : {} "
                        "GradNorm/ParamNorm : {} ".format(
                            gradient_norm, param_norm,
                            gradient_norm / param_norm))
            logger.info(" Train loss {:>0.4}".format(mean_train_loss))

            # test_loss, test_prediction = val_fn(X_test, Y_test, Y_binarized_test)
            test_loss, test_prediction = val_fn()
            np.savez('Y_test_pred_{}.npz'.format(epoch),
                     Y_test_pred=test_prediction)
            logger.info(" Test loss {}".format(test_loss))
            if test_loss < test_loss_lowest:
                test_loss_lowest = test_loss
                np.savez('Y_test_pred_best.npz', Y_test_pred=test_prediction)
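# A minimal sketch of restoring one of the checkpoints saved above, assuming
# the same `l_output` graph has been rebuilt in the current scope and that a
# file such as 'params_010.npz' exists (the file name is illustrative):
with np.load('params_010.npz') as f:
    names = [param.__str__() for param in get_all_params(l_output)]
    set_all_param_values(l_output, [f[name] for name in names])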
    ann = DenseLayer(ann, DIM_H1)  # , nonlinearity=sigmoid)
    ann = DenseLayer(ann, DIM_H2)  # , nonlinearity=sigmoid)
    ann = DenseLayer(ann, 2, nonlinearity=softmax)
    return ann


if __name__ == '__main__':
    x = T.fmatrix()
    t = T.fvector()

    ann = network(x)
    prediction = get_output(ann)[:, 1]
    predict = function([x], outputs=prediction)

    loss = binary_crossentropy(prediction, t).mean()

    # L2 regularization
    if L2_REGULARIZATION:
        l2_penalty = ALPHA * regularize_network_params(ann, l2)
        loss += l2_penalty.mean()

    updates = sgd(loss_or_grads=loss,
                  params=get_all_params(ann, trainable=True),
                  learning_rate=LR)
    train = function([x, t], outputs=loss, updates=updates,
                     allow_input_downcast=True, mode='FAST_COMPILE')
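    # Illustrative smoke test for the functions compiled above. The shapes
    # are assumptions: DIM_IN stands in for the input width that network()
    # expects, and N_STEPS is an arbitrary small step count.
    DIM_IN = 10
    N_STEPS = 5
    X_demo = np.random.rand(32, DIM_IN).astype('float32')
    t_demo = np.random.randint(0, 2, 32).astype('float32')
    for _ in range(N_STEPS):
        demo_loss = train(X_demo, t_demo)  # one SGD step per call
    p = predict(X_demo)                    # P(class == 1) per sample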
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen,
                          num_feats, lambda_val=0.5 * 1e-4):
    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape

    # one shared embedding feeds all eight task heads
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                         W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))

    conv1d_1 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)

    conv1d_2 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax)

    conv1d_3 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax)

    conv1d_4 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax)

    conv1d_5 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax)

    conv1d_6 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax)

    conv1d_7 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax)

    conv1d_8 = DimshuffleLayer(
        Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                    stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax)

    # Is this important?
    network_1_out, network_2_out, network_3_out, network_4_out, \
        network_5_out, network_6_out, network_7_out, network_8_out = \
        get_output([network_1, network_2, network_3, network_4,
                    network_5, network_6, network_7, network_8])

    loss_1 = (T.mean(binary_crossentropy(network_1_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_1: lambda_val,
                   hid_1: lambda_val, network_1: lambda_val}, l2))
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True),
                        learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1,
                                 updates=updates_1, allow_input_downcast=True)
    val_acc_1 = T.mean(binary_accuracy(get_output(network_1, deterministic=True),
                                       target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1,
                               allow_input_downcast=True)

    loss_2 = (T.mean(categorical_crossentropy(network_2_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_2: lambda_val,
                   hid_2: lambda_val, network_2: lambda_val}, l2))
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True),
                        learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2,
                                 updates=updates_2, allow_input_downcast=True)
    val_acc_2 = T.mean(categorical_accuracy(get_output(network_2, deterministic=True),
                                            target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2,
                               allow_input_downcast=True)

    loss_3 = (T.mean(categorical_crossentropy(network_3_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_3: lambda_val,
                   hid_3: lambda_val, network_3: lambda_val}, l2))
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True),
                        learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3,
                                 updates=updates_3, allow_input_downcast=True)
    val_acc_3 = T.mean(categorical_accuracy(get_output(network_3, deterministic=True),
                                            target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3,
                               allow_input_downcast=True)

    loss_4 = (T.mean(categorical_crossentropy(network_4_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_4: lambda_val,
                   hid_4: lambda_val, network_4: lambda_val}, l2))
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True),
                        learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4,
                                 updates=updates_4, allow_input_downcast=True)
    val_acc_4 = T.mean(categorical_accuracy(get_output(network_4, deterministic=True),
                                            target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4,
                               allow_input_downcast=True)

    loss_5 = (T.mean(binary_crossentropy(network_5_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_5: lambda_val,
                   hid_5: lambda_val, network_5: lambda_val}, l2))
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True),
                        learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5,
                                 updates=updates_5, allow_input_downcast=True)
    val_acc_5 = T.mean(binary_accuracy(get_output(network_5, deterministic=True),
                                       target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5,
                               allow_input_downcast=True)

    loss_6 = (T.mean(categorical_crossentropy(network_6_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_6: lambda_val,
                   hid_6: lambda_val, network_6: lambda_val}, l2))
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True),
                        learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6,
                                 updates=updates_6, allow_input_downcast=True)
    val_acc_6 = T.mean(categorical_accuracy(get_output(network_6, deterministic=True),
                                            target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6,
                               allow_input_downcast=True)

    loss_7 = (T.mean(categorical_crossentropy(network_7_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_7: lambda_val,
                   hid_7: lambda_val, network_7: lambda_val}, l2))
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True),
                        learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7,
                                 updates=updates_7, allow_input_downcast=True)
    val_acc_7 = T.mean(categorical_accuracy(get_output(network_7, deterministic=True),
                                            target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7,
                               allow_input_downcast=True)

    loss_8 = (T.mean(categorical_crossentropy(network_8_out, target_var)) +
              regularize_layer_params_weighted(
                  {emb: lambda_val, conv1d_8: lambda_val,
                   hid_8: lambda_val, network_8: lambda_val}, l2))
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True),
                        learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8,
                                 updates=updates_8, allow_input_downcast=True)
    val_acc_8 = T.mean(categorical_accuracy(get_output(network_8, deterministic=True),
                                            target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8,
                               allow_input_downcast=True)

    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, \
        train_fn_3, val_fn_3, network_3, train_fn_4, val_fn_4, network_4, \
        train_fn_5, val_fn_5, network_5, train_fn_6, val_fn_6, network_6, \
        train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8