def create_blstm_dropout(input_vars, mask_vars, num_inputs, hidden_layer_size, num_outputs,
                         dropout=0.2, noise=0.2):
    network = InputLayer((None, None, num_inputs), input_vars)
    mask = InputLayer((None, None), mask_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=noise)
    for i in range(4):
        forward = LSTMLayer(network, hidden_layer_size, mask_input=mask, learn_init=True)
        backward = LSTMLayer(network, hidden_layer_size, mask_input=mask,
                             learn_init=True, backwards=True)
        network = DropoutLayer(
            GaussianNoiseLayer(ElemwiseSumLayer([forward, backward]), noise),
            dropout)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
def build_network(input_var, num_input_channels, num_classes):
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {
        'momentum': config.batch_normalization_momentum
    }

    net = InputLayer(name='input', shape=(None, num_input_channels, 28, 28), input_var=input_var)
    net = GaussianNoiseLayer(net, name='noise', sigma=config.augment_noise_stddev)
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=32, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=64, pad='same', **conv_defs), **wn_defs)
    # net = WN(Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=32, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=64, pad='same', **conv_defs), **wn_defs)
    # net = WN(Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=32, pad=0, **conv_defs), **wn_defs)
    # net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=256, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=num_classes, **dense_defs), **wn_defs)

    # net = GaussianNoiseLayer(net, name='noise', sigma=config.augment_noise_stddev)
    # net = WN(DenseLayer(net, name='dense1', num_units=256, **dense_defs), **wn_defs)
    # net = DropoutLayer(net, name='drop1', p=.5)
    # net = WN(DenseLayer(net, name='dense2', num_units=256, **dense_defs), **wn_defs)
    # net = DropoutLayer(net, name='drop2', p=.5)
    # net = WN(DenseLayer(net, name='dense3', num_units=256, **dense_defs), **wn_defs)
    #
    # net = WN(DenseLayer(net, name='dense4', num_units=num_classes, **dense_defs), **wn_defs)

    return net
def create_lstm(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.01)
    for i in range(1):
        network = LSTMLayer(network, hidden_layer_size, learn_init=True)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
def create_dnn(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.01)
    network = ReshapeLayer(network, (-1, 129))
    for i in range(1):
        network = DenseLayer(network, hidden_layer_size, W=GlorotUniform(), b=Constant(1.0),
                             nonlinearity=leaky_rectify)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {
        'momentum': .999
    }

    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs)

    return net
def __init__(self, n_in, n_filters, filter_sizes, n_out, pool_sizes=None, n_hidden=(512), ccf=False,
             trans_func=rectify, out_func=softmax, dense_dropout=0.0, stats=2, input_noise=0.0,
             batch_norm=False, conv_dropout=0.0):
    super(CNN, self).__init__(n_in, n_hidden, n_out, trans_func)
    self.outf = out_func
    self.log = ""

    # Define model using lasagne framework
    dropout = True if not dense_dropout == 0.0 else False

    # Overwrite input layer
    sequence_length, n_features = n_in
    self.l_in = InputLayer(shape=(None, sequence_length, n_features))
    l_prev = self.l_in

    # Separate into raw values and statistics
    sequence_length -= stats
    stats_layer = SliceLayer(l_prev, indices=slice(sequence_length, None), axis=1)
    stats_layer = ReshapeLayer(stats_layer, (-1, stats * n_features))
    print('Stats layer shape', stats_layer.output_shape)
    l_prev = SliceLayer(l_prev, indices=slice(0, sequence_length), axis=1)
    print('Conv input layer shape', l_prev.output_shape)

    # Apply input noise
    l_prev = GaussianNoiseLayer(l_prev, sigma=input_noise)

    if ccf:
        self.log += "\nAdding cross-channel feature layer"
        l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features))
        l_prev = Conv2DLayer(l_prev, num_filters=4 * n_features, filter_size=(1, n_features),
                             nonlinearity=None)
        n_features *= 4
        if batch_norm:
            l_prev = batch_norm_layer(l_prev)
        l_prev = ReshapeLayer(l_prev, (-1, n_features, sequence_length))
        l_prev = DimshuffleLayer(l_prev, (0, 2, 1))

    # 2D Convolutional layers
    l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features))
    l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1))

    # Add the convolutional filters
    for n_filter, filter_size, pool_size in zip(n_filters, filter_sizes, pool_sizes):
        self.log += "\nAdding 2D conv layer: %d x %d" % (n_filter, filter_size)
        l_prev = Conv2DLayer(l_prev, num_filters=n_filter, filter_size=(filter_size, 1),
                             nonlinearity=self.transf, pad=filter_size // 2)
        if batch_norm:
            l_prev = batch_norm_layer(l_prev)
        if pool_size > 1:
            self.log += "\nAdding max pooling layer: %d" % pool_size
            l_prev = Pool2DLayer(l_prev, pool_size=(pool_size, 1))
        self.log += "\nAdding dropout layer: %.2f" % conv_dropout
        l_prev = TiedDropoutLayer(l_prev, p=conv_dropout)
        print("Conv out shape", get_output_shape(l_prev))

    # Global pooling layer
    l_prev = GlobalPoolLayer(l_prev, pool_function=T.mean, name='Global Mean Pool')
    print("GlobalPoolLayer out shape", get_output_shape(l_prev))

    # Concatenate stats
    l_prev = ConcatLayer((l_prev, stats_layer), axis=1)

    for n_hid in n_hidden:
        self.log += "\nAdding dense layer with %d units" % n_hid
        print("Dense input shape", get_output_shape(l_prev))
        l_prev = DenseLayer(l_prev, n_hid, init.GlorotNormal(), init.Normal(1e-3), self.transf)
        if batch_norm:
            l_prev = batch_norm_layer(l_prev)
        if dropout:
            self.log += "\nAdding dense dropout with probability: %.2f" % dense_dropout
            l_prev = DropoutLayer(l_prev, p=dense_dropout)

    if batch_norm:
        self.log += "\nUsing batch normalization"

    self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func)
    self.model_params = get_all_params(self.model)

    self.sym_x = T.tensor3('x')
    self.sym_t = T.matrix('t')
def buildFCN8_DAE(input_concat_h_vars, input_mask_var, n_classes, nb_in_channels=3,
                  path_weights='/Tmp/romerosa/itinf/models/', model_name='fcn8_model.npz',
                  trainable=False, load_weights=False, pretrained=False, freeze=False,
                  pretrained_path='/data/lisatmp4/romerosa/itinf/models/camvid/',
                  pascal=False, return_layer='probs_dimshuffle', concat_h=['input'],
                  noise=0.1, dropout=0.5):
    '''
    Build fcn8 model as DAE
    '''

    net = {}
    pos = 0

    assert all([el in ['pool1', 'pool2', 'pool3', 'pool4', 'input']
                for el in concat_h])

    # Contracting path
    net['input'] = InputLayer((None, nb_in_channels, None, None), input_mask_var)

    # Noise
    if noise > 0:
        # net['noisy_input'] = GaussianNoiseLayerSoftmax(net['input'],
        #                                                sigma=noise)
        net['noisy_input'] = GaussianNoiseLayer(net['input'], sigma=noise)
        in_layer = 'noisy_input'
    else:
        in_layer = 'input'

    pos, out = model_helpers.concatenate(net, in_layer, concat_h, input_concat_h_vars,
                                         pos, net['input'].output_shape[1])

    # pool 1
    net['conv1_1'] = ConvLayer(net[out], 64, 3, pad=100, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad='same', flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)

    pos, out = model_helpers.concatenate(net, 'pool1', concat_h, input_concat_h_vars,
                                         pos, net['pool1'].output_shape[1])

    # pool 2
    net['conv2_1'] = ConvLayer(net[out], 128, 3, pad='same', flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad='same', flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)

    pos, out = model_helpers.concatenate(net, 'pool2', concat_h, input_concat_h_vars,
                                         pos, net['pool2'].output_shape[1])

    # pool 3
    net['conv3_1'] = ConvLayer(net[out], 256, 3, pad='same', flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad='same', flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad='same', flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)

    pos, out = model_helpers.concatenate(net, 'pool3', concat_h, input_concat_h_vars,
                                         pos, net['pool3'].output_shape[1])

    # pool 4
    net['conv4_1'] = ConvLayer(net[out], 512, 3, pad='same', flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad='same', flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad='same', flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)

    pos, out = model_helpers.concatenate(net, 'pool4', concat_h, input_concat_h_vars,
                                         pos, net['pool4'].output_shape[1])

    # pool 5
    net['conv5_1'] = ConvLayer(net[out], 512, 3, pad='same', flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad='same', flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad='same', flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)

    pos, out = model_helpers.concatenate(net, 'pool5', concat_h, input_concat_h_vars,
                                         pos, net['pool5'].output_shape[1])

    # fc6
    net['fc6'] = ConvLayer(net[out], 4096, 7, pad='valid', flip_filters=False)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout)

    # fc7
    net['fc7'] = ConvLayer(net['fc6_dropout'], 4096, 1, pad='valid', flip_filters=False)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout)

    net['score_fr'] = ConvLayer(net['fc7_dropout'], n_classes, 1, pad='valid',
                                flip_filters=False)

    # Upsampling path

    # Unpool
    net['score2'] = DeconvLayer(net['score_fr'], n_classes, 4, stride=2,
                                crop='valid', nonlinearity=linear)
    net['score_pool4'] = ConvLayer(net['pool4'], n_classes, 1, pad='same')
    net['score_fused'] = ElemwiseSumLayer((net['score2'], net['score_pool4']),
                                          cropping=[None, None, 'center', 'center'])

    # Unpool
    net['score4'] = DeconvLayer(net['score_fused'], n_classes, 4, stride=2,
                                crop='valid', nonlinearity=linear)
    net['score_pool3'] = ConvLayer(net['pool3'], n_classes, 1, pad='valid')
    net['score_final'] = ElemwiseSumLayer((net['score4'], net['score_pool3']),
                                          cropping=[None, None, 'center', 'center'])

    # Unpool
    net['upsample'] = DeconvLayer(net['score_final'], n_classes, 16, stride=8,
                                  crop='valid', nonlinearity=linear)
    upsample_shape = lasagne.layers.get_output_shape(net['upsample'])[1]
    net['input_tmp'] = InputLayer((None, upsample_shape, None, None), input_mask_var)

    net['score'] = ElemwiseMergeLayer((net['input_tmp'], net['upsample']),
                                      merge_function=lambda input, deconv: deconv,
                                      cropping=[None, None, 'center', 'center'])

    # Final dimshuffle, reshape and softmax
    net['final_dimshuffle'] = lasagne.layers.DimshuffleLayer(net['score'], (0, 2, 3, 1))
    laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = lasagne.layers.ReshapeLayer(net['final_dimshuffle'],
                                                       (T.prod(laySize[0:3]), laySize[3]))
    net['probs'] = lasagne.layers.NonlinearityLayer(net['final_reshape'],
                                                    nonlinearity=softmax)

    # Load weights
    if load_weights:
        pretrained = False
        with np.load(os.path.join(path_weights, model_name)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(net['probs'], param_values)

    # In case we want to re-use the weights of an FCN8 model pretrained from images (not GT)
    if pretrained:
        print('Loading pretrained weights')
        if pascal:
            path_weights = '/data/lisatmp4/romerosa/itinf/models/camvid/pascal-fcn8s-tvg-dag.mat'
            if 'tvg' in path_weights:
                str_filter = 'f'
                str_bias = 'b'
            else:
                str_filter = '_filter'
                str_bias = '_bias'

            W = sio.loadmat(path_weights)

            # Load the parameter values into the net
            num_params = W.get('params').shape[1]
            str_ind = [''.join(x for x in concat if x.isdigit()) for concat in concat_h]
            list_of_lays = ['conv' + str(int(x) + 1) + '_1' for x in str_ind if x]
            list_of_lays += ['conv1_1'] if nb_in_channels != 3 or 'input' in concat_h else []
            print(list_of_lays)

            for i in range(num_params):
                # Get layer name from the saved model
                name = str(W.get('params')[0][i][0])[3:-2]
                # Get parameter value
                param_value = W.get('params')[0][i][1]

                # Load weights
                if name.endswith(str_filter):
                    raw_name = name[:-len(str_filter)]
                    if raw_name not in list_of_lays:
                        print('Copying weights for ' + raw_name)
                        if 'score' not in raw_name and \
                           'upsample' not in raw_name and \
                           'final' not in raw_name and \
                           'probs' not in raw_name:
                            # print('Initializing layer ' + raw_name)
                            param_value = param_value.T
                            param_value = np.swapaxes(param_value, 2, 3)
                            net[raw_name].W.set_value(param_value)
                    else:
                        print('Ignoring ' + raw_name)

                # Load bias terms
                if name.endswith(str_bias):
                    raw_name = name[:-len(str_bias)]
                    if 'score' not in raw_name and \
                       'upsample' not in raw_name and \
                       'final' not in raw_name and \
                       'probs' not in raw_name:
                        param_value = np.squeeze(param_value)
                        net[raw_name].b.set_value(param_value)
        else:
            with np.load(os.path.join(pretrained_path, model_name)) as f:
                start = 0 if nb_in_channels == f['arr_%d' % 0].shape[1] else 2
                param_values = [f['arr_%d' % i] for i in range(start, len(f.files))]

            all_layers = lasagne.layers.get_all_layers(net['probs'])
            all_layers = [l for l in all_layers
                          if (not isinstance(l, InputLayer) and
                              not isinstance(l, GaussianNoiseLayerSoftmax) and
                              not isinstance(l, GaussianNoiseLayer))]
            all_layers = all_layers[1:] if start > 0 else all_layers

            # Freeze parameters after last concatenation layer
            last_concat = [idx for idx, l in enumerate(all_layers)
                           if isinstance(l, ConcatLayer)][-1]
            count = 0
            for idx, layer in enumerate(all_layers):
                layer_params = layer.get_params()
                for p in layer_params:
                    if hasattr(layer, 'input_layer') and \
                            not isinstance(layer.input_layer, ConcatLayer):
                        p.set_value(param_values[count])
                        if freeze:
                            model_helpers.freezeParameters(layer, single=True)
                    if isinstance(layer.input_layer, ConcatLayer) and idx == last_concat:
                        print('freezing')
                        freeze = True
                    count += 1

    # Do not train
    if not trainable:
        model_helpers.freezeParameters(net['probs'])

    # Go back to 4D
    net['probs_reshape'] = ReshapeLayer(net['probs'], (laySize[0], laySize[1],
                                                       laySize[2], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'], (0, 3, 1, 2))

    return net[return_layer]
def build_network(input_var, num_input_channel, num_features, num_classes):
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'stride': 1,
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {'momentum': config.batch_normalization_momentum}

    # layer_in = InputLayer(name='input', shape=(None, num_input_channel, num_features), input_var=input_var)

    # bidirectional lstm
    # l_in = InputLayer(shape=(config.minibatch_size, num_input_channel, num_features), input_var=input_var)
    # l_in = GaussianNoiseLayer(l_in, name='noise', sigma=config.augment_noise_stddev)
    # l_forward_1 = LSTMLayer(l_in, num_units=48)
    # l_backward_1 = LSTMLayer(l_in, num_units=48, backwards=True)
    # l_recurrent_1 = ElemwiseSumLayer([l_forward_1, l_backward_1])
    # l_recurrent_1 = DropoutLayer(l_recurrent_1, p=0.5)
    # l_forward_2 = LSTMLayer(l_recurrent_1, num_units=96)
    # l_backward_2 = LSTMLayer(l_recurrent_1, num_units=96, backwards=True)
    # l_recurrent_2 = ElemwiseSumLayer([l_forward_2, l_backward_2])
    # l_recurrent_2 = DropoutLayer(l_recurrent_2, p=0.5)
    # l_forward_22 = LSTMLayer(l_recurrent_2, num_units=192)
    # l_backward_22 = LSTMLayer(l_recurrent_2, num_units=192, backwards=True)
    # l_recurrent_22 = ElemwiseSumLayer([l_forward_22, l_backward_22])
    # l_recurrent_22 = DropoutLayer(l_recurrent_22, p=0.5)
    # l_forward_3 = LSTMLayer(l_recurrent_22, num_units=128)
    # l_backward_3 = LSTMLayer(l_recurrent_22, num_units=128, backwards=True)
    # l_recurrent_3 = ElemwiseSumLayer([l_forward_3, l_backward_3])
    # l_recurrent_3 = DropoutLayer(l_recurrent_3, p=0.5)
    #
    #
    # l_reshape = ReshapeLayer(l_recurrent_3, (config.minibatch_size*num_features, 64))
    # net = DenseLayer(l_recurrent_2, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax)

    # DLSTM
    net = InputLayer(name='input', shape=(None, num_input_channel, num_features), input_var=input_var)
    net = GaussianNoiseLayer(net, name='noise', sigma=config.augment_noise_stddev)
    net = LSTMLayer(net, name='l_forward1', num_units=48, grad_clipping=config.GRAD_CLIP,
                    cell_init=lasagne.init.HeNormal(gain='relu'),
                    hid_init=lasagne.init.HeNormal(gain='relu'),
                    nonlinearity=lasagne.nonlinearities.LeakyRectify(0.1))
    # net = BatchNormLayer(net)
    net = DropoutLayer(net, p=.2)
    # net = LSTMLayer(net, name='l_forward2', num_units=96, grad_clipping=config.GRAD_CLIP, cell_init=lasagne.init.HeNormal(gain='relu'), hid_init=lasagne.init.HeNormal(gain='relu'), nonlinearity=lasagne.nonlinearities.LeakyRectify(0.1))
    # net = BatchNormLayer(net)
    # net = DropoutLayer(net, p=.2)
    # net = LSTMLayer(net, name='l_forward3', num_units=128, grad_clipping=config.GRAD_CLIP, cell_init=lasagne.init.HeNormal(gain='relu'), hid_init=lasagne.init.HeNormal(gain='relu'), nonlinearity=lasagne.nonlinearities.LeakyRectify(0.1))
    # net = BatchNormLayer(net)
    # net = DropoutLayer(net, p=.2)
    # net = LSTMLayer(net, name='l_forward4', num_units=64, grad_clipping=config.GRAD_CLIP, cell_init=lasagne.init.HeNormal(gain='relu'), hid_init=lasagne.init.HeNormal(gain='relu'), nonlinearity=lasagne.nonlinearities.LeakyRectify(0.1), only_return_final=True)
    # net = BatchNormLayer(net)
    # net = DropoutLayer(net, p=.2)

    # ####out of memory--gpu, cannot increase to overfit:(
    # net = WN(DenseLayer(net, name='dense', num_units=num_classes, W=lasagne.init.HeNormal(gain='relu'), nonlinearity=lasagne.nonlinearities.softmax))
    # net = WN(DenseLayer(net, name='dense', num_units=num_classes, **dense_defs), **wn_defs)

    # layers = {lstm1: 0.1, lstm2: 0.1, lstm3: 0.5}
    # l2_penalty = regularize_layer_params_weighted(layers, l2) * 1e-4
    # l1_penalty = regularize_layer_params(lstm3, l1) * 1e-4

    return net
def __init__(self, n_in, n_filters, filter_sizes, n_out, pool_sizes=None, n_hidden=(), ccf=False,
             rcl=(), rcl_dropout=0.0, trans_func=rectify, out_func=softmax,
             dropout_probability=0.0, batch_norm=False, stats=0):
    super(RCNN, self).__init__(n_in, n_hidden, n_out, trans_func)
    self.outf = out_func
    self.log = ""

    # Define model using lasagne framework
    dropout = True if not dropout_probability == 0.0 else False

    # Overwrite input layer
    sequence_length, n_features = n_in
    self.l_in = InputLayer(shape=(None, sequence_length + stats, n_features), name='Input')
    l_prev = self.l_in
    print("Input shape: ", get_output_shape(l_prev))

    # Separate into raw values and statistics
    if stats > 0:
        stats_layer = SliceLayer(l_prev, indices=slice(sequence_length, None), axis=1)
        stats_layer = ReshapeLayer(stats_layer, (-1, stats * n_features))
        l_prev = SliceLayer(l_prev, indices=slice(0, sequence_length), axis=1)

    # Input noise
    sigma = 0.05
    self.log += "\nGaussian input noise: %02f" % sigma
    l_prev = GaussianNoiseLayer(l_prev, sigma=sigma)

    if ccf:
        self.log += "\nAdding cross-channel feature layer"
        l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features), name='Reshape')
        l_prev = Conv2DLayer(l_prev, num_filters=4 * n_features, filter_size=(1, n_features),
                             nonlinearity=None, b=None, name='Conv2D')
        l_prev = BatchNormalizeLayer(l_prev, normalize=batch_norm, nonlinearity=self.transf)
        n_features *= 4
        l_prev = ReshapeLayer(l_prev, (-1, n_features, sequence_length), name='Reshape')
        l_prev = DimshuffleLayer(l_prev, (0, 2, 1), name='Dimshuffle')

    # Convolutional layers
    l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features), name='Reshape')
    l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1), name='Dimshuffle')
    for n_filter, filter_size, pool_size in zip(n_filters, filter_sizes, pool_sizes):
        self.log += "\nAdding 2D conv layer: %d x %d" % (n_filter, filter_size)
        l_prev = Conv2DLayer(l_prev, num_filters=n_filter, filter_size=(filter_size, 1),
                             pad="same", nonlinearity=None, b=None, name='Conv2D')
        l_prev = BatchNormalizeLayer(l_prev, normalize=batch_norm, nonlinearity=self.transf)
        if pool_size > 1:
            self.log += "\nAdding max pooling layer: %d" % pool_size
            l_prev = Pool2DLayer(l_prev, pool_size=(pool_size, 1), name='Max Pool')
        self.log += "\nAdding dropout layer: %.2f" % rcl_dropout
        l_prev = TiedDropoutLayer(l_prev, p=rcl_dropout, name='Dropout')
        self.log += "\nConv out shape: %s" % str(get_output_shape(l_prev))

    # Recurrent Convolutional layers
    filter_size = filter_sizes[0]
    for t in rcl:
        self.log += "\nAdding recurrent conv layer: t: %d, filter size: %s" % (t, filter_size)
        l_prev = RecurrentConvLayer(l_prev, t=t, filter_size=filter_size,
                                    nonlinearity=self.transf, normalize=batch_norm)
        self.log += "\nAdding max pool layer: 2"
        l_prev = Pool2DLayer(l_prev, pool_size=(2, 1), name='Max Pool')

    print("RCL out shape", get_output_shape(l_prev))

    l_prev = GlobalPoolLayer(l_prev, name='Global Max Pool')
    print("GlobalPoolLayer out shape", get_output_shape(l_prev))

    # Append statistics
    if stats > 0:
        l_prev = ConcatLayer((l_prev, stats_layer), axis=1)

    for n_hid in n_hidden:
        self.log += "\nAdding dense layer with %d units" % n_hid
        print("Dense input shape", get_output_shape(l_prev))
        l_prev = DenseLayer(l_prev, num_units=n_hid, nonlinearity=None, b=None, name='Dense')
        l_prev = BatchNormalizeLayer(l_prev, normalize=batch_norm, nonlinearity=self.transf)
        if dropout:
            self.log += "\nAdding output dropout with probability %.2f" % dropout_probability
            l_prev = DropoutLayer(l_prev, p=dropout_probability, name='Dropout')

    if batch_norm:
        self.log += "\nUsing batch normalization"

    self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func, name='Output')
    self.model_params = get_all_params(self.model)

    self.sym_x = T.tensor3('x')
    self.sym_t = T.matrix('t')
def build_autoencoder(layer, nonlinearity='same', b=init.Constant(0.)):
    """
    Unfolds a stack of layers into a symmetric autoencoder with tied weights.

    Given a :class:`Layer` instance, this function builds a symmetric
    autoencoder with tied weights.

    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The :class:`Layer` instance with respect to which a symmetric
        autoencoder is built.
    nonlinearity : 'same', list, callable, or None
        The nonlinearities that are applied to the decoding layers.
        If 'same', each decoder layer has the same nonlinearity as its
        corresponding encoder layer. If a list is provided, it must contain
        nonlinearities for each decoding layer. Otherwise, if a single
        nonlinearity is provided, it is applied to all decoder layers.
        If set to ``None``, all nonlinearities for the decoder layers are set
        to lasagne.nonlinearities.identity.
    b : callable, Theano shared variable, numpy array, list or None
        An initializer for the decoder biases. By default, all decoder biases
        are initialized to lasagne.init.Constant(0.). If a shared variable or
        a numpy array is provided, the shape must match the incoming shape
        (only in case all incoming shapes are the same). Additionally, a list
        containing initializers for the biases of each decoder layer can be
        provided. If set to ``None``, the decoder layers will have no biases,
        and pass through their input instead.

    Returns
    -------
    layer: :class:`Layer` instance
        The output :class:`Layer` of the symmetric autoencoder with tied
        weights.
    encoder: :class:`Layer` instance
        The code :class:`Layer` of the autoencoder (see Notes)

    Notes
    -----
    The encoder (input) :class:`Layer` is changed using
    `unfold_bias_and_nonlinearity_layers`. Therefore, this layer is not the
    code layer anymore, because it has got its bias and nonlinearity stripped
    off.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> from lasagne.layers import build_autoencoder
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> l2 = DenseLayer(l1, num_units=10)
    >>> l_ae, l2 = build_autoencoder(l2, nonlinearity='same', b=None)
    """
    if isinstance(nonlinearity, (tuple, list)):
        n_idx = 0
    if isinstance(b, (tuple, list)):
        b_idx = 0

    encoder = unfold_bias_and_nonlinearity_layers(layer)
    layers = get_all_layers(encoder)
    autoencoder_layers = [encoder]

    kwargs_b = dict(b=None)
    kwargs_n = dict(nonlinearity=nonlinearities.identity)
    for i, layer in enumerate(layers[::-1]):
        incoming = autoencoder_layers[-1]
        if isinstance(layer, InputLayer):
            continue
        elif isinstance(layer, BiasLayer):
            if b is None:
                kwargs_b = dict(b=None)
            elif isinstance(b, (tuple, list)):
                kwargs_b = dict(b=b[b_idx])
                b_idx += 1
            else:
                kwargs_b = dict(b=b)
        elif isinstance(layer, NonlinearityLayer):
            if nonlinearity == 'same':
                kwargs_n = dict(nonlinearity=layer.nonlinearity)
            elif nonlinearity is None:
                kwargs_n = dict(nonlinearity=nonlinearities.identity)
            elif isinstance(nonlinearity, (tuple, list)):
                kwargs_n = dict(nonlinearity=nonlinearity[n_idx])
                n_idx += 1
            else:
                kwargs_n = dict(nonlinearity=nonlinearity)
        elif isinstance(layer, DropoutLayer):
            a_layer = DropoutLayer(incoming=incoming, p=layer.p, rescale=layer.rescale)
            autoencoder_layers.append(a_layer)
        elif isinstance(layer, GaussianNoiseLayer):
            a_layer = GaussianNoiseLayer(incoming=incoming, sigma=layer.sigma)
            autoencoder_layers.append(a_layer)
        else:
            a_layer = InverseLayer(incoming=incoming, layer=layer)
            if hasattr(layer, 'b'):
                a_layer = BiasLayer(incoming=a_layer, **kwargs_b)
            if hasattr(layer, 'nonlinearity'):
                a_layer = NonlinearityLayer(incoming=a_layer, **kwargs_n)
            autoencoder_layers.append(a_layer)

    return autoencoder_layers[-1], encoder
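# --- Usage sketch (not from the original source): training the tied-weight
# autoencoder produced by build_autoencoder with a squared-error reconstruction
# loss. The layer sizes, learning setup and variable names are illustrative
# assumptions, not part of the function above.
import theano
import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 20), input_var=x)
l1 = lasagne.layers.DenseLayer(l_in, num_units=50)
l2 = lasagne.layers.DenseLayer(l1, num_units=10)
l_ae, l_code = build_autoencoder(l2, nonlinearity='same', b=None)

reconstruction = lasagne.layers.get_output(l_ae)
loss = lasagne.objectives.squared_error(reconstruction, x).mean()
params = lasagne.layers.get_all_params(l_ae, trainable=True)
updates = lasagne.updates.adam(loss, params)
train_fn = theano.function([x], loss, updates=updates)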
def buildDAE_contextmod(input_concat_h_vars, input_mask_var, n_classes,
                        path_weights='/Tmp/romerosa/itinf/models/',
                        model_name='dae_model.npz', trainable=False,
                        load_weights=False, out_nonlin=linear,
                        concat_h=['input'], noise=0.1):
    '''
    Build context module

    Parameters
    ----------
    input_concat_h_vars: list of theano tensors, variables to concatenate
    input_mask_var: theano tensor, input to context module
    n_classes: int, number of classes
    path_weights: string, path to weights directory
    trainable: bool, whether the model is trainable (freeze parameters or not)
    load_weights: bool, whether to load pretrained weights
    out_nonlin: output nonlinearity
    concat_h: list of strings, names of layers we want to concatenate
    noise: float, noise
    '''

    # context module does not reduce the image resolution
    assert all([el in ['input'] for el in concat_h])
    net = {}
    pos = 0

    # Contracting path
    net['input'] = InputLayer((None, n_classes, None, None), input_mask_var)

    # Noise
    if noise > 0:
        # net['noisy_input'] = GaussianNoiseLayerSoftmax(net['input'],
        #                                                sigma=noise)
        net['noisy_input'] = GaussianNoiseLayer(net['input'], sigma=noise)
        in_next = 'noisy_input'
    else:
        in_next = 'input'

    pos, out = model_helpers.concatenate(net, in_next, concat_h,
                                         input_concat_h_vars, pos, 3)

    class IdentityInit(Initializer):
        """ We adopt the same initialization method as in the paper. """

        def sample(self, shape):
            n_filters, n_filters2, filter_size, filter_size2 = shape
            assert ((n_filters == n_filters2) & (filter_size == filter_size2))
            assert (filter_size % 2 == 1)

            W = np.zeros(shape, dtype='float32')
            for i in range(n_filters):
                W[i, i, filter_size // 2, filter_size // 2] = 1.
            return W

    net['conv1'] = Conv2DLayer(net[out], n_classes, 3, pad='same',
                               nonlinearity=rectify, flip_filters=False)
    net['pad1'] = PadLayer(net['conv1'], width=32, val=0, batch_ndim=2)
    net['dilconv1'] = DilatedConv2DLayer(net['pad1'], n_classes, 3, 1,
                                         W=IdentityInit(), nonlinearity=rectify)
    net['dilconv2'] = DilatedConv2DLayer(net['dilconv1'], n_classes, 3, 2,
                                         W=IdentityInit(), nonlinearity=rectify)
    net['dilconv3'] = DilatedConv2DLayer(net['dilconv2'], n_classes, 3, 4,
                                         W=IdentityInit(), nonlinearity=rectify)
    net['dilconv4'] = DilatedConv2DLayer(net['dilconv3'], n_classes, 3, 8,
                                         W=IdentityInit(), nonlinearity=rectify)
    net['dilconv5'] = DilatedConv2DLayer(net['dilconv4'], n_classes, 3, 16,
                                         W=IdentityInit(), nonlinearity=rectify)
    net['dilconv6'] = DilatedConv2DLayer(net['dilconv5'], n_classes, 3, 1,
                                         W=IdentityInit(), nonlinearity=rectify)
    net['dilconv7'] = DilatedConv2DLayer(net['dilconv6'], n_classes, 1, 1,
                                         W=IdentityInit(), nonlinearity=linear)

    # Final dimshuffle, reshape and softmax
    net['final_dimshuffle'] = DimshuffleLayer(net['dilconv7'], (0, 2, 3, 1))
    laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
    net['final_reshape'] = ReshapeLayer(net['final_dimshuffle'],
                                        (T.prod(laySize[0:3]), laySize[3]))
    net['probs'] = NonlinearityLayer(net['final_reshape'], nonlinearity=out_nonlin)

    # Go back to 4D
    net['probs_reshape'] = ReshapeLayer(
        net['probs'], (laySize[0], laySize[1], laySize[2], n_classes))
    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'], (0, 3, 1, 2))

    # print('Input to last layer: ', net['probs_dimshuffle'].input_shape)
    print(net.keys())

    # Load weights
    if load_weights:
        with np.load(os.path.join(path_weights, model_name)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(net['probs_dimshuffle'], param_values)

    # Do not train
    if not trainable:
        model_helpers.freezeParameters(net['probs_dimshuffle'], single=False)

    return net['probs_dimshuffle']
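# --- Usage sketch (not from the original source): wiring the context module to a
# softmax prediction produced elsewhere. The variable names, number of classes and
# the fact that only the input itself is concatenated are illustrative assumptions.
import theano.tensor as T

y_hat_var = T.tensor4('y_hat')  # (batch, n_classes, H, W), e.g. softmax output of a segmentation net
h_vars = [y_hat_var]            # concat_h=['input'] means we only concatenate the input itself

dae_out = buildDAE_contextmod(h_vars, y_hat_var, n_classes=11,
                              concat_h=['input'], noise=0.1,
                              trainable=True, load_weights=False)
# dae_out is the 'probs_dimshuffle' layer with shape (batch, n_classes, H, W)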
# ## LSTM Network Architecture
# And then to define the LSTM-RNN model:

num_batch = None       # use none to enable variable batch size
input_seq_len = None   # use none to enable variable sequence length
num_feat = num_feat
num_classes = nClasses + 1

soft = lasagne.nonlinearities.softmax
tanh = lasagne.nonlinearities.tanh
identity = lasagne.nonlinearities.identity

l_in = InputLayer(shape=(num_batch, input_seq_len, num_feat))
batchsize, seqlen, _ = l_in.input_var.shape
l_noise = GaussianNoiseLayer(l_in, sigma=0.6)
# l_mask = InputLayer(shape=(batchsize, seqlen))
# l_rnn_1 = LSTMLayer(l_noise, num_units=L1_UNITS, mask_input=l_mask)
l_rnn_1 = LSTMLayer(l_noise, num_units=L1_UNITS)
l_rnn_2 = LSTMLayer(l_rnn_1, num_units=L2_UNITS)
l_shp = ReshapeLayer(l_rnn_2, (-1, L2_UNITS))
l_out = DenseLayer(l_shp, num_units=num_classes, nonlinearity=identity)
l_out_shp = ReshapeLayer(l_out, (batchsize, seqlen, num_classes))

l_out_softmax = NonlinearityLayer(l_out, nonlinearity=soft)
l_out_softmax_shp = ReshapeLayer(l_out_softmax, (batchsize, seqlen, num_classes))

output_lin_ctc = L.get_output(l_out_shp)
network_output = L.get_output(l_out_softmax_shp)
all_params = L.get_all_params(l_rnn_2, trainable=True)
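# --- Usage sketch (assumption, not from the original script): compiling the forward
# pass of the architecture above. The CTC objective that consumes `output_lin_ctc`
# is defined elsewhere in the original code; here we only evaluate frame-level
# class probabilities.
import theano

predict_fn = theano.function([l_in.input_var], network_output)
# probs = predict_fn(X)               # X: (batch, seq_len, num_feat)
# best_path = probs.argmax(axis=-1)   # greedy per-frame decoding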
def RNN_Compute(layer1, layer2, lrate, batch_size, epochs, train, test, train_labels, test_labels):

    # Number of units in hidden layers
    L1_UNITS = layer1
    L2_UNITS = layer2

    # Training params
    LEARNING_RATE = lrate
    N_BATCH = batch_size
    NUM_EPOCHS = epochs

    num_feat = train.shape[1]
    num_classes = np.unique(test).size

    # Generate sequence masks (redundant here)
    mask_train = np.ones((train.shape[0], train.shape[1]))
    mask_test = np.ones((test.shape[0], test.shape[1]))

    # Model
    tanh = lasagne.nonlinearities.tanh
    relu = lasagne.nonlinearities.rectify
    soft = lasagne.nonlinearities.softmax

    # Network architecture
    l_in = InputLayer(shape=(None, None, num_feat))
    batchsize, seqlen, _ = l_in.input_var.shape
    l_noise = GaussianNoiseLayer(l_in, sigma=0.6)
    l_mask = InputLayer(shape=(batchsize, seqlen))
    l_rnn_1 = LSTMLayer(l_noise, num_units=L1_UNITS, mask_input=l_mask)
    l_in_drop = lasagne.layers.DropoutLayer(l_rnn_1, p=0.25)
    l_rnn_2 = LSTMLayer(l_in_drop, num_units=L2_UNITS)
    l_in_drop2 = lasagne.layers.DropoutLayer(l_rnn_2, p=0.1)
    l_shp = ReshapeLayer(l_in_drop2, (-1, L2_UNITS))
    l_dense = DenseLayer(l_shp, num_units=num_classes, nonlinearity=soft)
    l_out = ReshapeLayer(l_dense, (batchsize, seqlen, num_classes))

    # Symbols and cost function
    target_values = T.ivector('target_output')
    network_output = L.get_output(l_out)
    predicted_values = network_output[:, -1]
    prediction = T.argmax(predicted_values, axis=1)
    all_params = L.get_all_params(l_out, trainable=True)

    cost = lasagne.objectives.categorical_crossentropy(predicted_values, target_values)
    cost = cost.mean()

    # Compute SGD updates for training
    print("Computing updates ...")
    updates = lasagne.updates.rmsprop(cost, all_params, LEARNING_RATE)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    training = theano.function([l_in.input_var, target_values, l_mask.input_var],
                               cost, updates=updates, allow_input_downcast=True)
    predict = theano.function([l_in.input_var, l_mask.input_var],
                              prediction, allow_input_downcast=True)
    compute_cost = theano.function([l_in.input_var, target_values, l_mask.input_var],
                                   cost, allow_input_downcast=True)

    # Training
    print("Training ...")
    num_batches_train = int(np.ceil(len(train) / N_BATCH))
    train_losses = []
    valid_losses = []
    for epoch in range(NUM_EPOCHS):
        now = time.time()
        losses = []
        batch_shuffle = np.random.choice(train.shape[0], train.shape[0], False)
        sequences = train[batch_shuffle]
        labels = train_labels[batch_shuffle]
        train_masks = mask_train[batch_shuffle]
        for batch in range(num_batches_train):
            batch_slice = slice(N_BATCH * batch, N_BATCH * (batch + 1))
            X_batch = sequences[batch_slice]
            y_batch = labels[batch_slice]
            m_batch = train_masks[batch_slice]
            loss = training(X_batch, y_batch, m_batch)
            losses.append(loss)
        train_loss = np.mean(losses)
        train_losses.append(train_loss)
        valid_loss = compute_cost(test, test_labels, mask_test)
        valid_losses.append(valid_loss)
        test_pred = predict(test, mask_test)
        accuracy = sklearn.metrics.accuracy_score(test_labels, test_pred)
        print('Current epoch:', epoch + 1, '|',
              'Number of Epochs:', NUM_EPOCHS, '|',
              'Train loss:', train_loss, '|',
              'Validation loss:', valid_loss, '|',
              'Accuracy:', accuracy)
def build_NAF_controller(input_layer=None,
                         action_dimensions=1,
                         exploration=1e-2,
                         additive_exploration=True,
                         mean_layer=None,
                         V_layer=None,
                         L_layer=None,
                         action_low=-np.inf,
                         action_high=np.inf,
                         ):
    '''
    Builds the regular NAF controller and outputs a dictionary of lasagne layers
    for each component.

    :param input_layer: layer which is used to predict policy parameters.
        MUST be present unless both V_layer, L_layer and mean_layer are given
    :param action_dimensions: amount of action params to predict.
    :param exploration: if a layer is given, uses it to compute the action with
        exploration (details: see additive_exploration).
        Alternatively, if a number or a symbolic scalar is given, uses it as sigma
        for gaussian exploration noise, thus action = mean_layer + N(0, exploration).
    :param additive_exploration: if True (default), adds exploration term to the mean_layer.
        If False, uses exploration param as the picked actions including exploration
        (ignoring mean values)
    :param mean_layer: layer which returns optimal actions (Mu).
        If not given, uses DenseLayer(input_layer)
    :param V_layer: layer which returns state value baseline (V(state))
    :param L_layer: a layer which is used to compute the advantage term
        A(u) = -1/2 * (u - mu)^T * P_layer * (u - mu)
    :param action_low: minimum value for an action (float or np array for each action)
    :param action_high: maximum value for an action (float or np array for each action)

    :returns: a dict of 'means', 'actions', 'action_qvalues', 'state_value', 'advantage'
        to respective lasagne layers
    :rtype: collections.OrderedDict
    '''
    # TODO write tests for the damn thing
    if input_layer is None:
        assert (mean_layer is not None) and (V_layer is not None) and (L_layer is not None)

    # policy
    if mean_layer is None:
        mean_layer = DenseLayer(input_layer, num_units=action_dimensions,
                                nonlinearity=None, name="qnaf.weights")
    assert len(mean_layer.output_shape) == 2 and mean_layer.output_shape[-1] == action_dimensions

    # mean layer, clipped to action limits, used only for action picking
    mean_clipped = NonlinearityLayer(mean_layer, lambda a: a.clip(action_low, action_high))

    # action with exploration
    if not isinstance(exploration, InputLayer):
        # exploration is a number
        assert additive_exploration
        action_layer = GaussianNoiseLayer(mean_clipped, sigma=exploration)
    else:
        # exploration is a lasagne layer
        if additive_exploration:
            action_layer = ElemwiseSumLayer([mean_clipped, exploration])
        else:
            action_layer = exploration
    assert tuple(action_layer.output_shape) == tuple(mean_layer.output_shape)

    # state value
    if V_layer is None:
        V_layer = DenseLayer(input_layer, num_units=1, name="qnaf.state_value")
    assert len(V_layer.output_shape) == 2 and V_layer.output_shape[-1] == 1

    # L matrix (lower triangular)
    if L_layer is None:
        L_layer = LowerTriangularLayer(input_layer, matrix_diag=action_dimensions, name="qnaf.L")
    assert len(L_layer.output_shape) == 3  # shape must be [batch, action, action]
    assert L_layer.output_shape[1] == L_layer.output_shape[2] == action_dimensions

    advantage_layer = NAFLayer(action_layer, mean_layer, L_layer, name="qnaf.advantage")
    Q_layer = ElemwiseSumLayer([V_layer, advantage_layer])

    return OrderedDict([
        # means aka optimal actions aka Mu
        ('means', mean_layer),
        # actual actions after exploration
        ('actions', action_layer),
        # qvalue for actions from action_layer
        ('action_qvalues', Q_layer),
        # qvalue for optimal actions aka V aka Q(Mu)
        ('state_value', V_layer),
        # advantage term (negative)
        ('advantage', advantage_layer),
    ])
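# --- Usage sketch (assumption, not from the original source): building a NAF head on
# top of a small dense observation encoder. The observation size, hidden size,
# exploration sigma and action bounds are illustrative; LowerTriangularLayer and
# NAFLayer are assumed importable from the same module as build_NAF_controller.
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer

observation_var = T.matrix('observations')
l_obs = InputLayer((None, 8), input_var=observation_var)
l_hidden = DenseLayer(l_obs, num_units=64)

naf = build_NAF_controller(input_layer=l_hidden,
                           action_dimensions=2,
                           exploration=0.1,
                           action_low=-1.0, action_high=1.0)
# naf['actions'] holds exploratory actions, naf['action_qvalues'] their Q-values,
# naf['state_value'] the V(s) baseline.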
def define_network(input_var):
    batch_size = None
    net = {}

    net['input'] = InputLayer(shape=(batch_size, P.CHANNELS, P.INPUT_SIZE, P.INPUT_SIZE),
                              input_var=input_var)
    nonlinearity = nonlinearities.leaky_rectify

    if P.GAUSSIAN_NOISE > 0:
        net['input'] = GaussianNoiseLayer(net['input'], sigma=P.GAUSSIAN_NOISE)

    def contraction(depth, deepest):
        n_filters = filter_for_depth(depth)
        incoming = net['input'] if depth == 0 else net['pool{}'.format(depth - 1)]

        net['conv{}_1'.format(depth)] = Conv2DLayer(incoming,
                                                    num_filters=n_filters, filter_size=3,
                                                    pad='valid', W=HeNormal(gain='relu'),
                                                    nonlinearity=nonlinearity)
        net['conv{}_2'.format(depth)] = Conv2DLayer(net['conv{}_1'.format(depth)],
                                                    num_filters=n_filters, filter_size=3,
                                                    pad='valid', W=HeNormal(gain='relu'),
                                                    nonlinearity=nonlinearity)

        if P.BATCH_NORMALIZATION:
            net['conv{}_2'.format(depth)] = batch_norm(net['conv{}_2'.format(depth)],
                                                       alpha=P.BATCH_NORMALIZATION_ALPHA)

        if not deepest:
            net['pool{}'.format(depth)] = MaxPool2DLayer(net['conv{}_2'.format(depth)],
                                                         pool_size=2, stride=2)

    def expansion(depth, deepest):
        n_filters = filter_for_depth(depth)
        incoming = net['conv{}_2'.format(depth + 1)] if deepest \
            else net['_conv{}_2'.format(depth + 1)]

        upscaling = Upscale2DLayer(incoming, 4)
        net['upconv{}'.format(depth)] = Conv2DLayer(upscaling,
                                                    num_filters=n_filters, filter_size=2,
                                                    stride=2, W=HeNormal(gain='relu'),
                                                    nonlinearity=nonlinearity)

        if P.SPATIAL_DROPOUT > 0:
            bridge_from = DropoutLayer(net['conv{}_2'.format(depth)], P.SPATIAL_DROPOUT)
        else:
            bridge_from = net['conv{}_2'.format(depth)]

        net['bridge{}'.format(depth)] = ConcatLayer([net['upconv{}'.format(depth)], bridge_from],
                                                    axis=1,
                                                    cropping=[None, None, 'center', 'center'])

        net['_conv{}_1'.format(depth)] = Conv2DLayer(net['bridge{}'.format(depth)],
                                                     num_filters=n_filters, filter_size=3,
                                                     pad='valid', W=HeNormal(gain='relu'),
                                                     nonlinearity=nonlinearity)

        # if P.BATCH_NORMALIZATION:
        #     net['_conv{}_1'.format(depth)] = batch_norm(net['_conv{}_1'.format(depth)])

        if P.DROPOUT > 0:
            net['_conv{}_1'.format(depth)] = DropoutLayer(net['_conv{}_1'.format(depth)], P.DROPOUT)

        net['_conv{}_2'.format(depth)] = Conv2DLayer(net['_conv{}_1'.format(depth)],
                                                     num_filters=n_filters, filter_size=3,
                                                     pad='valid', W=HeNormal(gain='relu'),
                                                     nonlinearity=nonlinearity)

    for d in range(NET_DEPTH):
        # There is no pooling at the last layer
        deepest = d == NET_DEPTH - 1
        contraction(d, deepest)

    for d in reversed(range(NET_DEPTH - 1)):
        deepest = d == NET_DEPTH - 2
        expansion(d, deepest)

    # Output layer
    net['out'] = Conv2DLayer(net['_conv0_2'], num_filters=P.N_CLASSES,
                             filter_size=(1, 1), pad='valid', nonlinearity=None)

    # import network_repr
    # print network_repr.get_network_str(net['out'])
    logging.info('Network output shape ' + str(lasagne.layers.get_output_shape(net['out'])))

    return net
def transfer_encoder(*l_in, transfer_from, freeze_at, terminate_at):
    # NOTE: this model has its own noise layer at the top !!!
    from experiments.a_data import cachedir
    from experiments.utils import reload_best_hmm, reload_best_rnn

    report = shelve.open(os.path.join(cachedir, transfer_from))

    if report['meta']['model'] == "hmm":
        # reload pretrained model
        _, recognizer, _ = reload_best_hmm(report)
        posterior = recognizer.posterior

        if freeze_at == "embedding":
            # build model
            l_embedding = l_in[0]
            l_embedding = lasagne.layers.DropoutLayer(l_embedding, p=0.3)
            l_logits = lasagne.layers.DenseLayer(
                l_embedding, posterior.nstates, num_leading_axes=2, nonlinearity=None)
            l_posteriors = lasagne.layers.NonlinearityLayer(l_logits, softmax)

            # load but don't freeze
            for p1, p2 in zip(posterior.l_raw.get_params(), l_logits.get_params()):
                p2.set_value(p1.get_value())

            if terminate_at == "embedding":
                return {'l_out': l_embedding, 'warmup': posterior.warmup}
            elif terminate_at == "logits":
                return {'l_out': l_logits, 'warmup': posterior.warmup}
            if terminate_at == "posteriors":
                return {'l_out': l_posteriors, 'warmup': posterior.warmup}
            else:
                raise ValueError()

        elif freeze_at == "logits":
            # build model
            l_logits = l_in[0]
            l_logits = GaussianNoiseLayer(l_logits, sigma=4)
            l_posteriors = NonlinearityLayer(l_logits, softmax)

            if terminate_at == "logits":
                return {'l_out': l_logits, 'warmup': posterior.warmup}
            if terminate_at == "posteriors":
                return {'l_out': l_posteriors, 'warmup': posterior.warmup}
            else:
                raise ValueError()

        else:
            raise ValueError

    elif report['meta']['model'] == "rnn":
        # reload pretrained model
        _, recognizer, _ = reload_best_rnn(report)
        old_layers = lasagne.layers.get_all_layers(recognizer["l_feats"])

        if freeze_at == "inputs":
            # build model
            if report['meta']['modality'] == 'skel':
                encoder_dict = skel_encoder(*l_in, **report['args']['encoder_kwargs'])
            elif report['meta']['modality'] == 'bgr':
                encoder_dict = bgr_encoder(*l_in, **report['args']['encoder_kwargs'])
            elif report['meta']['modality'] == 'fusion':
                encoder_dict = fusion_encoder(*l_in, **report['args']['encoder_kwargs'])
            else:
                raise ValueError()

            l_embedding = encoder_dict['l_out']

            # load parameters
            params = lasagne.layers.get_all_param_values(old_layers)
            new_layers = lasagne.layers.get_all_layers(l_embedding)
            lasagne.layers.set_all_param_values(new_layers, params)

            if terminate_at == "embedding":
                return {'l_out': l_embedding, 'warmup': encoder_dict['warmup']}
            else:
                raise ValueError()

        elif freeze_at == "embedding":
            return {'l_out': l_in[0], 'warmup': 7}

        else:
            raise ValueError()

    else:
        raise ValueError()
def buildFCN_down(input_var, concat_h_vars, nb_features_to_concat, padding,
                  n_classes=21, concat_layers=['pool5'], noise=0.1,
                  n_filters=64, conv_before_pool=1, additional_pool=0,
                  dropout=0., bn=0, ae_h=False):
    '''
    Build fcn contracting path. The contracting path is built by combining
    convolution and pooling layers, at least until the last concatenation is
    reached. The last concatenation can eventually be followed by extra
    convolution and pooling layers.

    Parameters
    ----------
    input_var: theano tensor, input of the network
    concat_h_vars: list of theano tensors, intermediate inputs of the network
    nb_features_to_concat: number of feature maps that the layer that we want
        to concatenate has
    padding: padding of the input layer
    n_classes: int, number of classes
    concat_layers: list of intermediate layer names (layers we want to
        concatenate)
    noise: float, noise
    n_filters: int, number of filters of each convolution (increases every
        time resolution is downsampled)
    conv_before_pool: int, number of convolutions before a pooling layer
    additional_pool: int, number of poolings following the concatenation of
        the last layer in `concat_h_vars` and `concat_layers`
    dropout: float, dropout probability
    '''

    assert all([el in ['pool1', 'pool2', 'pool3', 'pool4', 'input', 'pool5']
                for el in concat_layers])

    if 'pool' in concat_layers[-1]:
        n_pool = int(concat_layers[-1][-1])
    else:
        n_pool = 0

    net = {}
    pos = 0

    #
    # Contracting path
    #

    # input
    net['input'] = InputLayer((None, n_classes, None, None), input_var)

    # Noise
    if noise > 0:
        # net['noisy_input'] = GaussianNoiseLayerSoftmax(net['input'],
        #                                                sigma=noise)
        net['noisy_input'] = GaussianNoiseLayer(net['input'], sigma=noise)
        # net['noisy_input'] = GaussianNoiseLayerClip(net['input'], sigma=noise)  # TODO: Be careful!!!
        in_next = 'noisy_input'
    else:
        in_next = 'input'

    # check whether we need to concatenate concat_h
    pos, out = model_helpers.concatenate(net, in_next, concat_layers,
                                         concat_h_vars, pos, nb_features_to_concat)

    if concat_layers[-1] == 'input' and additional_pool == 0:
        raise ValueError('It seems your DAE will have no conv/pooling layers!')

    # start building the network
    for p in range(n_pool + additional_pool):

        # add conv + pool
        # freeze params of the pre-h layers
        if ae_h and p == n_pool and net != {} and 'pool' in concat_layers[-1]:
            model_helpers.freezeParameters(net['pool' + str(p)])

        for i in range(1, conv_before_pool + 1):
            # Choose padding type: this is defined according to the layers:
            # - if we have several concats, we pad 100 in the first layer for
            #   fcn8 compatibility
            # - if concatenation is only performed at the input, we don't pad
            if p == 0 and i == 1 and len(concat_layers) == 1 and \
                    concat_layers[-1] != 'input' and padding > 0:
                pad_type = padding
            else:
                pad_type = 'same'

            # Define conv (follow vgg scheme, limited to 6 due to memory
            # constraints)
            if p < 6:
                filters_conv = n_filters * (2 ** p)

            # add conv layer
            net['conv' + str(p + 1) + '_' + str(i)] = ConvLayer(
                net[out], filters_conv, 3, pad=pad_type, flip_filters=False)
            out = 'conv' + str(p + 1) + '_' + str(i)

            # add dropout layer
            # if p > n_pool and dropout > 0.:
            if dropout > 0:
                net[out + '_drop'] = DropoutLayer(net[out], p=dropout)
                out += '_drop'

            if bn:
                net[out + '_bn'] = BatchNormLayer(net[out])
                out += '_bn'

        # Define pool
        if p == n_pool - 1:
            layer_name = 'h_to_recon'
        else:
            layer_name = None

        # add pooling layer
        net['pool' + str(p + 1)] = PoolLayer(net[out], 2, name=layer_name)
        out = 'pool' + str(p + 1)
        laySize = net['pool' + str(p + 1)].input_shape
        n_cl = laySize[1]
        print('Number of feature maps (out):', n_cl)

        # check whether concatenation is required
        if p < n_pool:
            pos, out = model_helpers.concatenate(net, 'pool' + str(p + 1),
                                                 concat_layers, concat_h_vars,
                                                 pos, nb_features_to_concat)

    last_layer = out

    return net, last_layer
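# --- Usage sketch (assumption, not from the original source): building the
# contracting path of the DAE on top of softmax predictions while concatenating
# the recognition network's 'pool3' features. Shapes, variable names and
# hyperparameter values below are illustrative.
import theano.tensor as T

y_hat_var = T.tensor4('y_hat')        # (batch, n_classes, H, W) softmax predictions
pool3_var = T.tensor4('pool3_feats')  # intermediate feature maps to concatenate

dae_net, last_layer = buildFCN_down(y_hat_var, [pool3_var],
                                    nb_features_to_concat=256, padding=0,
                                    n_classes=21, concat_layers=['pool3'],
                                    noise=0.1, n_filters=64,
                                    conv_before_pool=1, additional_pool=2,
                                    dropout=0.5)
# dae_net[last_layer] is the deepest layer of the contracting path.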
def build_feat_emb_nets(embedding_source, n_feats, n_samples_unsup,
                        input_var_unsup, n_hidden_u, n_hidden_t_enc,
                        n_hidden_t_dec, gamma, encoder_net_init,
                        decoder_net_init, save_path, random_proj=False,
                        decoder_encoder_unit_ratio=1, embedding_noise=0.0):
    """
    decoder_encoder_unit_ratio : int (default=1)
        Specifies the ratio between the number of units in the output layer of
        the decoder and the number of inputs to the encoder.
        If 1, the decoder will have the same number of outputs as inputs to
        the encoder. If 2, for instance, the decoder will output 2 values for
        each input value to the encoder.
        This parameter is used when the decoder is used to reconstruct
        discrete values but the encoder takes in continuous values, for
        instance.
    """

    nets = []
    embeddings = []

    if not embedding_source:  # meaning we haven't done any unsup pre-training
        encoder_net = InputLayer((n_feats, n_samples_unsup), input_var_unsup)

        # Add random noise to the unsupervised input used to compute the
        # feature embedding
        encoder_net = GaussianNoiseLayer(encoder_net, sigma=embedding_noise)

        for i, out in enumerate(n_hidden_u):
            encoder_net = DenseLayer(encoder_net, num_units=out,
                                     nonlinearity=rectify, b=None)
            if random_proj:
                freezeParameters(encoder_net)
            # encoder_net = DropoutLayer(encoder_net)
            # encoder_net = BatchNormLayer(encoder_net)
        feat_emb = lasagne.layers.get_output(encoder_net)
        pred_feat_emb = theano.function([], feat_emb)
    else:  # meaning we have done some unsup pre-training
        if os.path.exists(embedding_source):  # embedding_source is a path itself
            path_to_load = embedding_source
        else:  # fetch the embedding_source file in save_path
            path_to_load = os.path.join(save_path.rsplit('/', 1)[0],
                                        embedding_source)
        if embedding_source[-3:] == "npz":
            feat_emb_val = np.load(path_to_load).items()[0][1]
        else:
            feat_emb_val = np.load(path_to_load)
        feat_emb_val = feat_emb_val.astype('float32')

        # Get only allelic frequency
        # feat_emb_val = 0.0 * feat_emb_val[:, 0::3] + 0.5 * feat_emb_val[:, 1::3] + 1.0 * feat_emb_val[:, 2::3]

        feat_emb = theano.shared(feat_emb_val, 'feat_emb')
        encoder_net = InputLayer((n_feats, feat_emb_val.shape[1]), feat_emb)

        # Add random noise to the feature embedding
        encoder_net = GaussianNoiseLayer(encoder_net, sigma=embedding_noise)

    # Build transformations (f_theta, f_theta') network and supervised network
    # f_theta (or W_enc)
    encoder_net_W_enc = encoder_net
    for i, hid in enumerate(n_hidden_t_enc):
        encoder_net_W_enc = DenseLayer(encoder_net_W_enc, num_units=hid,
                                       nonlinearity=tanh,
                                       W=Uniform(encoder_net_init),
                                       b=None)
    enc_feat_emb = lasagne.layers.get_output(encoder_net_W_enc)
    nets.append(encoder_net_W_enc)
    embeddings.append(enc_feat_emb)

    # f_theta' (or W_dec)
    if gamma > 0:  # meaning we are going to train to reconstruct the fat data
        encoder_net_W_dec = encoder_net
        for i, hid in enumerate(n_hidden_t_dec):
            if i == len(n_hidden_t_dec) - 1:
                # This is the last layer in the network so adjust the size
                # of the hidden layer and add a reshape so that the output
                # will be a matrix of size:
                # (n_feats * decoder_encoder_unit_ratio, hid)
                scaled_hid = hid * decoder_encoder_unit_ratio
                encoder_net_W_dec = DenseLayer(encoder_net_W_dec,
                                               num_units=scaled_hid,
                                               nonlinearity=tanh,
                                               W=Uniform(decoder_net_init),
                                               b=None)
                encoder_net_W_dec = ReshapeLayer(encoder_net_W_dec, ((-1, hid)))
            else:
                encoder_net_W_dec = DenseLayer(encoder_net_W_dec,
                                               num_units=hid,
                                               nonlinearity=tanh,
                                               W=Uniform(decoder_net_init),
                                               b=None)
        dec_feat_emb = lasagne.layers.get_output(encoder_net_W_dec)
    else:
        encoder_net_W_dec = None
        dec_feat_emb = None

    nets.append(encoder_net_W_dec)
    embeddings.append(dec_feat_emb)

    return [nets, embeddings, pred_feat_emb if not embedding_source else []]
def __init__(self,
             input_layer=None,
             action_dimensions=1,
             exploration=1e-2,
             additive_exploration=True,
             mean_layer=None,
             V_layer=None,
             P_layer=None,
             ):
    '''
    <writeme>

    ALL ACTIONS ARE (-inf, inf) by default!

    :param input_layer: layer which is used to predict policy parameters.
        MUST be present unless both V_layer, L_layer and mean_layer are given
    :param action_dimensions: amount of action params to predict.
    :param exploration: if a layer is given, uses it to compute the action with
        exploration (details: see additive_exploration).
        Alternatively, if a number or a symbolic scalar is given, uses it as sigma
        for gaussian exploration noise, thus action = mean_layer + N(0, exploration).
    :param additive_exploration: if True (default), adds exploration term to the mean_layer.
        If False, uses exploration param as the picked actions including exploration
        (ignoring mean values)
    :param mean_layer: layer which returns optimal actions (Mu).
        If not given, uses DenseLayer(input_layer)
    :param V_layer: layer which returns state value baseline (V(state))
    :param P_layer: a layer which is used to compute the advantage term
        A(u) = -1/2 * (u - mu)^T * P_layer * (u - mu)
    '''
    if input_layer is None:
        assert (mean_layer is not None) and (V_layer is not None) and (P_layer is not None)

    if mean_layer is None:
        mean_layer = DenseLayer(input_layer, num_units=action_dimensions, nonlinearity=None)
    assert np.prod(V_layer.output_shape[1:]) == action_dimensions

    if V_layer is None:
        V_layer = DenseLayer(input_layer, num_units=action_dimensions)

    if P_layer is None:
        raise NotImplementedError("still writing the code")
        assert P_layer.output_shape.is_okay()

    if not isinstance(exploration, InputLayer):
        # exploration is a number
        assert additive_exploration
        action_layer = GaussianNoiseLayer(mean_layer, sigma=exploration)
    else:
        # exploration is a lasagne layer
        if additive_exploration:
            action_layer = ElemwiseSumLayer([mean_layer, exploration])
        else:
            action_layer = exploration

    incomings = [mean_layer, action_layer, V_layer, P_layer]

    batch_size = mean_layer.output_shape[0]
    shapes_dict = {
        'state_value': (batch_size,),
        # means aka optimal actions aka Mu
        'means': (batch_size, action_dimensions),
        # qvalue for optimal actions aka V aka Q(Mu)
        'qvalues_optimal': (batch_size, action_dimensions),
        # actual actions after exploration
        'actions': (batch_size, action_dimensions),
        # qvalue for actions from action_layer
        'qvalues_actual': (batch_size, action_dimensions),
    }

    super(NAFLayer, self).__init__(incomings, shapes_dict)