def build_network():
    l_in = L.InputLayer((None, SEQUENCE_LEN, 256))
    l_forward = L.RecurrentLayer(l_in, num_units=16)
    l_backward = L.RecurrentLayer(l_in, num_units=16, backwards=True)
    l_concat = L.ConcatLayer([l_forward, l_backward])
    l_out = L.DenseLayer(l_concat, num_units=2, nonlinearity=T.nnet.softmax)
    return l_out
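
# Illustrative sketch, not part of the original code: one way to compile a
# prediction function from the softmax output layer returned by
# build_network(). It assumes the same aliases as the snippet above
# (`import lasagne.layers as L`, `import theano`) and a defined SEQUENCE_LEN.
def example_predict_fn():
    """Hedged usage sketch: build the network and compile a deterministic
    forward pass returning class probabilities of shape (batch, 2)."""
    l_out = build_network()
    # The InputLayer created inside build_network() owns the symbolic input.
    l_in = L.get_all_layers(l_out)[0]
    probs = L.get_output(l_out, deterministic=True)
    return theano.function([l_in.input_var], probs)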
def build_1Dregression_v1(input_var=None, input_width=None, nin_units=12,
                          h_num_units=[64, 64], h_grad_clip=1.0,
                          output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating with
    dense layers and merging of the two directions, followed by a feature
    mean pooling in the time direction, with a linear dim-reduction layer
    at the start.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (int): number of NIN features
        h_num_units (int list): no. of units in the hidden layer of each
            stack, from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)

    Returns:
        output layer (Lasagne layer object)
    """
    # Non-linearity hyperparameter
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=0.15)

    # Input layer: (batch, 22 channels, input_width time steps)
    l_in = LL.InputLayer(shape=(None, 22, input_width), input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN layer: linear per-time-step dimensionality reduction (22 -> nin_units)
    l_in = LL.NINLayer(l_in, num_units=nin_units,
                       nonlinearity=lasagne.nonlinearities.linear)
    l_in_1 = LL.DimshuffleLayer(l_in, (0, 2, 1))   # -> (batch, time, features)

    # RNN layers
    for h in h_num_units:
        # Forward direction
        l_forward_0 = LL.RecurrentLayer(l_in_1, nonlinearity=nonlin,
                                        num_units=h, backwards=False,
                                        learn_init=True,
                                        grad_clipping=h_grad_clip,
                                        unroll_scan=True,
                                        precompute_input=True)
        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))
        l_forward_0b = LL.DenseLayer(l_forward_0a, num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))

        # Backward direction
        l_backward_0 = LL.RecurrentLayer(l_in_1, nonlinearity=nonlin,
                                         num_units=h, backwards=True,
                                         learn_init=True,
                                         grad_clipping=h_grad_clip,
                                         unroll_scan=True,
                                         precompute_input=True)
        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))
        l_backward_0b = LL.DenseLayer(l_backward_0a, num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))

        # Merge the two directions and feed the sum into the next stack
        l_in_1 = LL.ElemwiseSumLayer([l_forward_0c, l_backward_0c])

    # Output layers: per-time-step regression, then mean pooling over time
    network_0a = LL.ReshapeLayer(l_in_1, (-1, h_num_units[-1]))
    network_0b = LL.DenseLayer(network_0a, num_units=output_width,
                               nonlinearity=nonlin)
    network_0c = LL.ReshapeLayer(network_0b,
                                 (batchsize, input_width, output_width))
    output_net_1 = LL.FlattenLayer(network_0c, outdim=2)
    output_net_2 = LL.FeaturePoolLayer(output_net_1, pool_size=input_width,
                                       pool_function=T.mean)

    return output_net_2
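
# Illustrative sketch, not from the original code: a minimal way to train
# build_1Dregression_v1. The squared-error objective, Adam optimizer, variable
# names and learning rate are assumptions; it reuses the `LL`, `T`, `lasagne`
# and `theano` aliases assumed by the function above.
def example_train_fn_v1(input_width, learning_rate=1e-3):
    """Hedged sketch: compile a training function mapping (X, y) -> loss."""
    X = T.tensor3('X')    # (batch, 22, input_width), as expected by the net
    y = T.matrix('y')     # (batch, output_width) regression targets
    network = build_1Dregression_v1(input_var=X, input_width=input_width)
    prediction = LL.get_output(network)
    loss = lasagne.objectives.squared_error(prediction, y).mean()
    params = LL.get_all_params(network, trainable=True)
    grad_updates = lasagne.updates.adam(loss, params,
                                        learning_rate=learning_rate)
    return theano.function([X, y], loss, updates=grad_updates)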
def build_rnn_net(input_var=None, input_width=None, input_dim=None,
                  nin_units=80, h_num_units=[64, 64], h_grad_clip=1.0,
                  output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating with
    dense layers and merging of the two directions, followed by a feature
    mean pooling in the time direction, with a linear dim-reduction layer
    at the start. Adds dropout for better generalization.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (int): number of NIN features
        h_num_units (int list): no. of units in the hidden layer of each
            stack, from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)

    Returns:
        output layer (Lasagne layer object)
    """
    # Non-linearity hyperparameter
    leaky_ratio = 0.3
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=leaky_ratio)

    # Input layer: (batch, input_width time steps, input_dim features)
    l_in = LL.InputLayer(shape=(None, input_width, input_dim),
                         input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN layer (disabled for now)
    # l_in_1 = LL.NINLayer(l_in, num_units=nin_units,
    #                      nonlinearity=lasagne.nonlinearities.linear)
    l_in_1 = l_in

    # Input dropout is disabled for now, since the first RNN layer is 256
    # units wide; we currently do not drop input features.
    # l_in_d = LL.DropoutLayer(l_in, p=0.8)

    # RNN layers.
    # Dropout is applied only in the first two (of three total) or first
    # three (of five total) stacked layers.
    counter = -1
    drop_ends = 2
    for h in h_num_units:
        counter += 1

        # Forward direction
        l_forward_0 = LL.RecurrentLayer(
            l_in_1, nonlinearity=nonlin, num_units=h,
            W_in_to_hid=lasagne.init.Normal(0.01, 0),
            # W_in_to_hid=lasagne.init.He(initializer,
            #                             math.sqrt(2 / (1 + 0.15**2))),
            W_hid_to_hid=lasagne.init.Orthogonal(
                math.sqrt(2 / (1 + leaky_ratio**2))),
            backwards=False, learn_init=True, grad_clipping=h_grad_clip,
            # gradient_steps=20,
            unroll_scan=True, precompute_input=True)
        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))
        if counter < drop_ends and counter % 2 != 0:
            l_forward_0a = LL.DropoutLayer(l_forward_0a, p=0.2)
        l_forward_0b = LL.DenseLayer(l_forward_0a, num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))
        l_forward_out = l_forward_0c

        # Backward direction
        l_backward_0 = LL.RecurrentLayer(
            l_in_1, nonlinearity=nonlin, num_units=h,
            W_in_to_hid=lasagne.init.Normal(0.01, 0),
            # W_in_to_hid=lasagne.init.He(initializer,
            #                             math.sqrt(2 / (1 + 0.15**2))),
            W_hid_to_hid=lasagne.init.Orthogonal(
                math.sqrt(2 / (1 + leaky_ratio**2))),
            backwards=True, learn_init=True, grad_clipping=h_grad_clip,
            # gradient_steps=20,
            unroll_scan=True, precompute_input=True)
        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))
        if counter < drop_ends and counter % 2 == 0:
            l_backward_0a = LL.DropoutLayer(l_backward_0a, p=0.2)
        l_backward_0b = LL.DenseLayer(l_backward_0a, num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))
        l_backward_out = l_backward_0c

        # Merge the two directions and feed the sum into the next stack
        l_in_1 = LL.ElemwiseSumLayer([l_forward_out, l_backward_out])

    # Output layers: per-time-step 1D regression, flattened to (batch, time)
    network_0a = LL.DenseLayer(l_in_1, num_units=1, num_leading_axes=2,
                               nonlinearity=nonlin)
    output_net = LL.FlattenLayer(network_0a, outdim=2)

    return output_net
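
# Illustrative sketch, not from the original code: because build_rnn_net
# inserts DropoutLayers, the train-time and test-time graphs differ, and the
# test graph must be built with deterministic=True so dropout is disabled.
# The shapes below are assumptions; the `LL`, `T` and `theano` aliases are
# the same ones assumed by the function above.
def example_rnn_predict_fn(input_width, input_dim):
    """Hedged sketch: compile a deterministic (dropout-free) forward pass."""
    X = T.tensor3('X')    # (batch, input_width, input_dim)
    network = build_rnn_net(input_var=X, input_width=input_width,
                            input_dim=input_dim)
    test_prediction = LL.get_output(network, deterministic=True)
    return theano.function([X], test_prediction)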
def __init__(self, full_length, output_size, meta_size, depth=2,
             encoder_size=64, decoder_size=64):
    latent_size = 16

    # Symbolic inputs: observed sequences, metadata, targets, per-step weights
    input_var = TT.tensor3(dtype='float32')
    meta_var = TT.tensor3(dtype='float32')
    target_var = TT.matrix()
    cut_weights = TT.vector(dtype='float32')

    input_layer = layers.InputLayer((None, None, output_size),
                                    input_var=input_var)
    meta_layer = layers.InputLayer((None, None, meta_size),
                                   input_var=meta_var)
    meta_layer = layers.DropoutLayer(meta_layer, p=0.2)
    concat_input_layer = layers.ConcatLayer([input_layer, meta_layer],
                                            axis=-1)

    # Encoder: two stacked recurrent layers, then a dense bottleneck
    lstm_layer = layers.RecurrentLayer(concat_input_layer, encoder_size // 2,
                                       learn_init=True)
    lstm_layer = layers.RecurrentLayer(lstm_layer, encoder_size // 2,
                                       learn_init=True)
    lstm_layer = layers.ReshapeLayer(lstm_layer, (-1, encoder_size // 2))
    encoded = layers.DenseLayer(lstm_layer, latent_size)
    encoded = layers.batch_norm(encoded)

    # Decoder: stack of batch-normalized dense layers
    dense = encoded
    for idx in range(depth):
        dense = layers.DenseLayer(dense, decoder_size)
        dense = layers.batch_norm(dense)

    # Output: predicted mean and log-variance for each step
    mu_and_logvar_x_layer = layers.DenseLayer(
        dense, full_length * 2, nonlinearity=nonlinearities.linear)
    mu_x_layer = layers.SliceLayer(mu_and_logvar_x_layer,
                                   slice(0, full_length), axis=1)
    mu_x_layer = layers.ReshapeLayer(mu_x_layer,
                                     (-1, full_length, full_length))
    logvar_x_layer = layers.SliceLayer(mu_and_logvar_x_layer,
                                       slice(full_length, None), axis=1)
    logvar_x_layer = layers.ReshapeLayer(logvar_x_layer,
                                         (-1, full_length, full_length))

    # Weighted Gaussian negative log-likelihood plus L2 weight decay
    l2_norm = regularization.regularize_network_params(
        mu_and_logvar_x_layer, regularization.l2)
    loss = neg_log_likelihood(
        target_var,
        layers.get_output(mu_x_layer, deterministic=False),
        layers.get_output(logvar_x_layer, deterministic=False),
        cut_weights
    ) + 1e-4 * l2_norm
    # Evaluation loss: deterministic pass (dropout/batch norm in inference mode)
    test_loss = neg_log_likelihood(
        target_var,
        layers.get_output(mu_x_layer, deterministic=True),
        layers.get_output(logvar_x_layer, deterministic=True),
        cut_weights
    ) + 1e-4 * l2_norm

    params = layers.get_all_params(mu_and_logvar_x_layer, trainable=True)
    param_updates = updates.adadelta(loss.mean(), params)

    self._train_fn = theano.function(
        [input_var, meta_var, target_var, cut_weights],
        updates=param_updates,
        outputs=loss.mean()
    )
    self._loss_fn = theano.function(
        [input_var, meta_var, target_var, cut_weights],
        outputs=test_loss.mean()
    )
    self._predict_fn = theano.function(
        [input_var, meta_var],
        outputs=[
            layers.get_output(mu_x_layer, deterministic=True),
            layers.get_output(logvar_x_layer, deterministic=True)
        ]
    )
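
# Illustrative sketch, not from the original code: one training step and one
# prediction with an instance of the (unnamed) class whose __init__ is shown
# above. The helper name, argument names and array shapes are assumptions.
def example_model_step(model, X, M, y, w):
    """Hedged sketch: run one parameter update, then report losses and a
    deterministic prediction.

    model: instance of the class defined above
    X: float32 array (batch, time, output_size) of observed sequences
    M: float32 array (batch, time, meta_size) of metadata
    y: float32 matrix of targets, as expected by target_var
    w: float32 vector of per-step weights (cut_weights)
    """
    train_loss = model._train_fn(X, M, y, w)   # applies adadelta updates
    eval_loss = model._loss_fn(X, M, y, w)     # deterministic evaluation loss
    mu, logvar = model._predict_fn(X, M)       # predicted mean and log-variance
    return train_loss, eval_loss, mu, logvar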