Example #1
def create_decoder(z_hat_in, z_noise, num_units, norm_list, layer_num):
    i = layer_num
    dense = DenseLayer(z_hat_in, num_units=num_units, name='dec_dense%i' % i,
                       W=init, nonlinearity=identity)
    normalize = NormalizeLayer(dense, name='dec_normalize%i' % i)
    u = ScaleAndShiftLayer(normalize, name='dec_scale%i' % i)
    z_hat = DenoiseLayer(u_net=u, z_net=get_unlab(z_noise), name='dec_denoise%i' % i)
    mean = ListIndexLayer(norm_list, index=1, name='dec_index_mean%i' % i)
    var = ListIndexLayer(norm_list, index=2, name='dec_index_var%i' % i)
    z_hat_bn = DecoderNormalizeLayer(z_hat, mean=mean, var=var,
                                     name='dec_decnormalize%i' % i)
    return z_hat, z_hat_bn
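A minimal sketch (not part of the original example) of how the two returned layers are typically used in a ladder network (Rasmus et al., 2015): z_hat feeds the next create_decoder call down the stack, while z_hat_bn is compared against the corresponding clean encoder activation z in a per-layer denoising cost. The names z1, z_hat_bn1, lambda_1 and sym_x below are assumed for illustration.

import theano.tensor as T
import lasagne

# clean encoder activation and its batch-normalized reconstruction
z1_clean = lasagne.layers.get_output(z1, sym_x, deterministic=True)
z_hat_bn1_out = lasagne.layers.get_output(z_hat_bn1, sym_x)
# per-layer denoising term; lambda_1 is a user-chosen layer weight
denoise_cost1 = lambda_1 * T.sqr(z1_clean - z_hat_bn1_out).mean()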
Example #2
def create_encoder(incoming, num_units, nonlinearity, layer_num):
    i = layer_num
    z_pre = DenseLayer(
        incoming=incoming, num_units=num_units, nonlinearity=identity, b=None,
        name='enc_dense%i' % i, W=init)
    norm_list = NormalizeLayer(
        z_pre, return_stats=True, name='enc_normalize%i' % i,
        stat_indices=unlabeled_slice)
    z = ListIndexLayer(norm_list, index=0, name='enc_index%i' % i)
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(
        ScaleAndShiftLayer(z_noise, name='enc_scale%i' % i),
        nonlinearity=nonlinearity, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list
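A hedged usage sketch (assumed, not from the source snippet): create_encoder calls are stacked so that each layer's post-nonlinearity output h feeds the next layer, while z, z_noise and norm_list are kept for the decoder path. The input layer l_in, the layer sizes and the rectify nonlinearity below are illustrative assumptions.

from lasagne.nonlinearities import rectify

h1, z1, z_noise1, norm_list1 = create_encoder(l_in, 1000, rectify, 1)
h2, z2, z_noise2, norm_list2 = create_encoder(h1, 500, rectify, 2)
h3, z3, z_noise3, norm_list3 = create_encoder(h2, 250, rectify, 3)
# the topmost layer would typically use a softmax nonlinearity for classification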
Example #3
l_log_var = lasagne.layers.DenseLayer(
    l_enc_h1,
    num_units=latent_size,
    nonlinearity=lasagne.nonlinearities.identity,
    name='ENC_LOG_VAR')

#sample layer
l_z = SampleLayer(mean=l_mu,
                  log_var=l_log_var,
                  eq_samples=sym_eq_samples,
                  iw_samples=sym_iw_samples)
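# Note (assumption about SampleLayer from the parmesan library, not code from
# this snippet): the layer draws eq_samples * iw_samples samples per datapoint
# via the reparameterization trick, z = mu + exp(0.5 * log_var) * eps with
# eps ~ N(0, I), so gradients flow through mu and log_var.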

#Normalizing Flow
l_logdet_J = []
l_zk = l_z
for i in range(nflows):
    l_nf = NormalizingPlanarFlowLayer(l_zk)
    l_zk = ListIndexLayer(l_nf, index=0)
    l_logdet_J += [ListIndexLayer(l_nf, index=1)]  # we need this for the cost function
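# Background (summary of Rezende & Mohamed, 2015, not code from this repo):
# each NormalizingPlanarFlowLayer applies f(z) = z + u * tanh(w^T z + b),
# and the second output collected above is the log-determinant of its Jacobian,
#     log|det df/dz| = log|1 + u^T psi(z)|,  psi(z) = (1 - tanh^2(w^T z + b)) * w,
# which is why one such term per flow step is kept for the cost function.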

# Generative model p(x|z)
l_dec_h1 = denselayer(l_zk,
                      num_units=nhidden,
                      name='DEC_DENSE2',
                      nonlinearity=nonlin_dec)
l_dec_h1 = denselayer(l_dec_h1,
                      num_units=nhidden,
                      name='DEC_DENSE1',
                      nonlinearity=nonlin_dec)
l_dec_x_mu = lasagne.layers.DenseLayer(
    l_dec_h1,
    num_units=num_features,
    nonlinearity=lasagne.nonlinearities.sigmoid,
    name='X_MU')
Example #4
    def __init__(self, settings):

        # Call constructor of base model
        super(SRNN_timit, self).__init__()

        # Define initializers for the parameters
        if settings.init_rnn == 'uniform':
            init_rnn = lasagne.init.Uniform(range=settings.init_range)
        elif settings.init_rnn == 'orthogonal':
            init_rnn = lasagne.init.Orthogonal()
        else:
            raise ValueError('Invalid initializer \'' + settings.init_rnn +
                             '\'')

        if settings.init_mlp == 'uniform':
            init_mlp = lasagne.init.GlorotUniform()
        elif settings.init_mlp == 'normal':
            init_mlp = lasagne.init.GlorotNormal()
        else:
            raise ValueError('Invalid initializer \'' + settings.init_mlp +
                             '\'')

        # For stability
        init_last_layer_mlp = lasagne.init.Uniform(range=settings.init_range)

        def dense_layer(l,
                        num_units,
                        nonlinearity,
                        name,
                        W=init_mlp,
                        b=lasagne.init.Constant(0.)):
            l = lasagne.layers.DenseLayer(l,
                                          num_units=num_units,
                                          name=name + "-dense",
                                          W=W,
                                          b=b,
                                          nonlinearity=nonlinearity)
            return l

        # Define MLP to be used in the encoding and decoding networks
        def mlp(input_layer,
                num_units,
                nonlinearity,
                name,
                num_mlp_layers=1,
                W=init_mlp,
                b=lasagne.init.Constant(0.)):
            output_layer = input_layer
            for i in range(num_mlp_layers):
                output_layer = dense_layer(output_layer,
                                           num_units=num_units,
                                           name=name + '_' + str(i + 1),
                                           nonlinearity=nonlinearity,
                                           W=W,
                                           b=b)
            return output_layer

        # Define nonlinearities for encoder and decoder
        def clipped_very_leaky_rectify(x):
            return T.clip(theano.tensor.nnet.relu(x, 1. / 3),
                          -settings.range_nonlin, settings.range_nonlin)

        def get_nonlinearity(nonlin):
            if nonlin == 'rectify':
                return lasagne.nonlinearities.rectify
            elif nonlin == 'very_leaky_rectify':
                return lasagne.nonlinearities.very_leaky_rectify
            elif nonlin == 'tanh':
                return lasagne.nonlinearities.tanh
            elif nonlin == 'clipped_very_leaky_rectify':
                return clipped_very_leaky_rectify
            else:
                raise ValueError('Invalid non-linearity \'' + nonlin + '\'')

        nonlin_encoder = get_nonlinearity(settings.nonlinearity_encoder)
        nonlin_decoder = get_nonlinearity(settings.nonlinearity_decoder)

        ## INPUTS
        self.u_sym = T.tensor3()
        self.u_sym.tag.test_value = np.random.randn(
            settings.batch_size, settings.sequence_length,
            settings.output_dim).astype('float32')

        self.x_sym = T.tensor3()
        self.x_sym.tag.test_value = np.random.randn(
            settings.batch_size, settings.sequence_length,
            settings.output_dim).astype('float32')

        # To handle sequences of different lengths
        self.sym_mask = T.matrix()
        self.sym_mask.tag.test_value = np.ones(
            (settings.batch_size, settings.sequence_length)).astype('float32')

        # Input layer for the inputs of the GRU network
        self.input_layer_u = lasagne.layers.InputLayer(
            (settings.batch_size, None, settings.output_dim),
            self.u_sym,
            name="input_layer_u")

        self.input_layer_mask = lasagne.layers.InputLayer(
            (settings.batch_size, None),
            self.sym_mask,
            name="input_layer_mask")

        input_layer_u_flat = lasagne.layers.ReshapeLayer(
            self.input_layer_u, (-1, settings.output_dim),
            name="input_layer_u_flat")

        u_dense1_flat = lasagne.layers.DenseLayer(
            input_layer_u_flat,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_decoder,
            name="u_dense1_flat")
        u_dense2_flat = lasagne.layers.DenseLayer(
            u_dense1_flat,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_decoder,
            name="u_dense2_flat")

        u_emb_dropout_flat = lasagne.layers.DropoutLayer(
            u_dense2_flat, p=settings.p_emb_u_drop, name="u_mlp_dropout_flat")

        u_emb_dropout = lasagne.layers.ReshapeLayer(
            u_emb_dropout_flat,
            (settings.batch_size, -1, settings.num_hidden_mlp))

        ## MODEL SETUP
        # We first initialize the shared variables for the initial deterministic hidden state (initialized to 0). Due
        # to the way we have divided the data into batches, we can use the last hidden state of the current batch to
        # initialize the hidden state of the following batch.
        self.d_init_sh = theano.shared(
            np.zeros((settings.batch_size, settings.latent_size_d),
                     dtype=theano.config.floatX))

        self.input_layer_d_tm1 = lasagne.layers.InputLayer(
            (None, settings.latent_size_d), name="input_layer_d_tm1")
        # We first compute the output from the RNN (deterministic hidden state)
        # First GRU layer
        # Inputs: a (batch_size x sequence_length x num_hidden_mlp) tensor coming from the dropout layer
        #         and a (batch_size x latent_size_d) initial hidden state
        # Output: a (batch_size x sequence_length x latent_size_d) tensor
        self.d_layer = lasagne.layers.GRULayer(
            u_emb_dropout,
            num_units=settings.latent_size_d,
            resetgate=lasagne.layers.Gate(W_in=init_rnn,
                                          W_hid=init_rnn,
                                          W_cell=None),
            updategate=lasagne.layers.Gate(W_in=init_rnn,
                                           W_hid=init_rnn,
                                           W_cell=None),
            hidden_update=lasagne.layers.Gate(
                W_in=init_rnn,
                W_hid=init_rnn,
                W_cell=None,
                nonlinearity=lasagne.nonlinearities.tanh),
            learn_init=False,
            hid_init=self.input_layer_d_tm1,
            mask_input=self.input_layer_mask,
            unroll_scan=settings.unroll_scan,
            name="d_layer")

        # We add dropout to all non-recurrent connections
        self.d_dropout_layer = lasagne.layers.DropoutLayer(
            self.d_layer, p=settings.p_d_drop, name="d_dropout_layer")

        # Define x inputs to the encoder
        self.input_layer_x = lasagne.layers.InputLayer(
            (settings.batch_size, None, settings.output_dim),
            self.x_sym,
            name="input_layer_x")

        input_layer_x_flat = lasagne.layers.ReshapeLayer(
            self.input_layer_x, (-1, settings.output_dim),
            name="input_layer_x_flat")

        # Share the parameters (and nonlinearities) with the MLP after u; these layers act as feature extractors
        x_dense1_flat = lasagne.layers.DenseLayer(
            input_layer_x_flat,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_decoder,
            name="x_dense1_flat",
            W=u_dense1_flat.W,
            b=u_dense1_flat.b)
        x_dense2_flat = lasagne.layers.DenseLayer(
            x_dense1_flat,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_decoder,
            name="x_dense2_flat",
            W=u_dense2_flat.W,
            b=u_dense2_flat.b)

        x_emb_dropout_flat = lasagne.layers.DropoutLayer(
            x_dense2_flat, p=settings.p_emb_x_drop, name="x_mlp_dropout_flat")

        x_emb_dropout = lasagne.layers.ReshapeLayer(
            x_emb_dropout_flat,
            (settings.batch_size, -1, settings.num_hidden_mlp),
            name='x_emb_dropout')

        input_a_layer = lasagne.layers.ConcatLayer(
            [self.d_dropout_layer, x_emb_dropout],
            axis=2,
            name="input_a_layer")

        if settings.smoothing:
            print "Doing smoothing"
            # The hidden state is initialized with zeros
            a_layer = lasagne.layers.GRULayer(
                input_a_layer,
                num_units=settings.latent_size_a,
                resetgate=lasagne.layers.Gate(W_in=init_rnn,
                                              W_hid=init_rnn,
                                              W_cell=None),
                updategate=lasagne.layers.Gate(W_in=init_rnn,
                                               W_hid=init_rnn,
                                               W_cell=None),
                hidden_update=lasagne.layers.Gate(
                    W_in=init_rnn,
                    W_hid=init_rnn,
                    W_cell=None,
                    nonlinearity=lasagne.nonlinearities.tanh),
                learn_init=False,
                backwards=True,
                unroll_scan=settings.unroll_scan,
                mask_input=self.input_layer_mask,
                name="a_layer")

        else:  # We only do filtering
            print "Doing filtering"
            input_a_layer_flat = lasagne.layers.ReshapeLayer(
                input_a_layer, (-1, [2]), name="input_a_layer_flat")
            a_layer_flat = mlp(input_a_layer_flat,
                               settings.latent_size_a,
                               nonlin_encoder,
                               "a_layer_flat",
                               num_mlp_layers=settings.num_layers_mlp)

            a_layer = lasagne.layers.ReshapeLayer(
                a_layer_flat,
                (settings.batch_size, -1, settings.latent_size_a))

        # Define shared variables for quantities to be updated across batches (truncated BPTT)
        self.z_init_sh = theano.shared(
            np.zeros((settings.batch_size, settings.latent_size_z),
                     dtype=theano.config.floatX))

        self.input_layer_z_tm1 = lasagne.layers.InputLayer(
            (None, settings.latent_size_z), name="input_layer_z_tm1")

        self.mean_prior_init_sh = theano.shared(
            np.zeros((settings.batch_size, settings.latent_size_z),
                     dtype=theano.config.floatX))

        self.input_layer_mean_prior_tm1 = lasagne.layers.InputLayer(
            (None, settings.latent_size_z), name="input_layer_mean_prior_tm1")

        self.log_var_prior_init_sh = theano.shared(
            np.zeros((settings.batch_size, settings.latent_size_z),
                     dtype=theano.config.floatX))

        self.input_layer_log_var_prior_tm1 = lasagne.layers.InputLayer(
            (None, settings.latent_size_z),
            name="input_layer_log_var_prior_tm1")

        # Define MLPs to be used in StochsticRecurrentLayer
        mlp_prior_input_dim = settings.latent_size_d + settings.latent_size_z

        input_prior_mlp = lasagne.layers.InputLayer(
            (None, mlp_prior_input_dim))
        mean_prior_dense1 = lasagne.layers.DenseLayer(
            input_prior_mlp,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_decoder,
            name="mean_prior_dense1")
        mean_prior_dense2 = lasagne.layers.DenseLayer(
            mean_prior_dense1,
            num_units=settings.latent_size_z,
            W=init_last_layer_mlp,
            nonlinearity=None,
            name="mean_prior_dense2")
        log_var_prior_dense1 = lasagne.layers.DenseLayer(
            input_prior_mlp,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_decoder,
            name="log_var_prior_dense1")
        log_var_prior_dense2 = lasagne.layers.DenseLayer(
            log_var_prior_dense1,
            num_units=settings.latent_size_z,
            W=init_last_layer_mlp,
            nonlinearity=None,
            name="log_var_prior_dense2")

        mlp_q_input_dim = settings.latent_size_a + settings.latent_size_z  # [input_q_n, z_previous]

        input_q_mlp = lasagne.layers.InputLayer((None, mlp_q_input_dim))
        mean_q_dense1 = lasagne.layers.DenseLayer(
            input_q_mlp,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_encoder,
            name="mean_q_dense1")
        mean_q_dense2 = lasagne.layers.DenseLayer(
            mean_q_dense1,
            num_units=settings.latent_size_z,
            W=init_last_layer_mlp,
            nonlinearity=None,
            name="mean_q_dense2")
        log_var_q_dense1 = lasagne.layers.DenseLayer(
            input_q_mlp,
            num_units=settings.num_hidden_mlp,
            nonlinearity=nonlin_encoder,
            name="log_var_q_dense1")
        log_var_q_dense2 = lasagne.layers.DenseLayer(
            log_var_q_dense1,
            num_units=settings.latent_size_z,
            W=init_last_layer_mlp,
            nonlinearity=None,
            name="log_var_q_dense2")

        if settings.cons == 0:
            cons = 0
        elif settings.cons < 0:
            cons = 10**(settings.cons)
        else:
            raise ValueError()

        stochastic_recurrent_layer = StochsticRecurrentLayer(
            input_p=self.d_dropout_layer,
            input_q=a_layer,
            mu_p_mlp=mean_prior_dense2,
            logvar_p_mlp=log_var_prior_dense2,
            q_mu_mlp=mean_q_dense2,
            q_logvar_mlp=log_var_q_dense2,
            num_units=settings.latent_size_z,
            unroll_scan=settings.unroll_scan,
            use_mu_residual_q=settings.use_mu_residual,
            z_init=self.input_layer_z_tm1,
            mu_p_init=self.input_layer_mean_prior_tm1,
            mask_input=self.input_layer_mask,
            cons=cons,
            name='stochastic_recurrent_layer')

        # ListIndexLayer is needed after a Layer that returns multiple outputs
        self.z_layer = ListIndexLayer(stochastic_recurrent_layer,
                                      index=0,
                                      name='z_layer')
        self.mean_prior_layer = ListIndexLayer(stochastic_recurrent_layer,
                                               index=1,
                                               name='mean_prior_layer')
        self.log_var_prior_layer = ListIndexLayer(stochastic_recurrent_layer,
                                                  index=2,
                                                  name='log_var_prior_layer')
        self.mean_q_layer = ListIndexLayer(stochastic_recurrent_layer,
                                           index=3,
                                           name='mean_q_layer')
        self.log_var_q_layer = ListIndexLayer(stochastic_recurrent_layer,
                                              index=4,
                                              name='log_var_q_layer')

        # Finish the generative model
        self.z_dropout_layer = lasagne.layers.DropoutLayer(
            self.z_layer, p=settings.p_z_drop, name="z_dropout_layer")

        # The output MLP needs 2D tensors, hence we reshape here, add the MLP, and reshape again
        d_layer_reshaped = lasagne.layers.ReshapeLayer(
            self.d_dropout_layer, (-1, settings.latent_size_d),
            name="d_layer_reshaped")

        z_layer_reshaped = lasagne.layers.ReshapeLayer(
            self.z_dropout_layer, (-1, settings.latent_size_z),
            name="z_layer_reshaped")

        input_generative_mlp = lasagne.layers.ConcatLayer(
            [d_layer_reshaped, z_layer_reshaped],
            axis=1,
            name="input_generative_mlp")
        generative_mlp = mlp(input_generative_mlp,
                             settings.num_hidden_mlp,
                             nonlin_decoder,
                             "generative_mlp",
                             num_mlp_layers=settings.num_layers_mlp)

        # Compute the Gaussian output parameters and reshape
        mean_gauss_output_reshaped = lasagne.layers.DenseLayer(
            generative_mlp,
            num_units=settings.output_dim,
            nonlinearity=lasagne.nonlinearities.identity,
            W=init_last_layer_mlp,
            name="mean_gauss_output_reshaped")

        log_var_output_reshaped = lasagne.layers.DenseLayer(
            generative_mlp,
            num_units=settings.output_dim,
            nonlinearity=lasagne.nonlinearities.identity,
            W=init_last_layer_mlp,
            name="log_var_output_reshaped")

        self.mean_gauss_output_layer = lasagne.layers.ReshapeLayer(
            mean_gauss_output_reshaped,
            (settings.batch_size, -1, settings.output_dim),
            name="mean_gauss_output_layer")
        self.log_var_gauss_output_layer = lasagne.layers.ReshapeLayer(
            log_var_output_reshaped,
            (settings.batch_size, -1, settings.output_dim),
            name="log_var_output_layer")

        # List of all layers that we need to pass for pickle/model_info (see base model)
        self.output_layer = [
            self.z_layer, self.mean_prior_layer, self.log_var_prior_layer,
            self.mean_gauss_output_layer, self.log_var_gauss_output_layer
        ]
        # Get a list of all parameters in the network.
        self.model_params = lasagne.layers.get_all_params(self.output_layer)
        # Get a list of all trainable parameters in the network.
        self.model_params_trainable = lasagne.layers.get_all_params(
            self.output_layer, trainable=True)
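A hedged sketch (an assumption about how the class is used, not part of it) of the truncated BPTT described in the MODEL SETUP comment: the *_tm1 input layers are fed from the *_init_sh shared variables, and after each batch the last time step of the inferred states is written back into those shared variables so the next batch starts from it. The names model, settings and state_updates below are illustrative.

import lasagne

model = SRNN_timit(settings)
z_out, mean_prior_out, log_var_prior_out = lasagne.layers.get_output(
    [model.z_layer, model.mean_prior_layer, model.log_var_prior_layer],
    inputs={model.input_layer_d_tm1: model.d_init_sh,
            model.input_layer_z_tm1: model.z_init_sh,
            model.input_layer_mean_prior_tm1: model.mean_prior_init_sh},
    deterministic=False)
# carry the last time step over to the next batch (truncated BPTT)
state_updates = [(model.z_init_sh, z_out[:, -1]),
                 (model.mean_prior_init_sh, mean_prior_out[:, -1]),
                 (model.log_var_prior_init_sh, log_var_prior_out[:, -1])]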
Example #5
}).shape
h6_dec = get_unlab(l_out_enc)
print "y_weights_decoder:", lasagne.layers.get_output(h6_dec, sym_x).eval({
    sym_x:
    x_train[:200]
}).shape

###############
#  DECODER    #
###############

##### Decoder Layer 6
u6 = ScaleAndShiftLayer(NormalizeLayer(h6_dec, name='dec_normalize6'),
                        name='dec_scale6')
z_hat6 = DenoiseLayer(u_net=u6, z_net=get_unlab(z_noise6), name='dec_denoise6')
mean6 = ListIndexLayer(norm_list6, index=1, name='dec_index_mean6')
var6 = ListIndexLayer(norm_list6, index=2, name='dec_index_var6')
z_hat_bn6 = DecoderNormalizeLayer(z_hat6,
                                  mean=mean6,
                                  var=var6,
                                  name='dec_decnormalize6')
###########################

z_hat5, z_hat_bn5 = create_decoder(z_hat6, z_noise5, 250, norm_list5, 5)
z_hat4, z_hat_bn4 = create_decoder(z_hat5, z_noise4, 250, norm_list4, 4)
z_hat3, z_hat_bn3 = create_decoder(z_hat4, z_noise3, 250, norm_list3, 3)
z_hat2, z_hat_bn2 = create_decoder(z_hat3, z_noise2, 500, norm_list2, 2)
z_hat1, z_hat_bn1 = create_decoder(z_hat2, z_noise1, 1000, norm_list1, 1)

############################# Decoder Layer 0
# We need this because we also have h0, i.e. the input layer.
Example #6
# Recognition model q(z|x)
l_in = lasagne.layers.InputLayer((None, num_features))
l_enc_h1 = denselayer(l_in, num_units=nhidden, name='ENC_DENSE1', nonlinearity=nonlin_enc)
l_enc_h1 = denselayer(l_enc_h1, num_units=nhidden, name='ENC_DENSE2', nonlinearity=nonlin_enc)
l_mu = lasagne.layers.DenseLayer(l_enc_h1, num_units=latent_size, nonlinearity=lasagne.nonlinearities.identity, name='ENC_MU')
l_log_var = lasagne.layers.DenseLayer(l_enc_h1, num_units=latent_size, nonlinearity=lasagne.nonlinearities.identity, name='ENC_LOG_VAR')

#sample layer
l_z = SampleLayer(mean=l_mu, log_var=l_log_var, eq_samples=sym_eq_samples, iw_samples=sym_iw_samples)

#Normalizing Flow
l_logdet_J = []
l_zk = l_z
for i in range(nflows):
    l_nf = NormalizingPlanarFlowLayer(l_zk)
    l_zk = ListIndexLayer(l_nf, index=0)
    l_logdet_J += [ListIndexLayer(l_nf, index=1)]  # we need this for the cost function

# Generative model p(x|z)
l_dec_h1 = denselayer(l_zk, num_units=nhidden, name='DEC_DENSE2', nonlinearity=nonlin_dec)
l_dec_h1 = denselayer(l_dec_h1, num_units=nhidden, name='DEC_DENSE1', nonlinearity=nonlin_dec)
l_dec_x_mu = lasagne.layers.DenseLayer(l_dec_h1, num_units=num_features, nonlinearity=lasagne.nonlinearities.sigmoid, name='X_MU')

# get outputs needed for evaluating training, i.e. with noise if any
train_out = lasagne.layers.get_output(
    [l_z, l_zk, l_mu, l_log_var, l_dec_x_mu]+l_logdet_J, sym_x, deterministic=False
)
z_train = train_out[0]
zk_train = train_out[1]
z_mu_train = train_out[2]
z_log_var_train = train_out[3]
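Given the get_output call above, the remaining entries of train_out hold the decoder mean followed by one log-determinant per flow step. A minimal sketch (an assumption about how the cut-off snippet continues, not shown in it) of how those log|det J| terms are typically summed over the K flows before entering the flow-based lower bound (Rezende & Mohamed, 2015):

x_mu_train = train_out[4]
logdet_J_train = train_out[5:]            # one tensor per NormalizingPlanarFlowLayer
sum_logdet_J_train = sum(logdet_J_train)  # summed log-determinants, added to the bound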