Example #1
def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        layers=layers,
        update=adam,
        update_learning_rate=theano.shared(util.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        #eval_size=eval_size,
        custom_score=('kappa', util.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(eval_size=eval_size, **args)
    net.set_split(files, labels)
    return net
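
Every snippet on this page revolves around lasagne.init.Orthogonal. As a point of reference, a minimal standalone sketch (assuming only NumPy and Lasagne are installed) of what the initializer used above actually produces:

import numpy as np
from lasagne import init

# 'relu' selects the sqrt(2) gain, the same setting as the hidden layers above.
W = init.Orthogonal('relu').sample((256, 128))

# The columns are orthogonal up to the gain: W.T @ W is approximately 2 * I.
print(np.allclose(W.T.dot(W), 2.0 * np.eye(128), atol=1e-4))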
Example #2
def ptb_lstm(input_var, vocabulary_size, hidden_size, seq_len, num_layers,
             dropout, batch_size):
    l_input = L.InputLayer(shape=(batch_size, seq_len), input_var=input_var)
    l_embed = L.EmbeddingLayer(l_input,
                               vocabulary_size,
                               hidden_size,
                               W=init.Uniform(1.0))
    l_lstms = []
    for i in range(num_layers):
        l_lstm = L.LSTMLayer(l_embed if i == 0 else l_lstms[-1],
                             hidden_size,
                             ingate=L.Gate(W_in=init.GlorotUniform(),
                                           W_hid=init.Orthogonal()),
                             forgetgate=L.Gate(W_in=init.GlorotUniform(),
                                               W_hid=init.Orthogonal(),
                                               b=init.Constant(1.0)),
                             cell=L.Gate(
                                 W_in=init.GlorotUniform(),
                                 W_hid=init.Orthogonal(),
                                 W_cell=None,
                                 nonlinearity=lasagne.nonlinearities.tanh),
                             outgate=L.Gate(W_in=init.GlorotUniform(),
                                            W_hid=init.Orthogonal()))
        l_lstms.append(l_lstm)
    l_drop = L.DropoutLayer(l_lstms[-1], dropout)
    l_out = L.DenseLayer(l_drop, num_units=vocabulary_size, num_leading_axes=2)
    l_out = L.ReshapeLayer(
        l_out,
        (l_out.output_shape[0] * l_out.output_shape[1], l_out.output_shape[2]))
    l_out = L.NonlinearityLayer(l_out,
                                nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
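
A hedged usage sketch for the function above; the hyperparameter values are illustrative PTB-style settings, not taken from the original project:

import theano.tensor as T
import lasagne

input_var = T.imatrix('input')  # (batch_size, seq_len) matrix of word ids
l_out = ptb_lstm(input_var, vocabulary_size=10000, hidden_size=650,
                 seq_len=35, num_layers=2, dropout=0.5, batch_size=20)
# Rows are per-timestep softmax distributions: (batch_size * seq_len, vocab).
probs = lasagne.layers.get_output(l_out)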
Example #3
 def _forward(self):
     net = {}
     net['input'] = layers.InputLayer(shape=(None, 1, 28, 28),
                                      input_var=self.X)
     net['conv1'] = layers.Conv2DLayer(net['input'],
                                       32, (3, 3),
                                       W=init.Orthogonal(),
                                       pad=1)
     net['pool1'] = layers.MaxPool2DLayer(net['conv1'], (2, 2),
                                          stride=(2, 2))
     net['conv2'] = layers.Conv2DLayer(net['pool1'],
                                       64, (3, 3),
                                       W=init.Orthogonal(),
                                       pad=1)
     net['pool2'] = layers.MaxPool2DLayer(net['conv2'], (2, 2),
                                          stride=(2, 2))
     net['conv3'] = layers.Conv2DLayer(net['pool2'],
                                       128, (3, 3),
                                       W=init.Orthogonal(),
                                       pad=1)
     net['conv4'] = layers.Conv2DLayer(net['conv3'],
                                       128, (3, 3),
                                       W=init.Orthogonal(),
                                       pad=1)
     net['pool3'] = layers.MaxPool2DLayer(net['conv4'], (2, 2),
                                          stride=(2, 2))
     net['flatten'] = layers.FlattenLayer(net['pool3'])
     net['out'] = layers.DenseLayer(net['flatten'],
                                    10,
                                    b=None,
                                    nonlinearity=nonlinearities.softmax)
     return net
Example #4
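 # NB: this helper is assumed to be nested inside a recurrent layer's
 # __init__, so `self`, `num_inputs`, and `num_units` come from the
 # enclosing scope (compare Example #15 below).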
 def add_gate_params(gate_name):
     return (self.add_param(spec=init.Orthogonal(0.1),
                            shape=(num_inputs, num_units),
                            name="W_in_to_{}".format(gate_name)),
             self.add_param(spec=init.Orthogonal(0.1),
                            shape=(num_units, num_units),
                            name="W_hid_to_{}".format(gate_name)),
             self.add_param(spec=init.Constant(0.0),
                            shape=(num_units, ),
                            name="b_{}".format(gate_name),
                            regularizable=False))
Example #5
    def add_gate_params(self, gate_name):
        num_prev_units = self.num_proj_units if self.num_proj_units else self.num_units
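        # The recurrent weights below act on the *projected* hidden state
        # when a projection is configured, hence num_prev_units.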

        return (self.add_param(init.Orthogonal(),
                               (num_prev_units, self.num_units),
                               name="W_h_{}".format(gate_name)),
                self.add_param(init.Orthogonal(),
                               (self.num_inputs, self.num_units),
                               name="W_x_{}".format(gate_name)),
                self.add_param(init.Constant(0.0), (self.num_units, ),
                               name="b_{}".format(gate_name),
                               regularizable=False))
Example #6
    def init_main_lstm_weights(self):
        (self.W_h_ig, self.W_x_ig, self.b_ig) = self.add_gate_params('ig')
        (self.W_h_fg, self.W_x_fg, self.b_fg) = self.add_gate_params('fg')
        (self.W_h_c, self.W_x_c, self.b_c) = self.add_gate_params('c')
        (self.W_h_og, self.W_x_og, self.b_og) = self.add_gate_params('og')

        self.W_h_stacked = T.concatenate(
            [self.W_h_ig, self.W_h_fg, self.W_h_c, self.W_h_og], axis=1)
        self.W_x_stacked = T.concatenate(
            [self.W_x_ig, self.W_x_fg, self.W_x_c, self.W_x_og], axis=1)
        self.b_stacked = T.concatenate(
            [self.b_ig, self.b_fg, self.b_c, self.b_og], axis=0)

        if self.num_proj_units:
            self.W_p = self.add_param(init.Orthogonal(),
                                      (self.num_units, self.num_proj_units),
                                      name="W_p")

        self.init_states()

        if self.use_layer_norm:
            self.W_x_alpha = self.add_param(spec=init.Constant(1.0),
                                            shape=(self.num_units * 4, ),
                                            name="W_x_alpha")
            self.W_h_alpha = self.add_param(spec=init.Constant(1.0),
                                            shape=(self.num_units * 4, ),
                                            name="W_h_alpha")
            self.W_c_alpha = self.add_param(spec=init.Constant(1.0),
                                            shape=(self.num_units, ),
                                            name="W_c_alpha")
            self.W_c_beta = self.add_param(spec=init.Constant(0.0),
                                           shape=(self.num_units, ),
                                           name="W_c_beta",
                                           regularizable=False)
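
The W_x_alpha / W_h_alpha / W_c_alpha vectors above are layer-normalization gains over the stacked gate pre-activations. The step function is not shown in this example, but a hedged sketch of the normalization it would apply (following Ba et al., 2016) looks like:

import theano.tensor as T

def layer_norm(x, alpha, beta=None, eps=1e-5):
    # Normalize over the last axis, then rescale with the learned gain
    # (and shift, when a beta such as W_c_beta is provided).
    mean = T.mean(x, axis=-1, keepdims=True)
    std = T.sqrt(T.var(x, axis=-1, keepdims=True) + eps)
    out = alpha * (x - mean) / std
    return out + beta if beta is not None else out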
Example #7
def conv_params(
        num_filters,
        filter_size=(3, 3),
        pad=1,  #border_mode='same',
        nonlinearity=leaky_rectify,
        W=init.Orthogonal(gain=1.0),
        b=init.Constant(0.05),
        untie_biases=True,
        **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size,
        #'border_mode': border_mode,
        'pad': pad,
        'nonlinearity': nonlinearity,
        'W': W,
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    if CC:
        args['dimshuffle'] = False
    else:
        args.pop('partial_sum', None)
    return args
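
A hedged usage sketch: the point of conv_params is that call sites unpack the returned dict into a conv layer and only spell out what differs from the defaults (l_prev is a hypothetical incoming layer; with CC enabled the dict would instead target the cuda-convnet layer classes, which accept dimshuffle and partial_sum):

from lasagne import layers

l_conv = layers.Conv2DLayer(l_prev, **conv_params(32, filter_size=(5, 5)))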
Example #8
File: layers.py  Project: gunkisu/asr
 def __init__(self,
              W_in=init.Orthogonal(0.1),
              W_hid=init.Orthogonal(0.1),
              W_cell=init.Uniform(0.1),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.sigmoid):
     self.W_in = W_in
     self.W_hid = W_hid
     # Don't store a cell weight vector when cell is None
     if W_cell is not None:
         self.W_cell = W_cell
     self.b = b
     # For the nonlinearity, if None is supplied, use identity
     if nonlinearity is None:
         self.nonlinearity = nonlinearities.identity
     else:
         self.nonlinearity = nonlinearity
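
A hedged instantiation sketch, assuming this __init__ belongs to a Gate-style class (the class name is not shown in the snippet): a forget gate is commonly created with a positive initial bias so the cell state stays open early in training, as in Examples #2 and #20:

forgetgate = Gate(W_in=init.Orthogonal(0.1),
                  W_hid=init.Orthogonal(0.1),
                  b=init.Constant(1.0))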
Example #9
def dense_params(num_units, nonlinearity=leaky_rectify, **kwargs):
    args = {
        'num_units': num_units,
        'nonlinearity': nonlinearity,
        'W': init.Orthogonal(1.0),
        'b': init.Constant(0.05),
    }
    args.update(kwargs)
    return args
Example #10
    def __init__(self,
                 incoming,
                 num_units,
                 num_hyper_units,
                 num_proj_units,
                 ingate=Gate(W_in=init.Orthogonal()),
                 forgetgate=Gate(W_in=init.Orthogonal()),
                 cell=Gate(W_in=init.Orthogonal(),
                           W_cell=None,
                           nonlinearity=nonlinearities.tanh),
                 outgate=Gate(W_in=init.Orthogonal()),
                 nonlinearity=nonlinearities.tanh,
                 cell_init=init.Constant(0.),
                 hid_init=init.Constant(0.),
                 backwards=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 precompute_input=True,
                 mask_input=None,
                 reparam='relu',
                 use_layer_norm=False,
                 **kwargs):

        super(HyperLHUCLSTMLayer, self).__init__(incoming,
                                                 num_units,
                                                 num_hyper_units,
                                                 num_proj_units,
                                                 ingate,
                                                 forgetgate,
                                                 cell,
                                                 outgate,
                                                 nonlinearity,
                                                 cell_init,
                                                 hid_init,
                                                 backwards,
                                                 gradient_steps,
                                                 grad_clipping,
                                                 precompute_input,
                                                 mask_input,
                                                 use_layer_norm=use_layer_norm,
                                                 **kwargs)

        self.reparam = to_reparam_fn(reparam)
Example #11
def conv_params(num_filters, filter_size=(3, 3), border_mode='same',
         nonlinearity=leaky_rectify, W=init.Orthogonal(gain=1.0),
         b=init.Constant(0.05), untie_biases=True, **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size, 
        'border_mode': border_mode,
        'nonlinearity': nonlinearity, 
        'W': W, 
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    return args
Example #12
 def _forward(self):
     net = {}
     net['input'] = layers.InputLayer(shape=(None, 1, 28, 28),
                                      input_var=self.X)
     net['conv'] = layers.Conv2DLayer(net['input'],
                                      10, (5, 5),
                                      W=init.Orthogonal())
     net['pool'] = layers.MaxPool2DLayer(net['conv'], (3, 3),
                                         stride=(1, 1),
                                         pad=(1, 1))
     net['flatten'] = layers.FlattenLayer(net['pool'])
     net['out'] = layers.DenseLayer(net['flatten'],
                                    10,
                                    b=None,
                                    nonlinearity=nonlinearities.softmax)
     return net
Example #13
    def __init__(
        self,
        incomings,
        num_units,
        nonlinearity=LN.tanh,
        gate_nonlinearity=LN.sigmoid,
        name=None,
        W=LI.Orthogonal(1.0),
        b=LI.Constant(0.),
        h0=LI.Constant(0.),
        c0=LI.Constant(0.),
        grad_clipping=0.,
        # h0_trainable=False,
    ):

        super().__init__(incomings, name=name)

        input_shape = self.input_shapes[0][1:]
        input_dim = int(np.prod(input_shape))
        self.h0 = self.add_param(h0, (num_units, ),
                                 name="h0",
                                 trainable=False,
                                 regularizable=False)
        self.c0 = self.add_param(c0, (num_units, ),
                                 name="c0",
                                 trainable=False,
                                 regularizable=False)

        self.num_units = num_units
        self.nonlinearity = nonlinearity
        self.gate_nonlinearity = gate_nonlinearity
        self.grad_clipping = grad_clipping

        # Weights for all gates.
        self.W_x = self.add_param(W, (input_dim, num_units * 4), name="W_x")
        self.W_h = self.add_param(W, (num_units, num_units * 4), name="W_h")
        self.b = self.add_param(b, (num_units * 4, ),
                                name="b",
                                regularizable=False)
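
With all four gates stacked into W_x and W_h, a step function (not shown here) would typically do one fused affine transform and slice it. A sketch under the assumption that the stacking order is ingate, forgetgate, cell, outgate:

import theano.tensor as T

def lstm_gate_preacts(x, h_prev, W_x, W_h, b, num_units):
    # One matrix product per input instead of four, then 4 slices in the
    # order the weights were concatenated (assumed i, f, c, o).
    pre = T.dot(x, W_x) + T.dot(h_prev, W_h) + b
    return [pre[:, k * num_units:(k + 1) * num_units] for k in range(4)]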
Example #14
def conv_params(num_filters,
                filter_size=(3, 3),
                stride=(1, 1),
                border_mode='same',
                nonlinearity=rectify,
                W=init.Orthogonal(gain=1.0),
                b=init.Constant(0.05),
                untie_biases=False,
                **kwargs):
    args = {
        'num_filters': num_filters,
        'filter_size': filter_size,
        'stride': stride,
        'pad': border_mode,  # newer Lasagne uses 'pad' instead of 'border_mode'
        'nonlinearity': nonlinearity,
        'W': W,
        'b': b,
        'untie_biases': untie_biases,
    }
    args.update(kwargs)
    return args
Example #15
    def __init__(
            self,
            # input data
            input_data_layer,
            input_mask_layer,
            # model size
            num_units,
            # initialize
            cell_init=init.Constant(0.),
            hid_init=init.Constant(0.),
            learn_init=False,
            # options
            stochastic=False,
            skip_scale=T.ones(shape=(1, ), dtype=floatX),
            backwards=False,
            gradient_steps=-1,
            grad_clipping=0,
            only_return_final=False,
            **kwargs):

        # input
        incomings = [input_data_layer, input_mask_layer]

        # init input
        input_init = init.Constant(0.)
        self.input_init_incoming_index = -1
        if isinstance(input_init, Layer):
            incomings.append(input_init)
            self.input_init_incoming_index = len(incomings) - 1

        # init hidden
        self.hid_init_incoming_index = -1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings) - 1

        # init cell
        self.cell_init_incoming_index = -1
        if isinstance(cell_init, Layer):
            incomings.append(cell_init)
            self.cell_init_incoming_index = len(incomings) - 1

        # init class
        super(DiffSkipLSTMLayer, self).__init__(incomings, **kwargs)

        # set options
        self.stochastic = stochastic
        self.skip_scale = skip_scale
        self.learn_init = learn_init
        self.num_units = num_units
        self.backwards = backwards
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.only_return_final = only_return_final

        # set sampler
        self.uniform = RandomStreams(get_rng().randint(1, 2147462579)).uniform

        # get input size
        input_shape = self.input_shapes[0]
        num_inputs = np.prod(input_shape[2:])

        ###################
        # gate parameters #
        ###################
        def add_gate_params(gate_name):
            return (self.add_param(spec=init.Orthogonal(0.1),
                                   shape=(num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(spec=init.Orthogonal(0.1),
                                   shape=(num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(spec=init.Constant(0.0),
                                   shape=(num_units, ),
                                   name="b_{}".format(gate_name),
                                   regularizable=False))

        ##### in gate #####
        (self.W_in_to_ingate, self.W_hid_to_ingate,
         self.b_ingate) = add_gate_params('ingate')
        self.W_cell_to_ingate = self.add_param(spec=init.Uniform(0.1),
                                               shape=(num_units, ),
                                               name="W_cell_to_ingate")
        ##### forget gate #####
        (self.W_in_to_forgetgate, self.W_hid_to_forgetgate,
         self.b_forgetgate) = add_gate_params('forgetgate')
        self.W_cell_to_forgetgate = self.add_param(spec=init.Uniform(0.1),
                                                   shape=(num_units, ),
                                                   name="W_cell_to_forgetgate")
        ##### cell #####
        (self.W_in_to_cell, self.W_hid_to_cell,
         self.b_cell) = add_gate_params('cell')

        ##### out gate #####
        (self.W_in_to_outgate, self.W_hid_to_outgate,
         self.b_outgate) = add_gate_params('outgate')
        self.W_cell_to_outgate = self.add_param(spec=init.Uniform(0.1),
                                                shape=(num_units, ),
                                                name="W_cell_to_outgate")

        ###################
        # skip parameters #
        ###################
        self.W_cell_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                             shape=(num_units, num_units),
                                             name="W_cell_to_skip")
        self.b_cell_to_skip = self.add_param(spec=init.Constant(1.0),
                                             shape=(num_units, ),
                                             name="b_cell_to_skip",
                                             regularizable=False)

        self.W_hid_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                            shape=(num_units, num_units),
                                            name="W_hid_to_skip")
        self.b_hid_to_skip = self.add_param(spec=init.Constant(1.0),
                                            shape=(num_units, ),
                                            name="b_hid_to_skip",
                                            regularizable=False)

        self.W_in_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                           shape=(num_inputs, num_units),
                                           name="W_in_to_skip")
        self.b_in_to_skip = self.add_param(spec=init.Constant(1.0),
                                           shape=(num_units, ),
                                           name="b_in_to_skip",
                                           regularizable=False)

        self.W_skip = self.add_param(spec=init.Orthogonal(0.1),
                                     shape=(num_units, 1),
                                     name="W_skip")
        self.b_skip = self.add_param(spec=init.Constant(0.0),
                                     shape=(1, ),
                                     name="b_skip",
                                     regularizable=False)

        self.W_diff_to_skip = self.add_param(spec=init.Orthogonal(0.1),
                                             shape=(num_inputs, num_units),
                                             name="W_diff_to_skip")
        self.b_diff_to_skip = self.add_param(spec=init.Constant(0.0),
                                             shape=(num_units, ),
                                             name="b_diff_to_skip",
                                             regularizable=False)

        if isinstance(input_init, Layer):
            self.input_init = input_init
        else:
            self.input_init = self.add_param(spec=input_init,
                                             shape=(1, num_inputs),
                                             name="input_init",
                                             trainable=learn_init,
                                             regularizable=False)

        if isinstance(cell_init, Layer):
            self.cell_init = cell_init
        else:
            self.cell_init = self.add_param(spec=cell_init,
                                            shape=(1, num_units),
                                            name="cell_init",
                                            trainable=learn_init,
                                            regularizable=False)

        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(spec=hid_init,
                                           shape=(1, num_units),
                                           name="hid_init",
                                           trainable=learn_init,
                                           regularizable=False)
Example #16
def build_model(
    batch_size,
    num_channels,
    input_length,
    output_dim,
    subsample,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='input',
    )

    l_sampling = SubsampleLayer(
        l_in,
        window=(None, None, 10),
        name='l_sampling',
    )

    l_window = WindowNormLayer(
        l_sampling,
        name='l_window',
    )

    l_conv1 = Conv1DLayer(
        l_window,
        name='conv1',
        num_filters=16,
        border_mode='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool1 = MaxPool1DLayer(
        l_conv1,
        name='pool1',
        pool_size=3,
        stride=2,
    )

    l_conv2 = Conv1DLayer(
        l_pool1,
        name='conv2',
        num_filters=32,
        border_mode='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv3 = Conv1DLayer(
        l_conv2,
        name='conv3',
        num_filters=64,
        border_mode='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool3 = MaxPool1DLayer(
        l_conv3,
        name='pool3',
        pool_size=3,
        stride=2,
    )

    l_dropout_dense1 = layers.DropoutLayer(
        l_pool3,
        p=0.5,
    )

    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        p=0.5,
    )

    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_out = layers.DenseLayer(
        l_dense2,
        name='output',
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )

    return l_out
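
A hedged compilation sketch for the model above (shapes are illustrative; SubsampleLayer and WindowNormLayer are project-specific classes assumed importable, and note that the subsample argument is accepted but unused in the body):

import theano
import theano.tensor as T
from lasagne import layers

l_out = build_model(batch_size=32, num_channels=1, input_length=4000,
                    output_dim=1, subsample=None)
X = T.tensor3('X')
predict = theano.function(
    [X], layers.get_output(l_out, X, deterministic=True))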
Example #17
    def __init__(self,
                 incoming,
                 num_units,
                 num_hyper_units,
                 num_proj_units,
                 ingate=Gate(W_in=init.Orthogonal()),
                 forgetgate=Gate(W_in=init.Orthogonal()),
                 cell=Gate(W_in=init.Orthogonal(),
                           W_cell=None,
                           nonlinearity=nonlinearities.tanh),
                 outgate=Gate(W_in=init.Orthogonal()),
                 nonlinearity=nonlinearities.tanh,
                 cell_init=init.Constant(0.),
                 hid_init=init.Constant(0.),
                 backwards=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 precompute_input=True,
                 mask_input=None,
                 ivector_input=None,
                 use_layer_norm=False,
                 **kwargs):

        incomings = [incoming]
        self.mask_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings) - 1
        self.ivector_incoming_index = -1
        if ivector_input is not None:
            incomings.append(ivector_input)
            self.ivector_incoming_index = len(incomings) - 1

        super(HyperLSTMLayer, self).__init__(incomings, **kwargs)

        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = num_units
        self.num_hyper_units = num_hyper_units
        self.num_proj_units = num_proj_units
        self.backwards = backwards
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.precompute_input = precompute_input

        input_shape = self.input_shapes[0]

        self.num_inputs = numpy.prod(input_shape[2:])

        self.ingate = ingate
        self.forgetgate = forgetgate
        self.cell = cell
        self.outgate = outgate

        self.nonlinearity_ingate = ingate.nonlinearity
        self.nonlinearity_forgetgate = forgetgate.nonlinearity
        self.nonlinearity_cell = cell.nonlinearity
        self.nonlinearity_outgate = outgate.nonlinearity

        self.cell_init = cell_init
        self.hid_init = hid_init

        self.use_layer_norm = use_layer_norm

        self.init_weights()
Example #18
def build_model(
    batch_size,
    num_channels,
    input_length,
    output_dim,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='l_in',
    )

    l_conv1 = Conv1DLayer(
        l_in,
        name='conv1',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool1 = MaxPool1DLayer(
        l_conv1,
        name='pool1',
        pool_size=3,
        stride=2,
    )

    l_conv2 = Conv1DLayer(
        l_pool1,
        name='conv2',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool2 = MaxPool1DLayer(
        l_conv2,
        name='pool2',
        pool_size=3,
        stride=2,
    )

    l_dropout_dense1 = layers.DropoutLayer(
        #l_pool4,
        l_pool2,
        p=0.5,
    )

    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        num_units=32,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_out = layers.DenseLayer(
        l_dense1,
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )

    return l_out
Example #19
def estimator(protocol,
              classifier,
              n_features,
              files,
              X,
              labels,
              run,
              fold,
              eval_size=0.1):

    final_weights = 'weights/final_%s_%s_fold_%s.pkl' % (classifier, run, fold)

    if classifier == "SVM":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)

        else:
            svm = SVC(kernel='linear',
                      class_weight='balanced',
                      cache_size=5500,
                      probability=True)
            if protocol != 'protocol3':
                svm_model = svm
                param_grid = {"C": [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]}
                cv = StratifiedShuffleSplit(
                    labels.reshape((labels.shape[0], )),
                    n_iter=10,
                    test_size=0.1,
                    random_state=0)
                est = GridSearchCV(svm_model,
                                   param_grid=param_grid,
                                   scoring='roc_auc',
                                   n_jobs=15,
                                   cv=cv,
                                   verbose=2)
                est.fit(X, labels.reshape((labels.shape[0], )))
            else:
                param_grid = {
                    "estimator__C":
                    [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
                }
                binarized_labels = label_binarize(np.squeeze(labels),
                                                  classes=[0, 1, 2])
                svm_model = OneVsRestClassifier(svm)
                cv = StratifiedShuffleSplit(binarized_labels,
                                            n_iter=10,
                                            test_size=0.1,
                                            random_state=0)
                est = GridSearchCV(svm_model,
                                   param_grid=param_grid,
                                   scoring='roc_auc',
                                   n_jobs=15,
                                   cv=cv,
                                   verbose=2)
                est.fit(X, binarized_labels)

            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " %
                  (classifier))
            print(est)

            # Persistence
            #joblib.dump(est, final_weights)

    elif classifier == "RF":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)

        else:
            #for criterion in ["gini","entropy"]:
            #                    for n_estimators in [10, 50, 100, 200]:#, 200, 250, 500, 750, 1000]:
            #                            for max_features in [None]: #"auto", "sqrt", "log2",
            #                                    # We are not using class_weight='auto'. Error in sklearn

            param_grid = {
                'criterion': ['gini', 'entropy'],
                'n_estimators': [50, 100, 200, 300, 10, 250, 500, 750]
            }
            est = GridSearchCV(RandomForestClassifier(max_features="auto"),
                               param_grid=param_grid,
                               n_jobs=-1,
                               verbose=2)
            print(X[:3])
            est.fit(X, labels.reshape((labels.shape[0], )))

            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " %
                  (classifier))
            print(est)

            # Persistence
            joblib.dump(est, final_weights)

    else:
        layers = [
            (InputLayer, {
                'shape': (None, n_features)
            }),
            (DenseLayer, {
                'num_units': N_HIDDEN_1,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01)
            }),
            (FeaturePoolLayer, {
                'pool_size': 2
            }),
            (DenseLayer, {
                'num_units': N_HIDDEN_2,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01)
            }),
            (FeaturePoolLayer, {
                'pool_size': 2
            }),
            (DenseLayer, {
                'num_units': 2,
                'nonlinearity': softmax
            }),
        ]
        args = dict(
            update=adam,
            update_learning_rate=theano.shared(util.float32(START_LR)),
            batch_iterator_train=ResampleIterator(BATCH_SIZE),
            batch_iterator_test=BatchIterator(BATCH_SIZE),
            objective=nn.get_objective(l1=L1, l2=L2),
            eval_size=eval_size,
            custom_scores=[('kappa',
                            metrics.kappa)] if eval_size > 0.0 else None,
            on_epoch_finished=[
                nn.Schedule('update_learning_rate', SCHEDULE),
            ],
            regression=False,
            max_epochs=N_ITER,
            verbose=1,
        )
        est = BlendNet(layers, **args)
        if os.path.exists(final_weights):
            est.load_params_from(str(final_weights))
            print("loaded weights from {}".format(final_weights))

        else:
            est.set_split(files, labels)
            est.fit(X, labels)

            #Persistence
            #est.save_params_to(final_weights)

    return est
Example #20
File: layers.py  Project: gunkisu/asr
    def __init__(self,
                 incoming,
                 num_prj,
                 num_units,
                 ingate=Gate(),
                 forgetgate=Gate(b=init.Constant(1.)),
                 cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
                 outgate=Gate(),
                 nonlinearity=nonlinearities.tanh,
                 cell_init=init.Constant(0.),
                 hid_init=init.Constant(0.),
                 dropout_ratio=0.2,
                 weight_noise=0.0,
                 backwards=False,
                 learn_init=False,
                 peepholes=True,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 mask_input=None,
                 only_return_final=False,
                 only_return_hidden=True,
                 **kwargs):

        incomings = [incoming]
        self.mask_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings) - 1

        self.hid_init_incoming_index = -1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings) - 1

        self.cell_init_incoming_index = -1
        if isinstance(cell_init, Layer):
            incomings.append(cell_init)
            self.cell_init_incoming_index = len(incomings) - 1

        # Initialize parent layer
        super(LSTMPLayer, self).__init__(incomings, **kwargs)

        # for dropout
        self.binomial = RandomStreams(get_rng().randint(1,
                                                        2147462579)).binomial
        self.p = dropout_ratio

        # for weight noise
        self.normal = RandomStreams(get_rng().randint(1, 2147462579)).normal

        # If the provided nonlinearity is None, make it linear
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.weight_noise = weight_noise
        self.learn_init = learn_init
        self.num_prj = num_prj
        self.num_units = num_units
        self.backwards = backwards
        self.peepholes = peepholes
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.unroll_scan = unroll_scan
        self.only_return_final = only_return_final
        self.only_return_hidden = only_return_hidden

        if unroll_scan and gradient_steps != -1:
            raise ValueError(
                "Gradient steps must be -1 when unroll_scan is true.")

        input_shape = self.input_shapes[0]
        if unroll_scan and input_shape[1] is None:
            raise ValueError("Input sequence length cannot be specified as "
                             "None when unroll_scan is True")

        #### weight init ####
        num_inputs = numpy.prod(input_shape[2:])

        def add_gate_params(gate, gate_name):
            return (self.add_param(spec=gate.W_in,
                                   shape=(num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(spec=gate.W_hid,
                                   shape=(num_prj, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(spec=gate.b,
                                   shape=(num_units, ),
                                   name="b_{}".format(gate_name),
                                   regularizable=False), gate.nonlinearity)

        #### ingate ####
        (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate,
         self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')

        #### forgetgate ####
        (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate,
         self.nonlinearity_forgetgate) = add_gate_params(
             forgetgate, 'forgetgate')

        #### cell ####
        (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell,
         self.nonlinearity_cell) = add_gate_params(cell, 'cell')

        #### outgate ####
        (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate,
         self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

        #### peepholes ####
        if self.peepholes:
            self.W_cell_to_ingate = self.add_param(spec=ingate.W_cell,
                                                   shape=(num_units, ),
                                                   name="W_cell_to_ingate")

            self.W_cell_to_forgetgate = self.add_param(
                spec=forgetgate.W_cell,
                shape=(num_units, ),
                name="W_cell_to_forgetgate")

            self.W_cell_to_outgate = self.add_param(spec=outgate.W_cell,
                                                    shape=(num_units, ),
                                                    name="W_cell_to_outgate")

        #### hidden projection ####
        self.W_hid_projection = self.add_param(spec=init.Orthogonal(),
                                               shape=(num_units, num_prj),
                                               name="W_cell_to_outgate")

        # Setup initial values for the cell and the hidden units
        if isinstance(cell_init, Layer):
            self.cell_init = cell_init
        else:
            self.cell_init = self.add_param(cell_init, (1, num_units),
                                            name="cell_init",
                                            trainable=learn_init,
                                            regularizable=False)

        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(hid_init, (1, num_prj),
                                           name="hid_init",
                                           trainable=learn_init,
                                           regularizable=False)
Example #21
def build_model(
    batch_size,
    num_channels,
    input_length,
    output_dim,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='l_in',
    )

    l_sampling = SubsampleLayer(
        l_in,
        window=(None, None, 5),
        name='l_sampling',
    )

    l_window = WindowNormLayer(
        l_sampling,
        name='l_window',
    )

    l_conv1 = Conv1DLayer(
        l_window,
        name='conv1',
        num_filters=16,
        pad='same',
        filter_size=1,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv2 = Conv1DLayer(
        l_conv1,
        name='conv2',
        num_filters=8,
        pad='same',
        filter_size=1,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool2 = MaxPool1DLayer(
        l_conv2,
        name='pool2',
        pool_size=3,
        stride=2,
    )

    l_conv3 = Conv1DLayer(
        l_pool2,
        name='conv3',
        num_filters=32,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv4 = Conv1DLayer(
        l_conv3,
        name='conv4',
        num_filters=16,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool4 = MaxPool1DLayer(
        l_conv4,
        name='pool4',
        pool_size=3,
        stride=2,
    )

    l_conv5 = Conv1DLayer(
        l_pool4,
        name='conv5',
        num_filters=64,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv6 = Conv1DLayer(
        l_conv5,
        name='conv6',
        num_filters=32,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool6 = MaxPool1DLayer(
        l_conv6,
        name='pool6',
        pool_size=3,
        stride=2,
    )

    l_conv7 = Conv1DLayer(
        l_pool6,
        name='conv7',
        num_filters=64,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv8 = Conv1DLayer(
        l_conv7,
        name='conv8',
        num_filters=32,
        pad='same',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool8 = MaxPool1DLayer(
        l_conv8,
        name='pool8',
        pool_size=3,
        stride=2,
    )

    l_dropout_dense1 = layers.DropoutLayer(
        l_pool8,
        p=0.5,
    )

    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        num_units=64,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        p=0.5,
    )

    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        num_units=64,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_out = layers.DenseLayer(
        l_dense2,
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )

    return l_out
Example #22
def build_model(batch_size,
                num_channels,
                input_length,
                output_dim,):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='input',
    )

    # window size should be 1600 for this network
    l_ss_left = SubsampleLayer(
        l_in,
        window=(None, 1000, 10),
        name='l_ss_left',
    )

    l_ss_right = SubsampleLayer(
        l_in,
        window=(1000, None, 10),
        name='l_ss_right',
    )

    #l_window_left = WindowNormLayer(
    #    l_ss_left,
    #    name='l_window_left',
    #)

    #l_window_right = WindowNormLayer(
    #    l_ss_right,
    #    name='l_window_right',
    #)

    l_conv1_left = Conv1DLayer(
        #l_window_left,
        l_ss_left,
        name='conv1_left',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv1_right = Conv1DLayer(
        #l_window_right,
        l_ss_right,
        name='conv1_right',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool1_left = MaxPool1DLayer(
        l_conv1_left,
        name='pool1_left',
        pool_size=3,
        stride=2,
    )

    l_pool1_right = MaxPool1DLayer(
        l_conv1_right,
        name='pool1_right',
        pool_size=3,
        stride=2,
    )

    l_dropout_conv2_left = layers.DropoutLayer(
        l_pool1_left,
        name='drop_conv2_left',
        p=0.1,
    )

    l_dropout_conv2_right = layers.DropoutLayer(
        l_pool1_right,
        name='drop_conv2_right',
        p=0.1,
    )

    l_conv2_left = Conv1DLayer(
        l_dropout_conv2_left,
        name='conv2_left',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv2_right = Conv1DLayer(
        l_dropout_conv2_right,
        name='conv2_right',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_conv3_left = layers.DropoutLayer(
        l_conv2_left,
        name='drop_conv3_left',
        p=0.2,
    )

    l_dropout_conv3_right = layers.DropoutLayer(
        l_conv2_right,
        name='drop_conv3_right',
        p=0.2,
    )

    l_conv3_left = Conv1DLayer(
        l_dropout_conv3_left,
        name='conv3_left',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_conv3_right = Conv1DLayer(
        l_dropout_conv3_right,
        name='conv3_right',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool3_left = MaxPool1DLayer(
        l_conv3_left,
        name='pool3_left',
        pool_size=3,
        stride=2,
    )

    l_pool3_right = MaxPool1DLayer(
        l_conv3_right,
        name='pool3_right',
        pool_size=3,
        stride=2,
    )

    l_concat = layers.ConcatLayer(
        incomings=(l_pool3_left, l_pool3_right),
        name='concat',
    )

    l_dropout_dense1 = layers.DropoutLayer(
        l_concat,
        name='drop_dense1',
        p=0.5,
    )

    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        name='dense1',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        name='drop_dense2',
        p=0.5,
    )

    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        name='dense2',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_out = layers.DenseLayer(
        l_dense2,
        name='output',
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )

    return l_out
Example #23
class DrawLayer(Layer):
    '''
    Implements the DRAW model.

    The input to the model should be flattened images. Set the original
    image shape with `imgshp`.

    NB: Glorot init will not work.

    REFS
    Gregor, K., Danihelka, I., Graves, A., & Wierstra, D. (2015).
    DRAW: A Recurrent Neural Network For Image Generation.
    arXiv preprint arXiv:1502.04623.
    '''
    ini = init.Normal(std=0.01, mean=0.0)
    zero = init.Constant(0.)
    ortho = init.Orthogonal(np.sqrt(2))

    def __init__(self,
                 input_layer,
                 num_units_encoder_and_decoder,
                 glimpses,
                 dimz,
                 imgshp,
                 N_filters_read,
                 N_filters_write,
                 W_x_to_gates=ini,
                 W_cell_to_gates=zero,
                 b_gates=zero,
                 W_read=ini,
                 b_read=zero,
                 W_write=ini,
                 b_write=zero,
                 nonlinearity_ingate=nonlinearities.sigmoid,
                 nonlinearity_forgetgate=nonlinearities.sigmoid,
                 nonlinearity_modulationgate=nonlinearities.tanh,
                 nonlinearity_outgate=nonlinearities.sigmoid,
                 nonlinearities_out_encoder=nonlinearities.tanh,
                 nonlinearities_out_decoder=nonlinearities.tanh,
                 cell_init=zero,
                 hid_init=zero,
                 canvas_init=zero,
                 W_dec_to_canvas=ini,
                 W_enc_to_mu_z=ini,
                 learn_hid_init=False,
                 learn_canvas_init=True,
                 peepholes=False,
                 x_distribution='bernoulli',
                 qz_distribution='gaussian',
                 pz_distribution='gaussian',
                 read_init=None,
                 n_classes=None,
                 use_y=False,
                 grad_clip_vals_out=[-1.0, 1.0],
                 grad_clip_vals_in=[-10, 10]):
        """
        :param input_layer: Lasagne input layer
        :param num_units_encoder_and_decoder:  Number of units in encoder and
               decoder
        :param glimpses: Number of times the networks sees and tries to
                         reconstruct the image
        :param dimz: Size of latent layer
        :param imgshp: list, [height, width]
        :param N_filters_read:  int
        :param N_filters_write: int
        :param W_x_to_gates:   function or np.ndarray or theano.shared
        :param W_cell_to_gates: function or np.ndarray or theano.shared
        :param b_gates: function or np.ndarray or theano.shared
        :param W_read:  function or np.ndarray or theano.shared
        :param b_read:  function or np.ndarray or theano.shared
        :param W_write: function or np.ndarray or theano.shared
        :param b_write: function or np.ndarray or theano.shared
        :param nonlinearity_ingate: function
        :param nonlinearity_forgetgate: function
        :param nonlinearity_modulationgate: function
        :param nonlinearity_outgate: function
        :param nonlinearities_out_encoder: function
        :param nonlinearities_out_decoder: function
        :param cell_init: function or np.ndarray or theano.shared
        :param hid_init:  function or np.ndarray or theano.shared
        :param canvas_init:  function or np.ndarray or theano.shared
        :param W_dec_to_canvas: function or np.ndarray or theano.shared
        :param W_enc_to_mu_z:   function or np.ndarray or theano.shared
        :param learn_hid_init:  boolean. If true cell and hid inits are learned
        :param learn_canvas_init: boolean. Learn canvas init. To start with a
                                 blank canvas set this to False
        :param peepholes: boolean. LSTM with or without peepholes
        :param x_distribution: str. Distribution of input data. Only supports
                                'bernoulli'
        :param qz_distribution: distribution of q(z|x), only supports
                                'gaussianmarg'
        :param pz_distribution: prior on z, p(z), only supports 'gaussianmarg'
        :param read_init: None or np.ndarray of length 5 with initial values
                          for the read operation. If you want to change this
                          you should probably change it so the model sees a
                          blurry version of the entire image.
        :param n_classes: int. Number of classes. Required if use_y=True
        :param use_y: boolean. If true models p(x,y) else p(x)
        :param grad_clip_vals_out: Clipping of gradients with grad_clip
        :param grad_clip_vals_in: Clipping of gradients with grad_clip
        """

        # Initialize parent layer
        super(DrawLayer, self).__init__(input_layer)
        # For any of the nonlinearities, if None is supplied, use identity
        if nonlinearity_ingate is None:
            self.nonlinearity_ingate = nonlinearities.identity
        else:
            self.nonlinearity_ingate = nonlinearity_ingate

        if nonlinearity_forgetgate is None:
            self.nonlinearity_forgetgate = nonlinearities.identity
        else:
            self.nonlinearity_forgetgate = nonlinearity_forgetgate

        if nonlinearity_modulationgate is None:
            self.nonlinearity_modulationgate = nonlinearities.identity
        else:
            self.nonlinearity_modulationgate = nonlinearity_modulationgate

        if nonlinearity_outgate is None:
            self.nonlinearity_outgate = nonlinearities.identity
        else:
            self.nonlinearity_outgate = nonlinearity_outgate
        if x_distribution not in ['bernoulli']:
            raise NotImplementedError
        if pz_distribution not in ['gaussianmarg']:
            raise NotImplementedError
        if qz_distribution not in ['gaussianmarg']:
            raise NotImplementedError

        if use_y is True and n_classes is None:
            raise ValueError('n_classes must be given when use_y is true')
        self.learn_hid_init = learn_hid_init
        self.learn_canvas_init = learn_canvas_init
        self.num_units_encoder_and_decoder = num_units_encoder_and_decoder
        self.peepholes = peepholes
        self.glimpses = glimpses
        self.dimz = dimz
        self.nonlinearity_out_encoder = nonlinearities_out_encoder
        self.nonlinearity_out_decoder = nonlinearities_out_decoder
        self.x_distribution = x_distribution
        self.qz_distribution = qz_distribution
        self.pz_distribution = pz_distribution
        self.N_filters_read = N_filters_read
        self.N_filters_write = N_filters_write
        self.imgshp = imgshp
        self.n_classes = n_classes
        self.use_y = use_y
        self.grad_clip_vals_out = grad_clip_vals_out
        self.grad_clip_vals_in = grad_clip_vals_in

        # Input dimensionality is the output dimensionality of the input layer
        num_batch, num_inputs = self.input_layer.output_shape
        self.num_batch = num_batch
        self.num_inputs = num_inputs

        if self.peepholes:
            self.W_cellenc_to_enc_gates = self.add_param(
                W_cell_to_gates, [3 * num_units_encoder_and_decoder])
            self.W_celldec_to_dec_gates = self.add_param(
                W_cell_to_gates, [3 * num_units_encoder_and_decoder])
            self.W_cellenc_to_enc_gates.name = "DrawLayer: W_cellenc_to_enc_gates"
            self.W_celldec_to_dec_gates.name = "DrawLayer: W_celldec_to_dec_gates"
        else:
            self.W_cellenc_to_enc_gates = []
            self.W_celldec_to_dec_gates = []

        # enc
        self.b_gates_enc = self.add_param(b_gates,
                                          [4 * num_units_encoder_and_decoder])

        # extra input applies to both encoder and decoder
        if self.use_y:
            # if y is modelled its concatenated to the x input to the encoder
            # and the z input to the decoder. We need to expand the
            # corresponding matrices to handle this.
            extra_input = self.n_classes
        else:
            extra_input = 0

        self.W_enc_gates = self.add_param(W_x_to_gates, [
            2 * N_filters_read * N_filters_read +
            num_units_encoder_and_decoder + extra_input,
            4 * num_units_encoder_and_decoder
        ])
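        # Input size above: read patches of the image and of the error
        # image (2 * N_filters_read ** 2), the previous decoder hidden
        # state (num_units), and the optional class label (extra_input).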

        self.W_hid_to_gates_enc = self.add_param(
            W_x_to_gates,
            [num_units_encoder_and_decoder, 4 * num_units_encoder_and_decoder])

        self.b_gates_dec = self.add_param(b_gates,
                                          [4 * num_units_encoder_and_decoder])
        self.W_z_to_gates_dec = self.add_param(
            W_x_to_gates,
            [dimz + extra_input, 4 * num_units_encoder_and_decoder])
        self.W_hid_to_gates_dec = self.add_param(
            W_x_to_gates,
            [num_units_encoder_and_decoder, 4 * num_units_encoder_and_decoder])

        # Setup initial values for the cell and the lstm hidden units
        if self.learn_hid_init:
            self.cell_init_enc = self.add_param(
                cell_init, (1, num_units_encoder_and_decoder))
            self.hid_init_enc = self.add_param(
                hid_init, (1, num_units_encoder_and_decoder))
            self.cell_init_dec = self.add_param(
                cell_init, (1, num_units_encoder_and_decoder))
            self.hid_init_dec = self.add_param(
                hid_init, (1, num_units_encoder_and_decoder))

        else:  # init at zero + they will not be returned as parameters
            self.cell_init_enc = T.zeros((1, num_units_encoder_and_decoder))
            self.hid_init_enc = T.zeros((1, num_units_encoder_and_decoder))
            self.cell_init_dec = T.zeros((1, num_units_encoder_and_decoder))
            self.hid_init_dec = T.zeros((1, num_units_encoder_and_decoder))

        if self.learn_canvas_init:
            self.canvas_init = self.add_param(canvas_init, (1, num_inputs))
        else:
            self.canvas_init = T.zeros((1, num_inputs))

        # decoder to canvas
        self.W_dec_to_canvas_patch = self.add_param(
            W_dec_to_canvas,
            (num_units_encoder_and_decoder, N_filters_write * N_filters_write))

        # variational weights
        # TODO: Make the sizes more flexible, they are not required to be equal

        self.W_enc_to_z_mu = self.add_param(
            W_enc_to_mu_z, (self.num_units_encoder_and_decoder, self.dimz))
        self.b_enc_to_z_mu = self.add_param(b_gates, (self.dimz, ))
        self.W_enc_to_z_sigma = self.add_param(
            W_enc_to_mu_z, (self.num_units_encoder_and_decoder, self.dimz))
        self.b_enc_to_z_sigma = self.add_param(b_gates, (self.dimz, ))

        self.b_gates_enc.name = "DrawLayer: b_gates_enc"
        self.b_gates_dec.name = "DrawLayer: b_gates_dec"
        self.W_enc_gates.name = "DrawLayer: W_enc_gates"
        self.W_hid_to_gates_enc.name = "DrawLayer: W_hid_to_gates_enc"
        self.W_z_to_gates_dec.name = "DrawLayer: W_z_to_gates_dec"
        self.W_hid_to_gates_dec.name = "DrawLayer: W_hid_to_gates_dec"
        self.W_enc_to_z_mu.name = "DrawLayer: W_enc_to_z_mu"
        self.b_enc_to_z_mu.name = "DrawLayer: b_enc_to_z_mu"
        self.W_enc_to_z_sigma.name = "DrawLayer: W_enc_to_z_sigma"
        self.b_enc_to_z_sigma.name = "DrawLayer: b_enc_to_z_sigma"
        self.W_dec_to_canvas_patch.name = "DrawLayer: W_dec_to_canvas_patch"

        self.cell_init_enc.name = "DrawLayer: cell_init_enc"
        self.hid_init_enc.name = "DrawLayer: hid_init_enc"
        self.cell_init_dec.name = "DrawLayer: cell_init_dec"
        self.hid_init_dec.name = "DrawLayer: hid_init_dec"
        self.canvas_init.name = "DrawLayer: canvas_init"

        # init values for read operation.
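        # The five attention parameters follow the DRAW paper
        # (http://arxiv.org/abs/1502.04623): grid centre (y, x), stride delta,
        # filter width sigma and scalar intensity gamma; the last three are
        # stored in log-space so the network output can be unbounded.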
        delta_read = 1
        gamma = 1.0
        sigma_read = 1.0
        center_y = 0.
        center_x = 0.
        if read_init is None:
            read_init = np.array([[
                center_y, center_x,
                np.log(delta_read),
                np.log(sigma_read),
                np.log(gamma)
            ]])
            read_init = read_init.astype(theano.config.floatX)
        print("Read init is", read_init)

        self.W_read = self.add_param(W_read,
                                     (num_units_encoder_and_decoder, 5))
        self.W_write = self.add_param(W_write,
                                      (num_units_encoder_and_decoder, 5))
        self.b_read = self.add_param(b_read, (5, ))
        self.b_write = self.add_param(b_write, (5, ))
        self.read_init = self.add_param(read_init, (1, 5))
        self.W_read.name = "DrawLayer: W_read"
        self.W_write.name = "DrawLayer: W_write"
        self.b_read.name = "DrawLayer: b_read"
        self.b_write.name = "DrawLayer: b_write"

    def get_read_init(self):
        return self.read_init

    def get_params(self):
        '''
        Get all parameters of this layer.

        :returns:
            - params : list of theano.shared
                List of all parameters
        '''
        params = self.get_weight_params() + self.get_bias_params()
        if self.peepholes:
            params.extend(self.get_peephole_params())

        if self.learn_hid_init:
            params.extend(self.get_init_params())

        if self.learn_canvas_init:
            params += [self.canvas_init]

        return params

    def get_weight_params(self):
        '''
        Get all weights of this layer
        :returns:
            - weight_params : list of theano.shared
                List of all weight parameters
        '''
        return [
            self.W_enc_gates, self.W_hid_to_gates_enc, self.W_z_to_gates_dec,
            self.W_hid_to_gates_dec, self.W_dec_to_canvas_patch,
            self.W_enc_to_z_mu, self.W_enc_to_z_sigma, self.W_read,
            self.W_write
        ]

    def get_peephole_params(self):
        '''
        Get all peephole parameters of this layer.
        :returns:
            - peephole_params : list of theano.shared
                List of all peephole parameters
        '''
        return [self.W_cellenc_to_enc_gates, self.W_celldec_to_dec_gates]

    def get_init_params(self):
        '''
        Get all initial parameters of this layer.
        :returns:
            - init_params : list of theano.shared
                List of all initial parameters
        '''
        if self.learn_hid_init:
            params = [
                self.hid_init_enc, self.cell_init_enc, self.hid_init_dec,
                self.cell_init_dec
            ]
        else:
            params = []
        return params

    def get_bias_params(self):
        '''
        Get all bias parameters of this layer.

        :returns:
            - bias_params : list of theano.shared
                List of all bias parameters
        '''
        params = [
            self.b_gates_enc, self.b_gates_dec, self.b_enc_to_z_mu,
            self.b_enc_to_z_sigma, self.b_read, self.b_write
        ]

        return params

    def get_output_shape_for(self, input_shape):
        '''
        Compute the expected output shape given the input.

        :parameters:
            - input_shape : tuple
                Dimensionality of expected input

        :returns:
            - output_shape : tuple
                Dimensionality of expected outputs given input_shape
        '''
        return input_shape

    def _lstm(self, gates, cell_previous, W_cell_to_gates, nonlinearity_out):
        # LSTM step
        # Gate names are taken from http://arxiv.org/abs/1409.2329 figure 1
        def slice_w(x, n):
            start = n * self.num_units_encoder_and_decoder
            stop = (n + 1) * self.num_units_encoder_and_decoder
            return x[:, start:stop]

        def slice_c(x, n):
            start = n * self.num_units_encoder_and_decoder
            stop = (n + 1) * self.num_units_encoder_and_decoder
            return x[start:stop]
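        # slice_w picks the n-th num_units-wide column block out of the
        # concatenated pre-activations (order: in, forget, modulation, out);
        # slice_c does the same on the flat peephole vector, which holds
        # only three blocks (in, forget, out).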

        def clip(x):
            return theano.gradient.grad_clip(x, self.grad_clip_vals_in[0],
                                             self.grad_clip_vals_in[1])

        ingate = slice_w(gates, 0)
        forgetgate = slice_w(gates, 1)
        modulationgate = slice_w(gates, 2)
        outgate = slice_w(gates, 3)

        if self.peepholes:
            ingate += cell_previous * slice_c(W_cell_to_gates, 0)
            forgetgate += cell_previous * slice_c(W_cell_to_gates, 1)

        if self.grad_clip_vals_in is not None:
            print('STEP: Clipping gradients IN', self.grad_clip_vals_in)
            ingate = clip(ingate)
            forgetgate = clip(forgetgate)
            modulationgate = clip(modulationgate)
        ingate = self.nonlinearity_ingate(ingate)
        forgetgate = self.nonlinearity_forgetgate(forgetgate)
        modulationgate = self.nonlinearity_modulationgate(modulationgate)
        if self.grad_clip_vals_in is not None:
            ingate = clip(ingate)
            forgetgate = clip(forgetgate)
            modulationgate = clip(modulationgate)

        cell = forgetgate * cell_previous + ingate * modulationgate
        if self.peepholes:
            outgate += cell * slice_c(W_cell_to_gates, 2)

        if self.grad_clip_vals_in is not None:
            outgate = clip(outgate)

        outgate = self.nonlinearity_outgate(outgate)
        if self.grad_clip_vals_in is not None:
            outgate = clip(outgate)

        hid = outgate * nonlinearity_out(cell)
        return [cell, hid]

    def get_cost(self, x, y=None, *args, **kwargs):
        """
        Compute layer cost.

        :parameters:
            - input : theano.TensorType
                Symbolic input variable

        :returns:
            - layer_output : theano.TensorType
                Symbolic output variable
        """

        if y is None and self.use_y:
            raise ValueError('y must be given when use_y is true')

        def step(
            eps_n,
            ######### RECURRENT
            cell_previous_enc,
            hid_previous_enc,
            cell_previous_dec,
            hid_previous_dec,
            canvas_previous,
            mu_z_previous,
            log_sigma_z_previous,
            z_previous,
            l_read_previous,
            l_write_previous,
            #kl_previous,
            ######### x and WEIGHTS
            x,
            y,
            W_enc_gates,
            W_hid_to_gates_enc,
            b_gates_enc,
            W_cellenc_to_enc_gates,
            W_read,
            b_read,
            W_z_to_gates_dec,
            b_gates_dec,
            W_hid_to_gates_dec,
            W_celldec_to_dec_gates,
            W_enc_to_z_mu,
            b_enc_to_z_mu,
            W_enc_to_z_sigma,
            b_enc_to_z_sigma,
            W_dec_to_canvas_patch,
            W_write,
            b_write,
        ):
            # calculate gates pre-activations and slice
            N_read = self.N_filters_read
            N_write = self.N_filters_write
            img_shp = self.imgshp

            x_err = x - T.nnet.sigmoid(canvas_previous)
            att_read = nn2att(l_read_previous, N_read, img_shp)
            x_org_in, x_err_in = read(x, x_err, att_read, N_read, img_shp)

            x_org_in = att_read['gamma'] * x_org_in
            x_err_in = att_read['gamma'] * x_err_in

            if self.use_y:
                in_gates_enc = T.concatenate(
                    [y, x_org_in, x_err_in, hid_previous_dec], axis=1)
            else:
                in_gates_enc = T.concatenate(
                    [x_org_in, x_err_in, hid_previous_dec], axis=1)

            # equation (5)~ish
            #slice_gates_idx = 4*self.num_units_encoder_and_decoder
            # ENCODER
            gates_enc = T.dot(in_gates_enc, W_enc_gates) + b_gates_enc
            gates_enc += T.dot(hid_previous_enc, W_hid_to_gates_enc)
            #gates_enc +=T.dot(hid_previous_enc, W_hidenc_to_enc_gates)
            cell_enc, hid_enc = self._lstm(gates_enc, cell_previous_enc,
                                           W_cellenc_to_enc_gates,
                                           self.nonlinearity_out_encoder)

            # VARIATIONAL
            # eq 6
            mu_z = T.dot(hid_enc, W_enc_to_z_mu) + b_enc_to_z_mu
            log_sigma_z = 0.5 * (T.dot(hid_enc, W_enc_to_z_sigma) +
                                 b_enc_to_z_sigma)
            z = mu_z + T.exp(log_sigma_z) * eps_n
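            # Reparameterization trick: z = mu + sigma * eps with
            # eps ~ N(0, I), which keeps the sample differentiable
            # w.r.t. mu_z and log_sigma_z.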

            if self.use_y:
                print('STEP: using Y')
                in_gates_dec = T.concatenate([y, z], axis=1)
            else:
                print('STEP: Not using Y')
                in_gates_dec = z

            # DECODER
            gates_dec = T.dot(in_gates_dec,
                              W_z_to_gates_dec) + b_gates_dec  # i_dec
            gates_dec += T.dot(hid_previous_dec, W_hid_to_gates_dec)
            # equation (7)
            cell_dec, hid_dec = self._lstm(gates_dec, cell_previous_dec,
                                           W_celldec_to_dec_gates,
                                           self.nonlinearity_out_decoder)

            # WRITE
            l_write = T.dot(hid_dec, W_write) + b_write
            w = T.dot(hid_dec, W_dec_to_canvas_patch)
            att_write = nn2att(l_write, N_write, img_shp)
            canvas_upd = write(w, att_write, N_write, img_shp)
            canvas_upd = 1.0 / (att_write['gamma'] + 1e-4) * canvas_upd
            canvas = canvas_previous + canvas_upd
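            # Writes are additive: the canvas accumulates one decoded patch
            # per glimpse, and only the final canvas is squashed through a
            # sigmoid when the likelihood is computed.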

            l_read = T.dot(hid_dec, W_read) + b_read

            # Todo: some (or all?) of the gradient clips are redundant,
            # and I'm unsure whether grad_clip is used correctly and in the
            # correct places...
            # Gradient clipping is described in "Generating sequences with
            # recurrent neural networks", section 2.1: Long Short-Term Memory.
            #
            #if self.grad_clip_vals_out is not None:
            #    print('STEP: Clipping gradients OUT', self.grad_clip_vals_out)
            #    cell_enc = theano.gradient.grad_clip(cell_enc, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            #    hid_enc = theano.gradient.grad_clip(hid_enc, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            #    cell_dec = theano.gradient.grad_clip(cell_dec, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])
            #    hid_dec = theano.gradient.grad_clip(hid_dec, self.grad_clip_vals_out[0], self.grad_clip_vals_out[1])

            return [
                cell_enc, hid_enc, cell_dec, hid_dec, canvas, mu_z,
                log_sigma_z, z, l_read, l_write
            ]

        ones = T.ones((self.num_batch, 1))
        mu_z_init = T.zeros((self.num_batch, self.dimz))
        log_sigma_z_init = T.zeros((self.num_batch, self.dimz))
        z_init = T.zeros((self.num_batch, self.dimz))
        att_vals_write_init = T.zeros((self.num_batch, 5))

        if theano.config.compute_test_value == 'off':
            eps = _srng.normal((self.glimpses, self.num_batch, self.dimz))
        else:
            # for testing: use a constant instead of random samples
            print("draw.py: not using random generator" + "!#>" * 30)
            eps = T.ones((self.glimpses, self.num_batch, self.dimz),
                         theano.config.floatX) * 0.3

        if y is None:
            y = T.zeros((1))

        # Todo: clean this up somehow
        # Todo: will it slow down theano optimization if I don't pass the
        # non-sequences in as arguments, but just reference them via self.XXXX?
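        # Note: theano.scan picks up shared variables used inside step() even
        # when they are not listed, but passing them explicitly as
        # non_sequences keeps the graph dependencies visible and lets scan
        # optimize the inner function more easily.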
        seqs = [eps]
        init = [
            T.dot(ones, self.cell_init_enc),
            T.dot(ones, self.hid_init_enc),
            T.dot(ones, self.cell_init_dec),
            T.dot(ones, self.hid_init_dec),
            T.dot(ones, self.canvas_init), mu_z_init, log_sigma_z_init, z_init,
            T.dot(ones, self.read_init), att_vals_write_init
        ]
        nonseqs_input = [x, y]
        nonseqs_enc = [
            self.W_enc_gates, self.W_hid_to_gates_enc, self.b_gates_enc,
            self.W_cellenc_to_enc_gates, self.W_read, self.b_read
        ]
        nonseqs_dec = [
            self.W_z_to_gates_dec, self.b_gates_dec, self.W_hid_to_gates_dec,
            self.W_celldec_to_dec_gates
        ]
        nonseqs_variational = [
            self.W_enc_to_z_mu, self.b_enc_to_z_mu, self.W_enc_to_z_sigma,
            self.b_enc_to_z_sigma
        ]
        nonseqs_other = [
            self.W_dec_to_canvas_patch, self.W_write, self.b_write
        ]
        non_seqs = nonseqs_input + nonseqs_enc + nonseqs_dec + \
                   nonseqs_variational + nonseqs_other

        output_scan = theano.scan(step,
                                  sequences=seqs,
                                  outputs_info=init,
                                  non_sequences=non_seqs,
                                  go_backwards=False)[0]


        cell_enc, hid_enc, cell_dec, hid_dec, canvas, mu_z, log_sigma_z, \
        z, l_read, l_write = output_scan

        # Because we model the output as Bernoulli (the fixed choice of
        # p(x|z) here), we take the sigmoid to ensure the range (0, 1).
        last_reconstruction = T.nnet.sigmoid(canvas[-1, :, :])

        # LOSS
        # The loss is the negative loglikelihood of the data plus the
        # KL divergence between the the variational approximation to z and
        # the prior on z:
        # Loss = -logD(x) + D_kl(Q(z|h)||p(z))
        # If we assume that x is bernoulli then
        # -logD(x) = -(t*log(o) +(1-t)*log(1-o)) = cross_ent(t,o)
        # D_kl(Q(z|h)||p(z)) can in some cases be solved analytically as
        # D_kl(Q(z|h)||p(z)) = 0.5(sum_T(mu^2 + sigma^2 - 1 -log(sigma^2)))
        # We add these terms and return the total cost, i.e. minus the
        # variational lower bound, averaged over the batch.

        L_x = T.nnet.binary_crossentropy(last_reconstruction, x).sum()
        #L_x = cross_ent(last_reconstruction, x).sum()
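        # log_sigma_z holds log(sigma), so exp(2 * log_sigma_z) = sigma^2 and
        # the line below is the closed-form KL term from the comment above.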
        L_z = T.sum(0.5 *
                    (mu_z**2 + T.exp(log_sigma_z * 2) - 1 - log_sigma_z * 2))
        self.L_x = L_x
        self.L_z = L_z
        L = L_x + L_z

        self.canvas = canvas
        self.att_vals_read = l_read
        self.att_vals_write = l_write

        return L / self.num_batch

    def get_canvas(self):
        return T.nnet.sigmoid(self.canvas.dimshuffle(1, 0, 2))

    def get_att_vals(self):
        return self.att_vals_read.dimshuffle(1, 0, 2), \
               self.att_vals_write.dimshuffle(1, 0, 2)

    def get_logx(self):
        return self.L_x / self.num_batch

    def get_KL(self):
        return self.L_z / self.num_batch

    def generate(self, n_digits, y=None, *args, **kwargs):
        '''
        Generate digits; see http://arxiv.org/abs/1502.04623v1 section 2.3.
        '''

        if y is None and self.use_y:
            raise ValueError('y must be given when use_y is true')

        def step(z, cell_previous_dec, hid_previous_dec, canvas_previous,
                 l_write_previous, y, W_z_to_gates_dec, b_gates_dec,
                 W_hid_to_gates_dec, W_celldec_to_dec_gates,
                 W_dec_to_canvas_patch, W_write, b_write):
            N_write = self.N_filters_write
            img_shp = self.imgshp

            # DECODER
            if self.use_y:
                print('STEP: using Y')
                in_gates_dec = T.concatenate([y, z], axis=1)
            else:
                print('STEP: Not using Y')
                in_gates_dec = z

            gates_dec = T.dot(in_gates_dec, W_z_to_gates_dec) + b_gates_dec
            gates_dec += T.dot(hid_previous_dec, W_hid_to_gates_dec)
            # equation (7)
            cell_dec, hid_dec = self._lstm(gates_dec, cell_previous_dec,
                                           W_celldec_to_dec_gates,
                                           self.nonlinearity_out_decoder)

            # WRITE
            l_write = T.dot(hid_dec, W_write) + b_write
            w = T.dot(hid_dec, W_dec_to_canvas_patch)
            att_write = nn2att(l_write, N_write, img_shp)
            canvas_upd = write(w, att_write, N_write, img_shp)
            canvas_upd = 1.0 / (att_write['gamma'] + 1e-4) * canvas_upd
            canvas = canvas_previous + canvas_upd

            return [cell_dec, hid_dec, canvas, l_write]

        ones = T.ones((n_digits, 1))
        if theano.config.compute_test_value == 'off':
            z_samples = _srng.normal((self.glimpses, n_digits, self.dimz))
        else:
            print("draw.py: not using random generator" + "!#>" * 30)
            z_samples = T.ones((self.glimpses, n_digits, self.dimz),
                               theano.config.floatX) * 0.3

        if y is None:
            y = T.zeros((1))
        att_vals_write_init = T.zeros((n_digits, 5))
        seqs = [z_samples]
        init = [
            T.dot(ones, self.cell_init_dec),
            T.dot(ones, self.hid_init_dec),
            T.dot(ones, self.canvas_init), att_vals_write_init
        ]
        non_seqs = [
            y, self.W_z_to_gates_dec, self.b_gates_dec,
            self.W_hid_to_gates_dec, self.W_celldec_to_dec_gates,
            self.W_dec_to_canvas_patch, self.W_write, self.b_write
        ]

        output_scan = theano.scan(step,
                                  sequences=seqs,
                                  outputs_info=init,
                                  non_sequences=non_seqs,
                                  go_backwards=False)[0]

        canvas = output_scan[2]
        l_write = output_scan[3]
        return T.nnet.sigmoid(canvas.dimshuffle(1, 0, 2)), l_write.dimshuffle(
            1, 0, 2)
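
# A minimal NumPy sketch (not part of the layer above) of the iterative canvas
# refinement that generate() builds symbolically: every glimpse samples z from
# the prior, decodes it, and adds the result to a running canvas; the sigmoid
# of the final canvas is the generated image. `decode_step` below is a
# hypothetical stand-in for the decoder LSTM plus write attention.
import numpy as np

def draw_generate_sketch(decode_step, glimpses, n_digits, num_inputs, dimz):
    canvas = np.zeros((n_digits, num_inputs))
    for _ in range(glimpses):
        z = np.random.randn(n_digits, dimz)       # z_t ~ N(0, I)
        canvas = canvas + decode_step(z, canvas)  # additive write
    return 1.0 / (1.0 + np.exp(-canvas))          # sigmoid of final canvas
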
def build_model(
    batch_size,
    num_channels,
    input_length,
    output_dim,
    subsample,
):
    l_in = layers.InputLayer(
        shape=(batch_size, num_channels, input_length),
        name='input',
    )

    l_sampling = SubsampleLayer(
        l_in,
        window=(None, None, subsample),
        name='l_sampling',
    )

    l_conv1 = Conv1DLayer(
        l_sampling,
        name='conv1',
        num_filters=8,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool1 = MaxPool1DLayer(
        l_conv1,
        name='pool1',
        pool_size=3,
        stride=2,
    )

    l_dropout_conv2 = layers.DropoutLayer(
        l_pool1,
        name='drop_conv2',
        p=0.2,
    )

    l_conv2 = Conv1DLayer(
        l_dropout_conv2,
        name='conv2',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_conv3 = layers.DropoutLayer(
        l_conv2,
        name='drop_conv3',
        p=0.2,
    )

    l_conv3 = Conv1DLayer(
        l_dropout_conv3,
        name='conv3',
        num_filters=16,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool3 = MaxPool1DLayer(
        l_conv3,
        name='pool3',
        pool_size=3,
        stride=2,
    )

    l_dropout_conv4 = layers.DropoutLayer(
        l_pool3,
        name='drop_conv4',
        p=0.3,
    )

    l_conv4 = Conv1DLayer(
        l_dropout_conv4,
        name='conv4',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_conv5 = layers.DropoutLayer(
        l_conv4,
        name='drop_conv5',
        p=0.3,
    )

    l_conv5 = Conv1DLayer(
        l_dropout_conv5,
        name='conv5',
        num_filters=32,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool5 = MaxPool1DLayer(
        l_conv5,
        name='pool5',
        pool_size=3,
        stride=2,
    )

    l_dropout_conv6 = layers.DropoutLayer(
        l_pool5,
        name='drop_conv6',
        p=0.4,
    )

    l_conv6 = Conv1DLayer(
        l_dropout_conv6,
        name='conv6',
        num_filters=64,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_conv7 = layers.DropoutLayer(
        l_conv6,
        name='drop_conv7',
        p=0.4,
    )

    l_conv7 = Conv1DLayer(
        l_dropout_conv7,
        name='conv7',
        num_filters=64,
        border_mode='valid',
        filter_size=3,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_pool7 = MaxPool1DLayer(
        l_conv7,
        name='pool7',
        pool_size=3,
        stride=2,
    )

    l_dropout_dense1 = layers.DropoutLayer(
        l_pool7,
        name='drop_dense1',
        p=0.5,
    )

    l_dense1 = layers.DenseLayer(
        l_dropout_dense1,
        name='dense1',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_dropout_dense2 = layers.DropoutLayer(
        l_dense1,
        name='drop_dense2',
        p=0.5,
    )

    l_dense2 = layers.DenseLayer(
        l_dropout_dense2,
        name='dense2',
        num_units=128,
        nonlinearity=nonlinearities.rectify,
        W=init.Orthogonal(),
    )

    l_out = layers.DenseLayer(
        l_dense2,
        name='output',
        num_units=output_dim,
        nonlinearity=nonlinearities.sigmoid,
        W=init.Orthogonal(),
    )

    return l_out
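
# A minimal usage sketch, assuming lasagne and theano are importable and that
# the SubsampleLayer/Conv1DLayer helpers used above are on the path; the
# argument values are illustrative only.
import theano
import theano.tensor as T
import lasagne

net = build_model(batch_size=32, num_channels=1, input_length=1024,
                  output_dim=2, subsample=4)
X = T.tensor3('X')
# deterministic=True disables the dropout layers at prediction time
prediction = lasagne.layers.get_output(net, X, deterministic=True)
predict_fn = theano.function([X], prediction)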