示例#1
0
    def __init__(self, output_size, meta_size, depth=2):
        """Build a density network predicting per-dimension mean and
        log-variance, and compile train / loss / predict functions.

        Parameters
        ----------
        output_size : int
            Dimensionality of the target (also of the main input matrix).
        meta_size : int
            Dimensionality of the auxiliary "meta" input.
        depth : int
            Number of dense + batch-norm encoder blocks (at most
            ``len(encoder_sizes)``).
        """
        encoder_sizes = [64, 64, 64]
        # Guard: the loop below indexes encoder_sizes[idx].
        assert depth <= len(encoder_sizes)

        input_var = TT.matrix()
        meta_var = TT.matrix()
        target_var = TT.matrix()
        mask_var = TT.matrix()

        input_layer = layers.InputLayer((None, output_size), input_var=input_var)
        meta_layer = layers.InputLayer((None, meta_size), input_var=meta_var)
        concat_input_layer = layers.ConcatLayer([input_layer, meta_layer])
        dense = concat_input_layer

        for idx in xrange(depth):
            dense = layers.DenseLayer(dense, encoder_sizes[idx])
            dense = layers.batch_norm(dense)

        # One linear head; the first half of the units is mu, the second
        # half is the log-variance.
        mu_and_logvar = layers.DenseLayer(dense, 2 * output_size, nonlinearity=nonlinearities.linear)
        mu = layers.SliceLayer(mu_and_logvar, slice(0, output_size), axis=1)
        log_var = layers.SliceLayer(mu_and_logvar, slice(output_size, None), axis=1)

        # presumably a masked Gaussian negative log-likelihood — TODO confirm
        # against neg_log_likelihood2's definition.
        loss = neg_log_likelihood2(
            target_var,
            layers.get_output(mu),
            layers.get_output(log_var),
            mask_var
        ).mean()

        test_loss = neg_log_likelihood2(
            target_var,
            layers.get_output(mu, deterministic=True),
            layers.get_output(log_var, deterministic=True),
            mask_var
        ).mean()

        params = layers.get_all_params(mu_and_logvar, trainable=True)
        param_updates = updates.adadelta(loss, params)

        # BUG FIX: both losses depend on mask_var, but it was missing from
        # the input lists below, so theano.function raised a
        # MissingInputError at compile time.
        self._train_fn = theano.function(
            [input_var, meta_var, target_var, mask_var],
            updates=param_updates,
            outputs=loss
        )

        self._loss_fn = theano.function(
            [input_var, meta_var, target_var, mask_var],
            outputs=test_loss
        )

        self._predict_fn = theano.function(
            [input_var, meta_var],
            outputs=[
                layers.get_output(mu, deterministic=True),
                layers.get_output(log_var, deterministic=True)
            ]
        )
    def build_network(self, vocab_size, input_var, mask_var, W_init):
        """Two-layer bidirectional LSTM over word indices.

        The final forward state and first backward state of each depth are
        summed, the two depths concatenated, passed through a tanh dense
        layer, and projected to the vocabulary with weights tied to the
        (transposed) embedding matrix.
        """
        words = L.InputLayer(shape=(None, None, 1), input_var=input_var)
        mask = L.InputLayer(shape=(None, None), input_var=mask_var)
        embedding = L.EmbeddingLayer(words,
                                     input_size=vocab_size,
                                     output_size=EMBED_DIM,
                                     W=W_init)

        # Shared recurrent-layer settings for all four LSTMs.
        lstm_kwargs = dict(grad_clipping=GRAD_CLIP,
                           mask_input=mask,
                           gradient_steps=GRAD_STEPS,
                           precompute_input=True)

        fwd1 = L.LSTMLayer(embedding, NUM_HIDDEN, **lstm_kwargs)
        bwd1 = L.LSTMLayer(embedding, NUM_HIDDEN, backwards=True, **lstm_kwargs)
        both1 = L.concat([fwd1, bwd1], axis=2)

        fwd2 = L.LSTMLayer(both1, NUM_HIDDEN, **lstm_kwargs)
        bwd2 = L.LSTMLayer(both1, NUM_HIDDEN, backwards=True, **lstm_kwargs)

        # Last forward step + first backward step, per depth.
        y_1 = L.ElemwiseSumLayer([L.SliceLayer(fwd1, -1, 1),
                                  L.SliceLayer(bwd1, 0, 1)])
        y_2 = L.ElemwiseSumLayer([L.SliceLayer(fwd2, -1, 1),
                                  L.SliceLayer(bwd2, 0, 1)])

        features = L.concat([y_1, y_2], axis=1)
        g = L.DenseLayer(features,
                         num_units=EMBED_DIM,
                         nonlinearity=lasagne.nonlinearities.tanh)
        # Output projection ties its weights to the embedding matrix.
        return L.DenseLayer(g,
                            num_units=vocab_size,
                            W=embedding.W.T,
                            nonlinearity=lasagne.nonlinearities.softmax)
示例#3
0
    def _invert_PadLayer(self, layer, feeder):
        """Invert a 2D PadLayer by slicing the padded border off `feeder`.

        `layer.width` holds per-axis (before, after) pad widths for the two
        spatial axes (2 and 3).
        """
        assert isinstance(layer, L.PadLayer)
        assert layer.batch_ndim == 2
        # int literal; the original compared against the float 4.
        assert len(L.get_output_shape(layer)) == 4

        # BUG FIX: slice(a, -b) with b == 0 selects an empty range, so a
        # zero "after" pad on either axis broke the inversion. Map 0 -> None
        # to keep the full extent in that case.
        before_h, after_h = layer.width[0]
        before_w, after_w = layer.width[1]
        tmp = L.SliceLayer(feeder,
                           slice(before_h, -after_h if after_h else None),
                           axis=2)
        return L.SliceLayer(tmp,
                            slice(before_w, -after_w if after_w else None),
                            axis=3)
示例#4
0
 def build_network(self):
     """Character-level word encoder shared by document and query.

     Characters of both inputs are concatenated, embedded, run through a
     forward and a backward GRU (final states only), projected to
     embed_dim // 2, summed, and finally split back into the document and
     query parts. Returns (l_embed1, l_embed2).
     """
     l_char1_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[0])
     l_char2_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[1])
     l_mask1_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[2])
     l_mask2_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[3])
     l_char_in = L.ConcatLayer([l_char1_in, l_char2_in],
                               axis=1)  # B x (ND+NQ) x L
     l_char_mask = L.ConcatLayer([l_mask1_in, l_mask2_in], axis=1)
     shp = (self.inps[0].shape[0],
            self.inps[0].shape[1] + self.inps[1].shape[1],
            self.inps[1].shape[2])
     l_index_reshaped = L.ReshapeLayer(l_char_in,
                                       (shp[0] * shp[1], shp[2]))  # BN x L
     l_mask_reshaped = L.ReshapeLayer(l_char_mask,
                                      (shp[0] * shp[1], shp[2]))  # BN x L
     l_lookup = L.EmbeddingLayer(l_index_reshaped, self.num_chars,
                                 self.char_dim)  # BN x L x D
     l_fgru = L.GRULayer(l_lookup,
                         2 * self.char_dim,
                         grad_clipping=10,
                         gradient_steps=-1,
                         precompute_input=True,
                         only_return_final=True,
                         mask_input=l_mask_reshaped)
     l_bgru = L.GRULayer(l_lookup,
                         2 * self.char_dim,
                         grad_clipping=10,
                         gradient_steps=-1,
                         precompute_input=True,
                         backwards=True,
                         only_return_final=True,
                         mask_input=l_mask_reshaped)  # BN x 2D
     # FIX: use floor division so num_units stays an int on Python 3 as
     # well (plain `/` would produce a float and break DenseLayer);
     # behaviour is unchanged on Python 2.
     l_fwdembed = L.DenseLayer(l_fgru,
                               self.embed_dim // 2,
                               nonlinearity=None)  # BN x DE
     l_bckembed = L.DenseLayer(l_bgru,
                               self.embed_dim // 2,
                               nonlinearity=None)  # BN x DE
     l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
     l_char_embed = L.ReshapeLayer(l_embed,
                                   (shp[0], shp[1], self.embed_dim // 2))
     # Split the concatenated sequence back into its two halves.
     l_embed1 = L.SliceLayer(l_char_embed,
                             slice(0, self.inps[0].shape[1]),
                             axis=1)
     l_embed2 = L.SliceLayer(l_char_embed,
                             slice(-self.inps[1].shape[1], None),
                             axis=1)
     return l_embed1, l_embed2
    def _invert_Conv2DLayer(self, layer, feeder):
        """Build the layer graph that inverts `layer` (a Conv2DLayer):
        a flipped-filter convolution of the (rectified, normalised)
        relevance, multiplied elementwise with the layer's input.
        """
        # Warning they are swapped here
        feeder = self._put_rectifiers(feeder, layer)
        feeder = self._get_normalised_relevance_layer(layer, feeder)

        f_s = layer.filter_size
        # A 'valid' (or (0, 0)) forward convolution is inverted with 'full'
        # padding; 'same' inverts to 'same'. Anything else is unsupported.
        if layer.pad == 'same':
            pad = 'same'
        elif layer.pad == 'valid' or layer.pad == (0, 0):
            pad = 'full'
        else:
            raise RuntimeError("Define your padding as full or same.")

        # By definition the
        # Flip filters must be on to be a proper deconvolution.
        num_filters = L.get_output_shape(layer.input_layer)[1]
        if layer.stride == (4, 4):
            # Todo: similar code gradient based explainers. Merge.
            # Undo the stride by dilating the input, then convolving with
            # stride 1.
            feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')
            output_layer = L.Conv2DLayer(feeder,
                                         num_filters=num_filters,
                                         filter_size=f_s,
                                         stride=1,
                                         pad=pad,
                                         nonlinearity=None,
                                         b=None,
                                         flip_filters=True)
            conv_layer = output_layer
            # NOTE(review): the hard-coded 3-pixel trim presumably matches a
            # specific filter/stride combination — confirm for other configs.
            tmp = L.SliceLayer(output_layer, slice(0, -3), axis=3)
            output_layer = L.SliceLayer(tmp, slice(0, -3), axis=2)
            # Keep a handle to the conv weights on the outermost layer so
            # they can be re-assigned below.
            output_layer.W = conv_layer.W
        else:
            output_layer = L.Conv2DLayer(feeder,
                                         num_filters=num_filters,
                                         filter_size=f_s,
                                         stride=1,
                                         pad=pad,
                                         nonlinearity=None,
                                         b=None,
                                         flip_filters=True)
        W = output_layer.W

        # Do the multiplication.
        x_layer = L.ReshapeLayer(layer.input_layer,
                                 (-1, ) + L.get_output_shape(output_layer)[1:])
        output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
                                            merge_function=T.mul)
        output_layer.W = W
        return output_layer
示例#6
0
def recurrent(input_var=None,
              num_units=512,
              batch_size=64,
              seq_length=1,
              grad_clip=100):
    recurrent = []

    theano_rng = RandomStreams(rng.randint(2**15))
    # we want noise to match tanh range of activation ([-1,1])
    noise = theano_rng.uniform(size=(batch_size, seq_length, num_units),
                               low=-1.0,
                               high=1.0)
    input_var = noise if input_var is None else input_var

    recurrent.append(
        ll.InputLayer(shape=(batch_size, seq_length, num_units),
                      input_var=input_var))

    recurrent.append(
        ll.LSTMLayer(recurrent[-1], num_units,
                     grad_clipping=grad_clip))  #tanh is default

    recurrent.append(ll.SliceLayer(recurrent[-1], -1, 1))

    recurrent.append(ll.ReshapeLayer(recurrent[-1], ([0], 1, [1])))

    for layer in recurrent:
        print layer.output_shape
    print ""

    return recurrent
示例#7
0
def concat_tn(_top, _seed, start=0, num_slices=1):
    """Concatenate `num_slices` slices taken from `_seed` onto `_top`.

    Returns (layer, next_start) so successive calls can walk through
    `_seed` along axis 1. With no `_top` yet, just returns the slice.
    """
    # FIX: identity comparison with None (`== None` invokes __eq__ and is
    # the wrong idiom for sentinel checks).
    if _top is None:
        return L.SliceLayer(_seed, indices=slice(start, start + num_slices), axis=1), start + num_slices
    elif num_slices > 0:
        _seed1, n = create_slices_from(_seed, _top.output_shape,
                                       start=start, num_slices=num_slices)
        return L.ConcatLayer([_top, _seed1], axis=1), start + n
    else:
        # Nothing to take: pass _top through unchanged.
        return _top, start
示例#8
0
def create_slices_from(_source, ish, start=0, num_slices=1):
    """Take `num_slices` feature maps from `_source` along axis 1 and
    reshape them to match the (non-batch) shape implied by `ish`.

    `ish` is a 2- or 4-element output shape; for 4D shapes each slice
    covers ish[2] * ish[3] flat units. Returns (layer, units_consumed).
    """
    if len(ish) == 2:
        consumed = num_slices
        tail_shape = (num_slices,)
    else:
        consumed = num_slices * ish[2] * ish[3]
        tail_shape = (num_slices, ish[2], ish[3])

    taken = L.SliceLayer(_source, indices=slice(start, start + consumed), axis=1)
    return L.ReshapeLayer(taken, ([0],) + tail_shape), consumed
    def build_network(self, K, vocab_size, doc_var, query_var, docmask_var,
                      qmask_var, candmask_var, feat_var, W_init):
        """Build a K-hop gated-attention style reader.

        Each of the K-1 hops re-encodes the document with bi-GRUs, gates it
        elementwise with a fresh bi-GRU query summary, and applies dropout.
        The final document encoding is dot-producted with the query vector,
        softmaxed over candidate positions, and scattered into a
        vocabulary-sized distribution. Returns (final, final_v, l_doc, l_qs)
        — the train-time and deterministic output expressions, the final
        document layer, and the per-hop query layers.
        """
        l_docin = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=query_var)
        l_docmask = L.InputLayer(shape=(None, None), input_var=docmask_var)
        l_qmask = L.InputLayer(shape=(None, None), input_var=qmask_var)
        l_featin = L.InputLayer(shape=(None, None), input_var=feat_var)
        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=EMBED_DIM,
                                      W=W_init)  # B x N x 1 x DE
        l_doce = L.ReshapeLayer(
            l_docembed,
            (doc_var.shape[0], doc_var.shape[1], EMBED_DIM))  # B x N x DE
        # Query embedding shares the document embedding matrix.
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=EMBED_DIM,
                                    W=l_docembed.W)
        l_fembed = L.EmbeddingLayer(l_featin, input_size=2,
                                    output_size=2)  # B x N x 2

        # Optionally freeze the embedding matrix.
        if not EMB_TRAIN: l_docembed.params[l_docembed.W].remove('trainable')

        l_fwd_q = L.GRULayer(l_qembed,
                             NUM_HIDDEN,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True)
        l_bkd_q = L.GRULayer(l_qembed,
                             NUM_HIDDEN,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True)

        # Query summary: last forward state ++ first backward state.
        l_fwd_q_slice = L.SliceLayer(l_fwd_q, -1, 1)
        l_bkd_q_slice = L.SliceLayer(l_bkd_q, 0, 1)
        l_q = L.ConcatLayer([l_fwd_q_slice, l_bkd_q_slice])  # B x 2D
        q = L.get_output(l_q)  # B x 2D

        l_qs = [l_q]
        # K-1 gated-attention hops over the document.
        for i in range(K - 1):
            l_fwd_doc_1 = L.GRULayer(l_doce,
                                     NUM_HIDDEN,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True)
            l_bkd_doc_1 = L.GRULayer(l_doce,
                                     NUM_HIDDEN,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True,
                                     backwards=True)

            l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2)

            # Fresh query bi-GRU per hop (weights are not shared).
            l_fwd_q_1 = L.GRULayer(l_qembed,
                                   NUM_HIDDEN,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True)
            l_bkd_q_1 = L.GRULayer(l_qembed,
                                   NUM_HIDDEN,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True,
                                   backwards=True)

            l_fwd_q_slice_1 = L.SliceLayer(l_fwd_q_1, -1, 1)
            l_bkd_q_slice_1 = L.SliceLayer(l_bkd_q_1, 0, 1)
            l_q_c_1 = L.ConcatLayer([l_fwd_q_slice_1,
                                     l_bkd_q_slice_1])  # B x DE

            l_qs.append(l_q_c_1)

            # Tile the query vector across document positions...
            qd = L.get_output(l_q_c_1)
            q_rep = T.reshape(T.tile(qd, (1, doc_var.shape[1])),
                              (doc_var.shape[0], doc_var.shape[1],
                               2 * NUM_HIDDEN))  # B x N x DE

            # ...and gate the document encoding with it (elementwise mul).
            l_q_rep_in = L.InputLayer(shape=(None, None, 2 * NUM_HIDDEN),
                                      input_var=q_rep)
            l_doc_2_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul)
            l_doce = L.dropout(l_doc_2_in, p=DROPOUT_RATE)  # B x N x DE

        # Final hop: append the 2-dim feature embedding before encoding.
        l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2
        l_fwd_doc = L.GRULayer(l_doce,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doce, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True, \
                        backwards=True)

        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        # Train-time path (dropout active): attention over positions,
        # masked to candidates and renormalised.
        d = L.get_output(l_doc)  # B x N x 2D
        p = T.batched_dot(d, q)  # B x N
        pm = T.nnet.softmax(p) * candmask_var
        pm = pm / pm.sum(axis=1)[:, np.newaxis]

        # Scatter-add position probabilities onto their word ids.
        index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
        final = T.inc_subtensor(T.alloc(0.,p.shape[0],vocab_size)[index,T.flatten(doc_var,outdim=2)],\
                pm)

        # Deterministic (test-time) path: same computation without dropout.
        dv = L.get_output(l_doc, deterministic=True)  # B x N x 2D
        p = T.batched_dot(dv, q)  # B x N
        pm = T.nnet.softmax(p) * candmask_var
        pm = pm / pm.sum(axis=1)[:, np.newaxis]

        index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
        final_v = T.inc_subtensor(T.alloc(0.,p.shape[0],vocab_size)[index,\
                T.flatten(doc_var,outdim=2)],pm)

        return final, final_v, l_doc, l_qs
示例#10
0
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_sizes,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 encoding_levels=None,
                 num_encoding_levels=5,
                 xd_dim=32,
                 hidden_W_init=LI.GlorotUniform(),
                 hidden_b_init=LI.Constant(0.),
                 output_W_init=LI.GlorotUniform(),
                 output_b_init=LI.Constant(0.),
                 hidden_nonlinearity=LN.rectify,
                 output_nonlinearity=None,
                 name=None,
                 input_var=None):
        """Build a network that splits its input into two image streams,
        encodes each with frozen VGG16 weights loaded from an h5 file,
        standarizes and concatenates selected encoding levels, and finishes
        with conv + dense heads.

        The two streams share parameters: the first stream (ihid == 0)
        creates the shared variables and pushes them onto
        params_kwargs_list; the second stream pops them in the same order.
        NOTE(review): this relies on both streams executing the exact same
        sequence of parameter-creating branches.
        """
        if name is None:
            prefix = ""
        else:
            prefix = name + "_"

        # Accept flattened input for 2D/3D shapes and reshape back; a 2D
        # shape gets a singleton channel axis prepended.
        if len(input_shape) == 3:
            l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                input_var=input_var)
            l_hid = L.reshape(l_in, ([0], ) + input_shape)
        elif len(input_shape) == 2:
            l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                input_var=input_var)
            input_shape = (1, ) + input_shape
            l_hid = L.reshape(l_in, ([0], ) + input_shape)
        else:
            l_in = L.InputLayer(shape=(None, ) + input_shape,
                                input_var=input_var)
            l_hid = l_in

        # Split the channel axis into two equal halves (two image streams).
        assert input_shape[0] % 2 == 0
        l_hid0 = L.SliceLayer(l_hid, slice(None, input_shape[0] // 2), axis=1)
        l_hid1 = L.SliceLayer(l_hid, slice(input_shape[0] // 2, None), axis=1)
        l_hids = [l_hid0, l_hid1]

        if encoding_levels is None:
            encoding_levels = [num_encoding_levels]
        else:
            assert max(encoding_levels) == num_encoding_levels

        # Channel count per encoding level (level 0 is the raw image).
        xlevels_c_dim = OrderedDict(
            zip(range(num_encoding_levels + 1), [3, 64, 128, 256, 512, 512]))

        import h5py
        params_file = h5py.File("models/theano/vgg16_levelsall_nodyn_model.h5",
                                'r')
        # FIFO of shared parameters: filled by stream 0, drained by stream 1.
        params_kwargs_list = []
        # encoding
        for ihid, l_hid in enumerate(l_hids):
            l_xlevels = OrderedDict()
            l_xdlevels = OrderedDict(
            )  # downsampled version of l_xlevels at the resolution for servoing
            for level in range(num_encoding_levels + 1):
                if level == 0:
                    l_xlevel = l_hid
                elif level < 3:
                    l_xlevelm1 = l_xlevels[level - 1]
                    if level == 1:
                        # change from BGR to RGB and subtract mean pixel values
                        # (X - mean_pixel_bgr[None, :, None, None])[:, ::-1, :, :]
                        # X[:, ::-1, :, :] - mean_pixel_rgb[None, :, None, None]
                        if ihid == 0:
                            mean_pixel_bgr = np.array(
                                [103.939, 116.779, 123.68], dtype=np.float32)
                            mean_pixel_rgb = mean_pixel_bgr[::-1]
                            # 1x1 conv whose kernel reverses the channel order.
                            W = np.eye(3)[::-1, :].reshape(
                                (3, 3, 1, 1)).astype(np.float32)
                            b = -mean_pixel_rgb
                            params_kwargs = dict(W=W, b=b)
                            for k, v in params_kwargs.items():
                                bcast = tuple(s == 1 for s in v.shape)
                                params_kwargs[k] = theano.shared(
                                    v, broadcastable=bcast)
                            params_kwargs_list.append(params_kwargs)
                        else:
                            params_kwargs = params_kwargs_list.pop(0)
                        l_xlevelm1 = L.Conv2DLayer(l_xlevelm1,
                                                   num_filters=3,
                                                   filter_size=1,
                                                   nonlinearity=nl.identity,
                                                   **params_kwargs)
                        # The preprocessing conv is fixed, never trained.
                        l_xlevelm1.W.name = 'x0.W'
                        l_xlevelm1.params[l_xlevelm1.W].remove('trainable')
                        l_xlevelm1.b.name = 'x0.b'
                        l_xlevelm1.params[l_xlevelm1.b].remove('trainable')
                    # Two-conv VGG block with pretrained weights.
                    if ihid == 0:
                        conv1_W = params_file['conv%d_1.W' % level][()]
                        conv1_b = params_file['conv%d_1.b' % level][()]
                        conv2_W = params_file['conv%d_2.W' % level][()]
                        conv2_b = params_file['conv%d_2.b' % level][()]
                        params_kwargs = dict(conv1_W=conv1_W,
                                             conv1_b=conv1_b,
                                             conv2_W=conv2_W,
                                             conv2_b=conv2_b)
                        for k, v in params_kwargs.items():
                            bcast = tuple(s == 1 for s in v.shape)
                            params_kwargs[k] = theano.shared(
                                v, broadcastable=bcast)
                        params_kwargs_list.append(params_kwargs)
                    else:
                        params_kwargs = params_kwargs_list.pop(0)
                    l_xlevel = LT.VggEncodingLayer(l_xlevelm1,
                                                   xlevels_c_dim[level],
                                                   level=str(level),
                                                   **params_kwargs)
                else:
                    # Three-conv VGG block, dilated instead of pooled.
                    if ihid == 0:
                        conv1_W = params_file['conv%d_1.W' % level][()]
                        conv1_b = params_file['conv%d_1.b' % level][()]
                        conv2_W = params_file['conv%d_2.W' % level][()]
                        conv2_b = params_file['conv%d_2.b' % level][()]
                        conv3_W = params_file['conv%d_3.W' % level][()]
                        conv3_b = params_file['conv%d_3.b' % level][()]
                        params_kwargs = dict(conv1_W=conv1_W,
                                             conv1_b=conv1_b,
                                             conv2_W=conv2_W,
                                             conv2_b=conv2_b,
                                             conv3_W=conv3_W,
                                             conv3_b=conv3_b)
                        for k, v in params_kwargs.items():
                            bcast = tuple(s == 1 for s in v.shape)
                            params_kwargs[k] = theano.shared(
                                v, broadcastable=bcast)
                        params_kwargs_list.append(params_kwargs)
                    else:
                        params_kwargs = params_kwargs_list.pop(0)
                    l_xlevel = LT.VggEncoding3Layer(
                        l_xlevels[level - 1],
                        xlevels_c_dim[level],
                        dilation=(2**(level - 3), ) * 2,
                        level=str(level),
                        **params_kwargs)
                # TODO:
                # All encoder parameters are frozen.
                LT.set_layer_param_tags(l_xlevel, trainable=False)
                # downsample to servoing resolution
                xlevel_shape = L.get_output_shape(l_xlevel)
                xlevel_dim = xlevel_shape[-1]
                # Feature maps are assumed square here.
                assert xlevel_shape[-2] == xlevel_dim
                scale_factor = xlevel_dim // xd_dim
                if scale_factor > 1:
                    l_xdlevel = LT.Downscale2DLayer(l_xlevel,
                                                    scale_factor=scale_factor,
                                                    name='x%dd' % level)
                elif scale_factor == 1:
                    l_xdlevel = l_xlevel
                else:
                    raise NotImplementedError
                if 0 < level < 3:
                    l_xlevel = L.MaxPool2DLayer(l_xlevel,
                                                pool_size=2,
                                                stride=2,
                                                pad=0,
                                                name='pool%d' % level)
                l_xlevels[level] = l_xlevel
                l_xdlevels[level] = l_xdlevel

            l_ylevels = OrderedDict(
            )  # standarized version of l_xdlevels used as the feature for servoing
            for level in encoding_levels:
                if ihid == 0:
                    offset = params_file['y%d.offset' % level][()]
                    scale = params_file['y%d.scale' % level][()]
                    params_kwargs = dict(offset=offset, scale=scale)
                    for k, v in params_kwargs.items():
                        bcast = tuple(s == 1 for s in v.shape)
                        params_kwargs[k] = theano.shared(v,
                                                         broadcastable=bcast)
                    params_kwargs_list.append(params_kwargs)
                else:
                    params_kwargs = params_kwargs_list.pop(0)
                l_ylevels[level] = LT.StandarizeLayer(l_xdlevels[level],
                                                      name='y%d' % level,
                                                      **params_kwargs)

            l_hids[ihid] = L.ConcatLayer(
                [l_ylevels[level] for level in encoding_levels], axis=1)
        # Every shared parameter pushed by stream 0 must have been consumed.
        assert not params_kwargs_list

        l_hid = L.ConcatLayer(l_hids, axis=1)

        # Trainable head: configurable conv stack followed by dense layers.
        for idx, conv_filter, filter_size, stride, pad in zip(
                range(len(conv_filters)),
                conv_filters,
                conv_filter_sizes,
                conv_strides,
                conv_pads,
        ):
            l_hid = L.Conv2DLayer(
                l_hid,
                num_filters=conv_filter,
                filter_size=filter_size,
                stride=(stride, stride),
                pad=pad,
                nonlinearity=hidden_nonlinearity,
                name="%sconv_hidden_%d" % (prefix, idx),
            )
        conv_out = l_hid
        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(
                l_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name="%shidden_%d" % (prefix, idx),
                W=hidden_W_init,
                b=hidden_b_init,
            )
        l_out = L.DenseLayer(
            l_hid,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name="%soutput" % (prefix, ),
            W=output_W_init,
            b=output_b_init,
        )
        self._l_in = l_in
        self._l_out = l_out
        self._input_var = l_in.input_var
        self._conv_out = conv_out
示例#11
0
    def rnn_decoder(l_input_one_hot,
                    l_encoder_hid,
                    encoder_mask,
                    out_sym,
                    out_mask,
                    out_go_sym,
                    name="Decoder"):
        """Attention decoder over the encoder's hidden states.

        Embeds the shifted target (`out_go_sym`), conditions each step on a
        summary of the encoder (its last masked hidden state, repeated over
        time), runs dropout-LSTM layer(s), and applies Bahdanau key-value
        attention to produce per-step vocabulary scores.

        Uses `dict_size`, `bs`, and the layer classes from the enclosing
        scope. Returns {'loss', 'argmax', 'params'}.
        """
        n_layers = 1
        n_units = 256
        n_attention_units = 256
        emb_size = 256
        rnn = DropoutLSTMLayer

        l_go_out = L.InputLayer((None, None), input_var=out_go_sym)
        l_out_mask = L.InputLayer((None, None), input_var=out_mask)
        l_in_mask = L.InputLayer((None, None), input_var=encoder_mask)

        l_emb = L.EmbeddingLayer(l_go_out,
                                 dict_size,
                                 emb_size,
                                 name=name + '.Embedding')

        # Summarize the encoder: run one more recurrent pass and keep the
        # final (masked) hidden state.
        last_hid_encoded = L.SliceLayer(rnn(l_encoder_hid,
                                            num_units=n_units,
                                            mask_input=l_in_mask,
                                            name=name + '.Summarizer',
                                            dropout=0.25),
                                        indices=-1,
                                        axis=1)
        # Repeat the summary across every decoder time step.
        encoder_last_hid_repeat = RepeatLayer(last_hid_encoded,
                                              n=T.shape(out_go_sym)[1],
                                              axis=1)

        l_dec = L.ConcatLayer([l_emb, encoder_last_hid_repeat], axis=2)
        for i in range(n_layers):
            l_dec = rnn(l_dec,
                        num_units=n_units,
                        mask_input=l_out_mask,
                        name="%s.%d.Forward" % (name, i),
                        learn_init=True,
                        dropout=0.25)

        l_attention = BahdanauKeyValueAttentionLayer(
            [l_encoder_hid, l_input_one_hot, l_in_mask, l_dec],
            n_attention_units,
            name=name + '.Attention')  # (bs, seq_out, dict)
        # Flatten batch and time for the cross-entropy below.
        l_out = L.ReshapeLayer(l_attention, (-1, [2]))

        out_random = L.get_output(
            l_out, deterministic=False)  # (batch * seq_out) x dict
        out_deterministic = L.get_output(
            l_out, deterministic=True)  # (batch * seq_out) x dict
        params = L.get_all_params([l_out], trainable=True)

        # The 1e-8 keeps log() finite for zero probabilities.
        rcrossentropy = T.nnet.categorical_crossentropy(
            out_random + 1e-8, out_sym.flatten())  # (batch * seq) x 1
        crossentropy = T.reshape(rcrossentropy, (bs, -1))  # batch x seq
        # Mask out padding and normalise by the number of real tokens.
        loss = T.sum(out_mask * crossentropy) / T.sum(out_mask)  # scalar
        argmax = T.argmax(T.reshape(out_deterministic, (bs, -1, dict_size)),
                          axis=-1)  # batch x seq x 1

        return {'loss': loss, 'argmax': argmax, 'params': params}
    def __init__(self,
                 input_shape,
                 output_dim,
                 hidden_sizes,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 hidden_W_init=LI.GlorotUniform(),
                 hidden_b_init=LI.Constant(0.),
                 output_W_init=LI.GlorotUniform(),
                 output_b_init=LI.Constant(0.),
                 hidden_nonlinearity=LN.rectify,
                 output_nonlinearity=LN.softmax,
                 name=None,
                 input_var=None):
        """Siamese convolutional network.

        The channel axis of the input is split into two equal halves,
        each half is run through a weight-tied conv stack, the two
        branches are combined as (branch1 - branch0)**2, and the result
        is fed through dense hidden layers into the output layer.
        """
        prefix = "" if name is None else name + "_"

        # Accept a flattened input vector for 2-D/3-D shapes and restore
        # the spatial layout internally via a reshape.
        if len(input_shape) == 3:
            l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                input_var=input_var)
            l_hid = L.reshape(l_in, ([0], ) + input_shape)
        elif len(input_shape) == 2:
            l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                input_var=input_var)
            input_shape = (1, ) + input_shape
            l_hid = L.reshape(l_in, ([0], ) + input_shape)
        else:
            l_in = L.InputLayer(shape=(None, ) + input_shape,
                                input_var=input_var)
            l_hid = l_in

        # The channel axis must split evenly into the two branches.
        assert input_shape[0] % 2 == 0
        half = input_shape[0] // 2
        branches = [
            L.SliceLayer(l_hid, slice(None, half), axis=1),
            L.SliceLayer(l_hid, slice(half, None), axis=1),
        ]

        conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides,
                         conv_pads)
        for idx, (n_filters, f_size, stride, pad) in enumerate(conv_specs):
            for branch_idx in range(len(branches)):
                # Tie weights across branches: after branch 0 is built for
                # this level, every later branch reuses its W and b.
                if branch_idx > 0:
                    shared_params = dict(W=branches[0].W, b=branches[0].b)
                else:
                    shared_params = dict()
                branches[branch_idx] = L.Conv2DLayer(
                    branches[branch_idx],
                    num_filters=n_filters,
                    filter_size=f_size,
                    stride=(stride, stride),
                    pad=pad,
                    nonlinearity=hidden_nonlinearity,
                    name="%sconv_hidden_%d_%d" % (prefix, idx, branch_idx),
                    convolution=wrapped_conv,
                    **shared_params)

        # Combine the branches: squared difference, (b1 - b0) ** 2.
        l_hid = L.ElemwiseSumLayer(branches, coeffs=[-1, 1])
        l_hid = L.ExpressionLayer(l_hid, lambda X: X * X)

        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(
                l_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name="%shidden_%d" % (prefix, idx),
                W=hidden_W_init,
                b=hidden_b_init,
            )
        l_out = L.DenseLayer(
            l_hid,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name="%soutput" % (prefix, ),
            W=output_W_init,
            b=output_b_init,
        )
        self._l_in = l_in
        self._l_out = l_out
        self._input_var = l_in.input_var
示例#13
0
    def build_network(self, vocab_size, doc_var, query_var, docmask_var,
                      qmask_var, candmask_var, W_init):
        """Build an attention-sum reader over a document/query pair.

        The query is encoded twice with bidirectional GRUs: once to score
        document tokens (``q``) and once to gate the document encoding
        (``qd``).  The gated document is re-encoded, every token is scored
        against ``q`` by a dot product, candidate positions are soft-maxed,
        and the per-token probabilities are summed into a vocabulary-sized
        distribution (pointer-sum attention).

        Args:
            vocab_size: number of rows in the embedding table.
            doc_var: int tensor of document token ids, B x N x 1.
            query_var: int tensor of query token ids, B x Q x 1.
            docmask_var, qmask_var: float masks, B x N and B x Q.
            candmask_var: float mask of candidate answer positions, B x N.
            W_init: initial embedding matrix, shared by doc and query.

        Returns:
            (final, final_v, l_doc, [l_q, l_q_c]) where ``final`` and
            ``final_v`` are the stochastic (dropout on) and deterministic
            B x vocab_size answer distributions, ``l_doc`` is the document
            encoder output layer and the list holds the query encoders.
        """
        l_docin = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=query_var)
        l_docmask = L.InputLayer(shape=(None, None), input_var=docmask_var)
        l_qmask = L.InputLayer(shape=(None, None), input_var=qmask_var)
        # Shared embedding table: the query embedder below reuses l_docembed.W.
        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=EMBED_DIM,
                                      W=W_init)  # B x N x 1 x DE
        l_doce = L.ReshapeLayer(
            l_docembed,
            (doc_var.shape[0], doc_var.shape[1], EMBED_DIM))  # B x N x DE
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=EMBED_DIM,
                                    W=l_docembed.W)

        # Query encoder used for scoring: concat of the last forward state
        # and the first backward state of a BiGRU.
        l_fwd_q = L.GRULayer(l_qembed,
                             NUM_HIDDEN,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True)
        l_bkd_q = L.GRULayer(l_qembed,
                             NUM_HIDDEN,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True)

        l_fwd_q_slice = L.SliceLayer(l_fwd_q, -1, 1)
        l_bkd_q_slice = L.SliceLayer(l_bkd_q, 0, 1)
        l_q = L.ConcatLayer([l_fwd_q_slice, l_bkd_q_slice])  # B x 2D
        q = L.get_output(l_q)  # B x 2D

        # First-pass document encoder (BiGRU over the raw embeddings).
        l_fwd_doc_1 = L.GRULayer(l_doce,
                                 NUM_HIDDEN,
                                 grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask,
                                 gradient_steps=GRAD_STEPS,
                                 precompute_input=True)
        l_bkd_doc_1 = L.GRULayer(l_doce,
                                 NUM_HIDDEN,
                                 grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask,
                                 gradient_steps=GRAD_STEPS,
                                 precompute_input=True,
                                 backwards=True)

        l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2)
        l_doc_1 = L.dropout(l_doc_1, p=DROPOUT_RATE)

        # Second, independent query encoder used only for gating the document.
        l_fwd_q_c = L.GRULayer(l_qembed,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_qmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_q_c = L.GRULayer(l_qembed,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_qmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)

        l_fwd_q_slice_c = L.SliceLayer(l_fwd_q_c, -1, 1)
        l_bkd_q_slice_c = L.SliceLayer(l_bkd_q_c, 0, 1)
        l_q_c = L.ConcatLayer([l_fwd_q_slice_c, l_bkd_q_slice_c])  # B x 2D

        # Broadcast the gating query vector over every document position.
        qd = L.get_output(l_q_c)
        q_rep = T.reshape(
            T.tile(qd, (1, doc_var.shape[1])),
            (doc_var.shape[0], doc_var.shape[1], 2 * NUM_HIDDEN))  # B x N x 2D

        # Gate the first-pass document encoding elementwise with the query.
        l_q_rep_in = L.InputLayer(shape=(None, None, 2 * NUM_HIDDEN),
                                  input_var=q_rep)
        l_doc_gru_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul)

        # Second-pass document encoder over the gated representation.
        l_fwd_doc = L.GRULayer(l_doc_gru_in,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doc_gru_in,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)

        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        # Train-time scores (dropout active).  Non-candidate positions get a
        # large negative logit (-20) so they receive ~zero softmax mass.
        d = L.get_output(l_doc)  # B x N x 2D
        p = T.batched_dot(d, q)  # B x N
        pm = T.nnet.softmax(
            T.set_subtensor(
                T.alloc(-20., p.shape[0], p.shape[1])[candmask_var.nonzero()],
                p[candmask_var.nonzero()]))

        # Pointer-sum: accumulate every token's probability onto its word id.
        index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
        final = T.inc_subtensor(
            T.alloc(0., p.shape[0], vocab_size)[index,
                                                T.flatten(doc_var, outdim=2)],
            pm)

        # Same computation with dropout disabled, for evaluation.
        dv = L.get_output(l_doc, deterministic=True)  # B x N x 2D
        p = T.batched_dot(dv, q)  # B x N
        pm = T.nnet.softmax(
            T.set_subtensor(
                T.alloc(-20., p.shape[0], p.shape[1])[candmask_var.nonzero()],
                p[candmask_var.nonzero()]))

        index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
        final_v = T.inc_subtensor(
            T.alloc(0., p.shape[0], vocab_size)[index,
                                                T.flatten(doc_var, outdim=2)],
            pm)

        return final, final_v, l_doc, [l_q, l_q_c]
示例#14
0
def clone(src_net, dst_net, mask_input):
    """
    Clones a lasagne neural network, keeping weights tied.

    For all layers of src_net in turn, starting at the first:
     1. creates a copy of the layer,
     2. reuses the original objects for weights and
     3. appends the new layer to dst_net.

    InputLayers are ignored.
    Recurrent layers (LSTMLayer) are passed mask_input.

    Supported layer types: InputLayer (skipped), DenseLayer,
    EmbeddingLayer, LSTMLayer and SliceLayer; any other layer type
    raises ValueError.
    """
    def _cloned_name(layer):
        # Unnamed layers have name=None; `None + '2'` would raise TypeError.
        return None if layer.name is None else layer.name + '2'

    logger.info("Net to be cloned:")
    for l in layers.get_all_layers(src_net):
        logger.info(" - {} ({}):".format(l.name, l))

    logger.info("Starting to clone..")
    for l in layers.get_all_layers(src_net):
        logger.info("src_net[...]: {} ({}):".format(l.name, l))
        # Exact-type dispatch on purpose: subclasses may need different
        # constructor arguments, so they fall through to the error below.
        if type(l) == layers.InputLayer:
            logger.info(' - skipping')
            continue
        if type(l) == layers.DenseLayer:
            dst_net = layers.DenseLayer(
                dst_net,
                num_units=l.num_units,
                W=l.W,  # shared Theano variable -> weights stay tied
                b=l.b,
                nonlinearity=l.nonlinearity,
                name=_cloned_name(l),
            )
        elif type(l) == layers.EmbeddingLayer:
            dst_net = layers.EmbeddingLayer(
                dst_net,
                l.input_size,
                l.output_size,
                W=l.W,
                name=_cloned_name(l),
            )
        elif type(l) == layers.LSTMLayer:
            # Rebuild each gate explicitly, reusing the original weight
            # objects so the clone stays tied to the source network.
            dst_net = layers.LSTMLayer(
                dst_net,
                l.num_units,
                ingate=layers.Gate(
                    W_in=l.W_in_to_ingate,
                    W_hid=l.W_hid_to_ingate,
                    W_cell=l.W_cell_to_ingate,
                    b=l.b_ingate,
                    nonlinearity=l.nonlinearity_ingate
                ),
                forgetgate=layers.Gate(
                    W_in=l.W_in_to_forgetgate,
                    W_hid=l.W_hid_to_forgetgate,
                    W_cell=l.W_cell_to_forgetgate,
                    b=l.b_forgetgate,
                    nonlinearity=l.nonlinearity_forgetgate
                ),
                cell=layers.Gate(
                    W_in=l.W_in_to_cell,
                    W_hid=l.W_hid_to_cell,
                    W_cell=None,  # cell gate has no peephole weight
                    b=l.b_cell,
                    nonlinearity=l.nonlinearity_cell
                ),
                outgate=layers.Gate(
                    W_in=l.W_in_to_outgate,
                    W_hid=l.W_hid_to_outgate,
                    W_cell=l.W_cell_to_outgate,
                    b=l.b_outgate,
                    nonlinearity=l.nonlinearity_outgate
                ),
                nonlinearity=l.nonlinearity,
                cell_init=l.cell_init,
                hid_init=l.hid_init,
                backwards=l.backwards,
                learn_init=l.learn_init,
                peepholes=l.peepholes,
                gradient_steps=l.gradient_steps,
                grad_clipping=l.grad_clipping,
                unroll_scan=l.unroll_scan,
                precompute_input=l.precompute_input,
                # mask_input=l.mask_input, # AttributeError: 'LSTMLayer' object has no attribute 'mask_input'
                name=_cloned_name(l),
                mask_input=mask_input,
            )
        elif type(l) == layers.SliceLayer:
            dst_net = layers.SliceLayer(
                dst_net,
                indices=l.slice,
                axis=l.axis,
                name=_cloned_name(l),
            )
        else:
            raise ValueError("Unhandled layer: {}".format(l))
        new_layer = layers.get_all_layers(dst_net)[-1]
        logger.info('dst_net[...]: {} ({})'.format(new_layer, new_layer.name))

    logger.info("Result of cloning:")
    for l in layers.get_all_layers(dst_net):
        logger.info(" - {} ({}):".format(l.name, l))

    return dst_net
示例#15
0
    def _invert_Conv2DLayer(self, layer, feeder):
        """Construct the deconvolution (transposed conv) counterpart of `layer`.

        `feeder` carries the signal to be back-projected; the returned
        layer convolves it with flipped filters so the output channel
        count matches `layer`'s input channel count.
        """
        def _check_padding_same():
            # True iff each filter dim is odd and padded by size // 2,
            # which is numerically identical to 'same' padding.
            for s, p in zip(layer.filter_size, layer.pad):
                if s % 2 != 1:
                    return False
                elif s // 2 != p:
                    return False
            return True

        # Warning they are swapped here.
        feeder = self._put_rectifiers(feeder, layer)

        f_s = layer.filter_size
        if layer.pad == 'same' or _check_padding_same():
            pad = 'same'
        elif layer.pad == 'valid' or layer.pad == (0, 0):
            # The inverse of a 'valid' convolution is a 'full' convolution.
            pad = 'full'
        else:
            raise RuntimeError("Define your padding as full or same.")

        # By definition the
        # Flip filters must be on to be a proper deconvolution.

        # Output channels = input channels of the forward layer.
        num_filters = L.get_output_shape(layer.input_layer)[1]
        if layer.stride == (4, 4):
            # Todo: clean this!
            # Strided conv inverse: dilate the feeder by the stride, then
            # run a stride-1 conv (fractionally strided convolution).
            print("Applying alexnet hack.")
            feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')
            output_layer = L.Conv2DLayer(feeder,
                                         num_filters=num_filters,
                                         filter_size=f_s,
                                         stride=1,
                                         pad=pad,
                                         nonlinearity=None,
                                         b=None,
                                         flip_filters=True)
            print("Applying alexnet hack part 2.")
            conv_layer = output_layer
            # NOTE(review): trimming 3 rows/cols assumes the AlexNet
            # 11x11 / stride-4 geometry — confirm for other configurations.
            output_layer = L.SliceLayer(L.SliceLayer(output_layer,
                                                     slice(0, -3),
                                                     axis=3),
                                        slice(0, -3),
                                        axis=2)
            # Expose the conv weights on the outer slice layer so code that
            # reads `.W` still finds them — presumably relied upon
            # downstream; verify against callers.
            output_layer.W = conv_layer.W
        elif layer.stride == (2, 2):
            # Todo: clean this! Seems to be the same code as for AlexNet above.
            print("Applying GoogLeNet hack.")
            feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')
            output_layer = L.Conv2DLayer(feeder,
                                         num_filters=num_filters,
                                         filter_size=f_s,
                                         stride=1,
                                         pad=pad,
                                         nonlinearity=None,
                                         b=None,
                                         flip_filters=True)
        else:
            # Todo: clean this. Repetitions all over.
            # Unit stride: a plain flipped-filter convolution suffices.
            output_layer = L.Conv2DLayer(feeder,
                                         num_filters=num_filters,
                                         filter_size=f_s,
                                         stride=1,
                                         pad=pad,
                                         nonlinearity=None,
                                         b=None,
                                         flip_filters=True)
        return output_layer
    def build_network(self,
                      vocab_size,
                      input_var,
                      mask_var,
                      docidx_var,
                      docidx_mask,
                      skip_connect=True):
        """Build a two-layer LSTM reader with an embedding-tied output.

        Pipeline: embedding lookup -> dropout -> LSTM-1 ->
        (optional skip-concat of the embeddings) -> dropout -> LSTM-2 ->
        concat of both layers' final states -> tanh dense bottleneck ->
        softmax over the vocabulary with weights tied to the embedding
        matrix (W_emb.T).

        Args:
            vocab_size: number of rows in the embedding table.
            input_var: int tensor of token ids, B x N x 1.
            mask_var: float sequence mask, B x N.
            docidx_var, docidx_mask: only referenced by the commented-out
                scan-based scorer below; unused on the active path.
            skip_connect: if True, feed [LSTM-1 output; embeddings] to the
                second LSTM instead of LSTM-1's output alone.

        Returns:
            Lasagne DenseLayer producing a B x vocab_size softmax.
        """
        l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)

        l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)

        # Embedding matrix comes from self.params so it can be shared
        # with the output layer below.
        l_embed = L.EmbeddingLayer(l_in,
                                   input_size=vocab_size,
                                   output_size=EMBED_DIM,
                                   W=self.params['W_emb'])

        l_embed_noise = L.dropout(l_embed, p=DROPOUT_RATE)

        # NOTE: Moved initialization of forget gate biases to init_params
        #forget_gate_1 = L.Gate(b=lasagne.init.Constant(3))
        #forget_gate_2 = L.Gate(b=lasagne.init.Constant(3))

        # NOTE: LSTM layer provided by Lasagne is slightly different from that used in DeepMind's paper.
        # In the paper the cell-to-* weights are not diagonal.
        # the 1st lstm layer: every gate is wired to externally-managed
        # parameters from self.params (shared/saved outside this method).
        in_gate = L.Gate(W_in=self.params['W_lstm1_xi'],
                         W_hid=self.params['W_lstm1_hi'],
                         W_cell=self.params['W_lstm1_ci'],
                         b=self.params['b_lstm1_i'],
                         nonlinearity=lasagne.nonlinearities.sigmoid)
        forget_gate = L.Gate(W_in=self.params['W_lstm1_xf'],
                             W_hid=self.params['W_lstm1_hf'],
                             W_cell=self.params['W_lstm1_cf'],
                             b=self.params['b_lstm1_f'],
                             nonlinearity=lasagne.nonlinearities.sigmoid)
        out_gate = L.Gate(W_in=self.params['W_lstm1_xo'],
                          W_hid=self.params['W_lstm1_ho'],
                          W_cell=self.params['W_lstm1_co'],
                          b=self.params['b_lstm1_o'],
                          nonlinearity=lasagne.nonlinearities.sigmoid)
        # Cell input has no peephole weight (W_cell=None) and uses tanh.
        cell_gate = L.Gate(W_in=self.params['W_lstm1_xc'],
                           W_hid=self.params['W_lstm1_hc'],
                           W_cell=None,
                           b=self.params['b_lstm1_c'],
                           nonlinearity=lasagne.nonlinearities.tanh)
        l_fwd_1 = L.LSTMLayer(l_embed_noise,
                              NUM_HIDDEN,
                              ingate=in_gate,
                              forgetgate=forget_gate,
                              cell=cell_gate,
                              outgate=out_gate,
                              peepholes=True,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)

        # the 2nd lstm layer
        if skip_connect:
            # construct skip connection from the lookup table to the 2nd layer
            batch_size, seq_len, _ = input_var.shape
            # concatenate the last dimension of l_fwd_1 and embed
            # (flatten both to 2-D, concat on features, reshape back to 3-D)
            l_fwd_1_shp = L.ReshapeLayer(l_fwd_1, (-1, NUM_HIDDEN))
            l_embed_shp = L.ReshapeLayer(l_embed, (-1, EMBED_DIM))
            to_next_layer = L.ReshapeLayer(
                L.concat([l_fwd_1_shp, l_embed_shp], axis=1),
                (batch_size, seq_len, NUM_HIDDEN + EMBED_DIM))
        else:
            to_next_layer = l_fwd_1

        to_next_layer_noise = L.dropout(to_next_layer, p=DROPOUT_RATE)

        # Second LSTM: same gate wiring pattern, lstm2 parameter set.
        in_gate = L.Gate(W_in=self.params['W_lstm2_xi'],
                         W_hid=self.params['W_lstm2_hi'],
                         W_cell=self.params['W_lstm2_ci'],
                         b=self.params['b_lstm2_i'],
                         nonlinearity=lasagne.nonlinearities.sigmoid)
        forget_gate = L.Gate(W_in=self.params['W_lstm2_xf'],
                             W_hid=self.params['W_lstm2_hf'],
                             W_cell=self.params['W_lstm2_cf'],
                             b=self.params['b_lstm2_f'],
                             nonlinearity=lasagne.nonlinearities.sigmoid)
        out_gate = L.Gate(W_in=self.params['W_lstm2_xo'],
                          W_hid=self.params['W_lstm2_ho'],
                          W_cell=self.params['W_lstm2_co'],
                          b=self.params['b_lstm2_o'],
                          nonlinearity=lasagne.nonlinearities.sigmoid)
        cell_gate = L.Gate(W_in=self.params['W_lstm2_xc'],
                           W_hid=self.params['W_lstm2_hc'],
                           W_cell=None,
                           b=self.params['b_lstm2_c'],
                           nonlinearity=lasagne.nonlinearities.tanh)
        l_fwd_2 = L.LSTMLayer(to_next_layer_noise,
                              NUM_HIDDEN,
                              ingate=in_gate,
                              forgetgate=forget_gate,
                              cell=cell_gate,
                              outgate=out_gate,
                              peepholes=True,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)

        # slice final states of both lstm layers
        l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
        l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)

        # g will be used to score the words based on their embeddings
        g = L.DenseLayer(L.concat([l_fwd_1_slice, l_fwd_2_slice], axis=1),
                         num_units=EMBED_DIM,
                         W=self.params['W_dense'],
                         b=self.params['b_dense'],
                         nonlinearity=lasagne.nonlinearities.tanh)

        ## get outputs
        #g_out = L.get_output(g) # B x D
        #g_out_val = L.get_output(g, deterministic=True) # B x D

        ## compute softmax probs
        #probs,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm),
        #                    outputs_info=None,
        #                    sequences=[g_out,docidx_var,docidx_mask],
        #                    non_sequences=self.params['W_emb'])
        #predicted_probs = probs.reshape(docidx_var.shape) # B x N
        #probs_val,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm),
        #                    outputs_info=None,
        #                    sequences=[g_out_val,docidx_var,docidx_mask],
        #                    non_sequences=self.params['W_emb'])
        #predicted_probs_val = probs_val.reshape(docidx_var.shape) # B x N
        #return predicted_probs, predicted_probs_val

        # W is shared with the lookup table
        l_out = L.DenseLayer(g,
                             num_units=vocab_size,
                             W=self.params['W_emb'].T,
                             nonlinearity=lasagne.nonlinearities.softmax,
                             b=None)
        return l_out
示例#17
0
    def buildModel(self):
        """Compile the Theano functions for all three training modes.

        Builds three coupled objectives over a shared vertex embedding:
          * step train   - supervised classification from sparse node
                           features plus embedding lookups
                           (`self.step_train`, `self.test_fn`),
          * supervised   - skip-gram style edge prediction on
                           (first, second) vertex pairs (`self.sup_train`),
          * unsupervised - subpath LSTM that reweights the edge
                           cross-entropy by a learned path weight
                           (`self.lstm_fn`, `self.alpha_fn`).

        All compiled functions are stored on `self`; nothing is returned.
        """
        print(' -- Building...')
        # Symbolic inputs for all three training modes.
        x_init = sparse.csr_matrix('x', dtype='float32')
        y_init = T.imatrix('y')
        g_init = T.imatrix('g')
        ind_init = T.ivector('ind')
        sub_path_init = T.imatrix('subPathsBatch')
        mask_init = T.fmatrix('subMask')

        # step train
        x_input = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                 input_var=x_init)
        g_input = lgl.InputLayer(shape=(None, 2), input_var=g_init)
        ind_input = lgl.InputLayer(shape=(None, ), input_var=ind_init)
        # g holds (first, second) vertex id pairs of an edge.
        pair_second = lgl.SliceLayer(g_input, indices=1, axis=1)
        pair_first = lgl.SliceLayer(g_input, indices=0, axis=1)
        pair_first_emd = lgl.EmbeddingLayer(pair_first,
                                            input_size=self.num_ver,
                                            output_size=self.embedding_size)
        # Predict the second vertex from the first vertex's embedding.
        emd_to_numver = layers.DenseLayer(
            pair_first_emd,
            self.num_ver,
            nonlinearity=lg.nonlinearities.softmax)
        # Lookup for labelled nodes; shares W with pair_first_emd.
        index_emd = lgl.EmbeddingLayer(ind_input,
                                       input_size=self.num_ver,
                                       output_size=self.embedding_size,
                                       W=pair_first_emd.W)
        x_to_ydim = layers.SparseLayer(x_input,
                                       self.y.shape[1],
                                       nonlinearity=lg.nonlinearities.softmax)
        index_emd = layers.DenseLayer(index_emd,
                                      self.y.shape[1],
                                      nonlinearity=lg.nonlinearities.softmax)
        concat_two = lgl.ConcatLayer([x_to_ydim, index_emd], axis=1)
        concat_two = layers.DenseLayer(concat_two,
                                       self.y.shape[1],
                                       nonlinearity=lg.nonlinearities.softmax)
        concat_two_output = lgl.get_output(concat_two)
        # Total step loss = combined + feature-only + embedding-only CE.
        step_loss = lgo.categorical_crossentropy(concat_two_output,
                                                 y_init).mean()
        hid_loss = lgl.get_output(x_to_ydim)
        step_loss += lgo.categorical_crossentropy(hid_loss, y_init).mean()
        emd_loss = lgl.get_output(index_emd)
        step_loss += lgo.categorical_crossentropy(emd_loss, y_init).mean()
        step_params = [
            index_emd.W, index_emd.b, x_to_ydim.W, x_to_ydim.b, concat_two.W,
            concat_two.b
        ]
        step_updates = lg.updates.sgd(step_loss,
                                      step_params,
                                      learning_rate=self.step_learning_rate)
        self.step_train = theano.function([x_init, y_init, ind_init],
                                          step_loss,
                                          updates=step_updates,
                                          on_unused_input='ignore')
        self.test_fn = theano.function([x_init, ind_init],
                                       concat_two_output,
                                       on_unused_input='ignore')

        # supervised train
        fc_output = lgl.get_output(emd_to_numver)
        pair_second_output = lgl.get_output(pair_second)
        sup_loss = lgo.categorical_crossentropy(fc_output,
                                                pair_second_output).sum()
        sup_params = lgl.get_all_params(emd_to_numver, trainable=True)
        sup_updates = lg.updates.sgd(sup_loss,
                                     sup_params,
                                     learning_rate=self.sup_learning_rate)
        self.sup_train = theano.function([g_init],
                                         sup_loss,
                                         updates=sup_updates,
                                         on_unused_input='ignore')

        # Per-pair cross-entropy, reshaped to a row vector for the dot
        # product with the path weights below.
        cross_entropy = lgo.categorical_crossentropy(fc_output,
                                                     pair_second_output)
        cross_entropy = T.reshape(cross_entropy, (1, self.unsup_batch_size),
                                  ndim=None)

        # Subpath LSTM used to derive a per-path weight; embedding shared
        # with the vertex embedding above.
        mask_input = lgl.InputLayer(shape=(None, self.window_size + 1),
                                    input_var=mask_init)
        subPath_in = lgl.InputLayer(shape=(None, self.window_size + 1),
                                    input_var=sub_path_init)
        sub_path_emd = lgl.EmbeddingLayer(subPath_in,
                                          input_size=self.num_ver,
                                          output_size=self.embedding_size,
                                          W=pair_first_emd.W)

        lstm_layer = lgl.LSTMLayer(sub_path_emd,
                                   self.lstm_hidden_units,
                                   grad_clipping=3,
                                   mask_input=mask_input)

        # handle path weight: mean-pool the LSTM states to one scalar per
        # subpath, batch-normalize, then squash into (1, 1.3).
        max1 = T.mean(lgl.get_output(lstm_layer), axis=1)
        max2 = T.mean(max1, axis=1)
        # NOTE: a dead `T.fcol('max2')` placeholder was previously created
        # here and immediately overwritten; removed as dead code.
        max2_init = T.reshape(max2, (self.subpath_num, 1))
        max2_input = lgl.InputLayer(shape=(self.subpath_num, 1),
                                    input_var=max2_init)
        max2_input = lgl.BatchNormLayer(max2_input)
        path_weight = lgl.get_output(max2_input)
        path_weight = lg.nonlinearities.sigmoid(path_weight)
        path_weight = 1 + 0.3 * path_weight

        # unsupervised train
        reweight_loss = T.dot(cross_entropy, path_weight)[0][0]
        lstm_params_all = lgl.get_all_params(lstm_layer, trainable=True)
        # Only update parameters not already trained by sup_train.
        lstm_params = list(set(lstm_params_all).difference(set(sup_params)))
        lstm_updates = lg.updates.sgd(reweight_loss,
                                      lstm_params,
                                      learning_rate=0.01)
        self.lstm_fn = theano.function([sub_path_init, g_init, mask_init],
                                       reweight_loss,
                                       updates=lstm_updates,
                                       on_unused_input='ignore')
        alpha_updates = lg.updates.sgd(reweight_loss,
                                       sup_params,
                                       learning_rate=0.001)
        self.alpha_fn = theano.function([sub_path_init, g_init, mask_init],
                                        reweight_loss,
                                        updates=alpha_updates,
                                        on_unused_input='ignore')
        print(' -- Done!')
示例#18
0
    def build_network(self, K, vocab_size, doc_var, query_var, cand_var,
                      docmask_var, qmask_var, candmask_var, W_init):

        l_docin = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=query_var)
        l_docmask = L.InputLayer(shape=(None, None), input_var=docmask_var)
        l_qmask = L.InputLayer(shape=(None, None), input_var=qmask_var)
        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=self.embed_dim,
                                      W=W_init)  # B x N x 1 x DE
        l_doce = L.ReshapeLayer(
            l_docembed,
            (doc_var.shape[0], doc_var.shape[1], self.embed_dim))  # B x N x DE
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=self.embed_dim,
                                    W=l_docembed.W)

        if self.train_emb == 0:
            l_docembed.params[l_docembed.W].remove('trainable')

        l_fwd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True)
        l_bkd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True)

        l_fwd_q_slice = L.SliceLayer(l_fwd_q, -1, 1)
        l_bkd_q_slice = L.SliceLayer(l_bkd_q, 0, 1)
        l_q = L.ConcatLayer([l_fwd_q_slice, l_bkd_q_slice])  # B x 2D
        q = L.get_output(l_q)  # B x 2D

        l_qs = [l_q]
        for i in range(K - 1):
            l_fwd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True)
            l_bkd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True,
                                     backwards=True)

            l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2)

            l_fwd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True)
            l_bkd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True,
                                   backwards=True)

            l_fwd_q_slice_1 = L.SliceLayer(l_fwd_q_1, -1, 1)
            l_bkd_q_slice_1 = L.SliceLayer(l_bkd_q_1, 0, 1)
            l_q_c_1 = L.ConcatLayer([l_fwd_q_slice_1,
                                     l_bkd_q_slice_1])  # B x DE

            l_qs.append(l_q_c_1)

            qd = L.get_output(l_q_c_1)
            q_rep = T.reshape(T.tile(qd, (1, doc_var.shape[1])),
                              (doc_var.shape[0], doc_var.shape[1],
                               2 * self.nhidden))  # B x N x DE

            l_q_rep_in = L.InputLayer(shape=(None, None, 2 * self.nhidden),
                                      input_var=q_rep)
            l_doc_2_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul)
            l_doce = L.dropout(l_doc_2_in, p=self.dropout)

        l_fwd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)

        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        d = L.get_output(l_doc)  # B x N x 2D
        p = T.batched_dot(d, q)  # B x N
        pm = T.nnet.softmax(p) * candmask_var
        pm = pm / pm.sum(axis=1)[:, np.newaxis]
        final = T.batched_dot(pm, cand_var)

        dv = L.get_output(l_doc, deterministic=True)  # B x N x 2D
        p = T.batched_dot(dv, q)  # B x N
        pm = T.nnet.softmax(p) * candmask_var
        pm = pm / pm.sum(axis=1)[:, np.newaxis]
        final_v = T.batched_dot(pm, cand_var)

        return final, final_v, l_doc, l_qs, l_docembed.W
示例#19
0
    def __init__(self, input_size,
                 img_width,
                 img_height,
                 channel_size=1,
                 action_size=None,
                 feature_dim=10,
                 hidden_sizes=(32, 32),
                 conv_args=None,
                 l2_reg=0,
                 kl_weight=1,
                 learning_rate=1e-4,
                 hidden_act=nonlinearities.tanh,
                 use_actions=False,
                 set_norm_constant=None):
        """Siamese variational encoder with a latent-space pair classifier.

        Two inputs are encoded by twin ConvNet+MLP towers into Gaussian
        latents (mu, log-var), latent samples are drawn, and a small MLP
        classifies the pair from the concatenated samples.  Trains with a
        VAE-style loss (reconstruction/class term + weighted KL).

        Parameters
        ----------
        input_size : int
            Flattened size of each input vector.  When ``use_actions`` is
            True the last ``action_size`` entries are action features and
            the rest are image pixels.
        img_width, img_height : int
            Spatial shape the image part is reshaped to.
        channel_size : int
            Number of image channels.
        action_size : int or None
            Number of trailing action features (required if ``use_actions``).
        feature_dim : int
            Dimensionality of each tower's latent code.
        hidden_sizes : tuple of int
            Hidden-layer sizes for the MLP heads.
        conv_args : dict or None
            Extra keyword arguments forwarded to both ConvNet towers.
        l2_reg : float
            L2 regularization coefficient (0 disables).
        kl_weight : float
            Weight of the KL divergence term.
        learning_rate : float
            Adam step size.
        hidden_act : callable
            Nonlinearity for MLP hidden layers.
        use_actions : bool
            Slice off action features before the CNN and re-concatenate
            them with the CNN features afterwards.
        set_norm_constant : float or None
            Stored on the instance; not used in this constructor.
        """
        # Use a None sentinel instead of a mutable default dict, which
        # would be shared across every instance of this class.
        if conv_args is None:
            conv_args = {}

        self.input_size = input_size
        self.set_norm_constant = set_norm_constant
        self.sym_x1 = T.matrix()
        self.sym_x2 = T.matrix()
        self.sym_labels = T.matrix()

        self.lin1 = lasagne.layers.InputLayer((None, input_size))
        self.lin2 = lasagne.layers.InputLayer((None, input_size))

        if use_actions:
            # Split each input into image part and trailing action part.
            lin1 = L.SliceLayer(self.lin1, slice(0, -action_size))
            lin2 = L.SliceLayer(self.lin2, slice(0, -action_size))
            lact1 = L.SliceLayer(self.lin1, slice(-action_size, None))
            lact2 = L.SliceLayer(self.lin2, slice(-action_size, None))
        else:
            lin1 = self.lin1
            lin2 = self.lin2

        lin1 = L.ReshapeLayer(lin1, (-1, channel_size, img_width, img_height))
        lin2 = L.ReshapeLayer(lin2, (-1, channel_size, img_width, img_height))

        self.base1 = ConvNet(lin1, **conv_args)
        self.base2 = ConvNet(lin2, **conv_args)

        l1_enc_h2 = self.base1.output_layer()
        l2_enc_h2 = self.base2.output_layer()

        if use_actions:
            # Re-attach the action features to the CNN features.
            l1_enc_h2 = L.ConcatLayer([l1_enc_h2, lact1])
            l2_enc_h2 = L.ConcatLayer([l2_enc_h2, lact2])

        self.mean_net1 = MLP(l1_enc_h2, feature_dim, hidden_sizes, hidden_act)
        self.mean_net2 = MLP(l2_enc_h2, feature_dim, hidden_sizes, hidden_act)

        self.logvar_net1 = MLP(l1_enc_h2, feature_dim, hidden_sizes, hidden_act)
        # BUG FIX: this head previously read from the first tower's
        # features (l1_enc_h2); the second log-variance head must encode
        # the second input, mirroring mean_net2.
        self.logvar_net2 = MLP(l2_enc_h2, feature_dim, hidden_sizes, hidden_act)

        l1_mu = self.mean_net1.output_layer()
        l1_log_var = self.logvar_net1.output_layer()

        l2_mu = self.mean_net2.output_layer()
        l2_log_var = self.logvar_net2.output_layer()

        # Sample latent variables via the reparameterization trick.
        l1_z = SimpleSampleLayer(mean=l1_mu, log_var=l1_log_var)
        l2_z = SimpleSampleLayer(mean=l2_mu, log_var=l2_log_var)

        combined_z = L.ConcatLayer([l1_z, l2_z])
        # Classify the pair from the concatenated latent samples.
        self.class_net = MLP(combined_z, 1, hidden_sizes, output_act=nonlinearities.sigmoid)
        l_output = self.class_net.output_layer()

        combined_mu = L.ConcatLayer([l1_mu, l2_mu])
        combined_logvar = L.ConcatLayer([l1_log_var, l2_log_var])
        z_train, z_mu_train, z_log_var_train, output_train = L.get_output(
            [combined_z, combined_mu, combined_logvar, l_output],
            inputs={self.lin1: self.sym_x1, self.lin2: self.sym_x2},
            deterministic=False
        )

        l1_z_t, l2_z_t = L.get_output(
            [l1_z, l2_z],
            inputs={self.lin1: self.sym_x1, self.lin2: self.sym_x2},
            deterministic=False
        )

        # Deterministic pass (no sampling noise) for evaluation.
        output_test = L.get_output(l_output,
                                   inputs={self.lin1: self.sym_x1, self.lin2: self.sym_x2},
                                   deterministic=True)

        self.LL_train, self.class_loss, self.kl_loss = latent_gaussian_x_bernoulli(
            z_train, z_mu_train, z_log_var_train,
            output_train, self.sym_labels, True, kl_weight)
        # Negate: the helper returns a log-likelihood; we minimize its
        # negative.
        self.LL_train *= -1

        if l2_reg != 0:
            self.LL_train += l2_reg * lasagne.regularization.regularize_network_params(
                l_output, lasagne.regularization.l2)

        self.l_output = l_output
        self.output_test = output_test

        params = self.params()

        grads = T.grad(self.LL_train, params)

        updates = lasagne.updates.adam(grads, params, learning_rate=learning_rate)

        with compile_timer('train_fn'):
            self.train_model = theano.function([self.sym_x1, self.sym_x2, self.sym_labels],
                                               [self.LL_train, self.class_loss, self.kl_loss],
                                               updates=updates)
        with compile_timer('test_fn'):
            self.test_model = theano.function([self.sym_x1, self.sym_x2], self.output_test)
示例#20
0
    def _invert_layer_recursion(self, layer, prev_layer):
        """
        Invert ``layer`` by recursing from the network output back toward
        the input, building the mirrored (relevance/explanation) graph.

        Note for concatenation layers this will be called multiple times.

        :param layer: Start the inversion recursion in this layer.
        :param prev_layer: The layer we arrived from; used to select the
            correct slice when ``layer`` is a ConcatLayer.
        :return: the inverted layer, part of the entire graph.
        """
        # If we have a concatenation layer,
        # we must find out at which point it is concatenated and slice.
        # We should not store it in the map for this layer.
        # Because that would corrupt the result.

        # Did we already invert this?  (Memoized per layer.)
        if self.inverse_map[layer] is not None:
            return self.inverse_map[layer]

        # Invert every layer that consumes our output; their inverses are
        # the sources feeding relevance back into this layer.
        feeder = [
            self._invert_layer_recursion(l, layer)
            for l in self.output_map[layer]
        ]

        # Concatenation layers must be handled here.
        # This is not elegant, but it is important for the recursion that
        # the correct slice is computed every single time

        # Find the inverse of the layers this one feeds.
        # If this is none, it is the top layer and
        # we have to inject the explanation starting point
        if len(feeder) == 1:
            feeder = feeder[0]
        elif len(feeder) == 0:
            # It feeds nothing, so must be
            # output layer with restricted assumptions
            def nonlinearity(x):
                # Replace the output with the fixed relevance seed while
                # keeping the symbolic dependency on x (hence 0 * x).
                return 0 * x + self.relevance_values

            feeder = L.NonlinearityLayer(layer, nonlinearity=nonlinearity)
        else:
            # Multiple feeders.
            if type(self.output_map[layer][0]) is SliceLayer:
                print("Assuming all slices and non-overlapping")
                # TODO CHECK ASSUMPTIONS ARE APPLICABLE
                cat_axis = self.output_map[layer][0].axis
                print([l.slice for l in self.output_map[layer]])
                # Re-assemble the sliced pieces along the slice axis.
                feeder = L.ConcatLayer(feeder, axis=cat_axis)
            else:
                # Generic fan-out: relevance from all consumers is summed.
                feeders = feeder
                feeder = feeders[0]
                for f in feeders[1:]:
                    feeder = L.ElemwiseSumLayer([feeder, f])

        # Concatenation layer or other layer.
        if isinstance(layer, L.ConcatLayer):
            # Compute the offset of prev_layer inside the concatenation and
            # return only its slice; deliberately NOT memoized (see above).
            axis = layer.axis
            start_slice = 0
            for l in layer.input_layers:
                if l == prev_layer:
                    break
                start_slice += L.get_output_shape(l)[axis]
            end_slice = start_slice + L.get_output_shape(prev_layer)[axis]
            return L.SliceLayer(feeder,
                                slice(start_slice, end_slice),
                                axis=axis)
        else:
            self.inverse_map[layer] = self._invert_layer(layer, feeder)
            return self.inverse_map[layer]
示例#21
0
def build_model(vocab_size,
                doc_var,
                qry_var,
                doc_mask_var,
                qry_mask_var,
                W_init=lasagne.init.Normal()):
    """Build an Attentive-Reader-style network over document and query.

    Both sequences share one embedding table and are read by bidirectional
    LSTMs; their final states are projected, combined, and softmaxed over
    the vocabulary (tied to the embedding weights).

    :param vocab_size: size of the shared vocabulary.
    :param doc_var, qry_var: int tensors of token ids, (batch, time, 1).
    :param doc_mask_var, qry_mask_var: float masks, (batch, time).
    :param W_init: initializer for the shared embedding matrix.
    :return: the output softmax layer over the vocabulary.
    """
    in_doc = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
    in_qry = L.InputLayer(shape=(None, None, 1), input_var=qry_var)

    embed_doc = L.EmbeddingLayer(in_doc, vocab_size, EMBED_DIM, W=W_init)
    # Query embedding is tied to the document embedding table.
    embed_qry = L.EmbeddingLayer(in_qry,
                                 vocab_size,
                                 EMBED_DIM,
                                 W=embed_doc.W)

    mask_doc = L.InputLayer(shape=(None, None), input_var=doc_mask_var)
    mask_qry = L.InputLayer(shape=(None, None), input_var=qry_mask_var)

    def make_lstm(incoming, mask, go_backwards):
        # One direction of a bidirectional LSTM reader.
        return L.LSTMLayer(incoming,
                           NUM_HIDDEN,
                           grad_clipping=GRAD_CLIP,
                           mask_input=mask,
                           gradient_steps=GRAD_STEPS,
                           precompute_input=True,
                           backwards=go_backwards)

    # Creation order matches weight-initialization RNG draws.
    doc_fwd = make_lstm(embed_doc, mask_doc, False)
    doc_bkd = make_lstm(embed_doc, mask_doc, True)
    qry_fwd = make_lstm(embed_qry, mask_qry, False)
    qry_bkd = make_lstm(embed_qry, mask_qry, True)

    # Final state of each direction: last step forward, first step backward.
    doc_fwd_last = L.SliceLayer(doc_fwd, -1, 1)
    doc_bkd_first = L.SliceLayer(doc_bkd, 0, 1)
    qry_fwd_last = L.SliceLayer(qry_fwd, -1, 1)
    qry_bkd_first = L.SliceLayer(qry_bkd, 0, 1)

    r = L.DenseLayer(L.ElemwiseSumLayer([doc_fwd_last, doc_bkd_first]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)
    u = L.DenseLayer(L.ElemwiseSumLayer([qry_fwd_last, qry_bkd_first]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)

    # Joint document/query representation.
    g = L.DenseLayer(L.concat([r, u], axis=1),
                     num_units=EMBED_DIM,
                     W=lasagne.init.GlorotNormal(),
                     nonlinearity=lasagne.nonlinearities.tanh)

    # Output distribution with weights tied to the embedding (transposed),
    # no bias.
    return L.DenseLayer(g,
                        num_units=vocab_size,
                        W=embed_doc.W.T,
                        nonlinearity=lasagne.nonlinearities.softmax,
                        b=None)
示例#22
0
    def build_critic(self,
                     critic_input_var,
                     condition_var,
                     vocoder,
                     ctxsize,
                     nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                     postlayers_nb=6,
                     use_LSweighting=True,
                     LSWGANtransfreqcutoff=4000,
                     LSWGANtranscoef=1.0 / 8.0,
                     use_WGAN_incnoisefeature=False):
        """Build the WGAN critic over vocoder features plus linguistic context.

        The critic slices the spectral (and optionally noise) bands out of
        the vocoder feature vector, runs gated 2D convolutions over each,
        concatenates them with a processed context stream, and finishes
        with a stack of per-frame dense layers ending in a linear scalar.

        :param critic_input_var: symbolic input, assumed
            (batch, time, vocoder.featuressize()) — features ordered
            f0 | spec | noise (see slices below).
        :param condition_var: symbolic conditioning input,
            (batch, time, ctxsize).
        :param vocoder: object exposing f0size()/specsize()/noisesize()/
            featuressize() and fs (sampling rate).
        :param ctxsize: size of the linguistic context vector per frame.
        :param nonlinearity: activation used throughout the critic.
        :param postlayers_nb: number of final fully-connected layers.
        :param use_LSweighting: apply a sigmoid frequency weighting
            (WGAN + least-squares mix) to the sliced bands.
        :param LSWGANtransfreqcutoff: transition cutoff frequency [Hz]
            for the LS weighting.
        :param LSWGANtranscoef: steepness of the weighting transition.
        :param use_WGAN_incnoisefeature: also feed the noise band to the
            critic.
        :return: [output layer, feature input layer, context input layer].
        """
        useLRN = False  # TODO

        layer_critic = ll.InputLayer(shape=(None, None,
                                            vocoder.featuressize()),
                                     input_var=critic_input_var,
                                     name='input')

        # Convolution window length in frames (odd, centered), derived
        # from the window duration at a 5 ms frame shift.
        winlen = int(0.5 * self._windur / 0.005) * 2 + 1

        layerstoconcats = []

        # Amplitude spectrum
        layer = ll.SliceLayer(layer_critic,
                              indices=slice(
                                  vocoder.f0size(),
                                  vocoder.f0size() + vocoder.specsize()),
                              axis=2,
                              name='spec_slice')  # Assumed feature order

        if use_LSweighting:  # Using weighted WGAN+LS
            print(
                'WGAN Weighted LS - critic - SPEC (trans cutoff {}Hz)'.format(
                    LSWGANtransfreqcutoff))
            # wganls_spec_weights_ = nonlin_sigmoidparm(np.arange(vocoder.specsize(), dtype=theano.config.floatX),  int(LSWGANtransfreqcutoff*vocoder.specsize()), LSWGANtranscoef)
            # Per-bin sigmoid weights fading out above the cutoff frequency.
            wganls_spec_weights_ = nonlin_sigmoidparm(
                np.arange(vocoder.specsize(), dtype=theano.config.floatX),
                sp.freq2fwspecidx(LSWGANtransfreqcutoff, vocoder.fs,
                                  vocoder.specsize()), LSWGANtranscoef)
            wganls_weights = theano.shared(
                value=np.asarray(wganls_spec_weights_),
                name='wganls_spec_weights_')
            layer = CstMulLayer(layer,
                                cstW=wganls_weights,
                                name='cstdot_wganls_weights')

        # Insert a channel axis: (B, T, F) -> (B, 1, T, F) for 2D convs.
        layer = ll.dimshuffle(layer, [0, 'x', 1, 2], name='spec_dimshuffle')
        for layi in xrange(self._nbcnnlayers):
            layerstr = 'spec_l' + str(1 + layi) + '_GC{}x{}x{}'.format(
                self._nbfilters, winlen, self._spec_freqlen)
            # strides>1 make the first two Conv layers pyramidal. Increase patches' effects here and there, bad.
            layer = layer_GatedConv2DLayer(layer,
                                           self._nbfilters,
                                           [winlen, self._spec_freqlen],
                                           pad='same',
                                           nonlinearity=nonlinearity,
                                           name=layerstr)
            if useLRN: layer = ll.LocalResponseNormalization2DLayer(layer)
        # Move channels last and flatten to (B, T, F*channels) for concat.
        layer = ll.dimshuffle(layer, [0, 2, 3, 1], name='spec_dimshuffle')
        layer_spec = ll.flatten(layer, outdim=3, name='spec_flatten')
        layerstoconcats.append(layer_spec)

        if use_WGAN_incnoisefeature and vocoder.noisesize(
        ) > 0:  # Add noise in critic
            # Noise band: same pipeline as the spectrum above.
            layer = ll.SliceLayer(layer_critic,
                                  indices=slice(
                                      vocoder.f0size() + vocoder.specsize(),
                                      vocoder.f0size() + vocoder.specsize() +
                                      vocoder.noisesize()),
                                  axis=2,
                                  name='nm_slice')

            if use_LSweighting:  # Using weighted WGAN+LS
                print('WGAN Weighted LS - critic - NM (trans cutoff {}Hz)'.
                      format(LSWGANtransfreqcutoff))
                # wganls_spec_weights_ = nonlin_sigmoidparm(np.arange(vocoder.noisesize(), dtype=theano.config.floatX),  int(LSWGANtransfreqcutoff*vocoder.noisesize()), LSWGANtranscoef)
                wganls_spec_weights_ = nonlin_sigmoidparm(
                    np.arange(vocoder.noisesize(), dtype=theano.config.floatX),
                    sp.freq2fwspecidx(LSWGANtransfreqcutoff, vocoder.fs,
                                      vocoder.noisesize()), LSWGANtranscoef)
                wganls_weights = theano.shared(
                    value=np.asarray(wganls_spec_weights_),
                    name='wganls_spec_weights_')
                layer = CstMulLayer(layer,
                                    cstW=wganls_weights,
                                    name='cstdot_wganls_weights')

            layer = ll.dimshuffle(layer, [0, 'x', 1, 2], name='nm_dimshuffle')
            # Noise branch uses about half as many conv layers (at least 1).
            for layi in xrange(np.max(
                (1, int(np.ceil(self._nbcnnlayers / 2))))):
                layerstr = 'nm_l' + str(1 + layi) + '_GC{}x{}x{}'.format(
                    self._nbfilters, winlen, self._noise_freqlen)
                layer = layer_GatedConv2DLayer(layer,
                                               self._nbfilters,
                                               [winlen, self._noise_freqlen],
                                               pad='same',
                                               nonlinearity=nonlinearity,
                                               name=layerstr)
                if useLRN: layer = ll.LocalResponseNormalization2DLayer(layer)
            layer = ll.dimshuffle(layer, [0, 2, 3, 1], name='nm_dimshuffle')
            layer_bndnm = ll.flatten(layer, outdim=3, name='nm_flatten')
            layerstoconcats.append(layer_bndnm)

        # Add the contexts
        layer_ctx_input = ll.InputLayer(shape=(None, None, ctxsize),
                                        input_var=condition_var,
                                        name='ctx_input')
        layer_ctx = layer_context(layer_ctx_input,
                                  ctx_nblayers=self._ctx_nblayers,
                                  ctx_nbfilters=self._ctx_nbfilters,
                                  ctx_winlen=self._ctx_winlen,
                                  hiddensize=self._hiddensize,
                                  nonlinearity=nonlinearity,
                                  bn_axes=None,
                                  bn_cnn_axes=None,
                                  critic=True,
                                  useLRN=useLRN)
        layerstoconcats.append(layer_ctx)

        # Concatenate the features analysis with the contexts...
        layer = ll.ConcatLayer(layerstoconcats,
                               axis=2,
                               name='ctx_features.concat')

        # ... and finalize with a common FC network
        for layi in xrange(postlayers_nb):
            layerstr = 'post.l' + str(1 + layi) + '_FC' + str(self._hiddensize)
            layer = ll.DenseLayer(layer,
                                  self._hiddensize,
                                  nonlinearity=nonlinearity,
                                  num_leading_axes=2,
                                  name=layerstr)

        # output layer (linear)
        layer = ll.DenseLayer(layer,
                              1,
                              nonlinearity=None,
                              num_leading_axes=2,
                              name='projection')  # No nonlin for this output
        return [layer, layer_critic, layer_ctx_input]
示例#23
0
    def __init__(self, n_inputs=None, n_outputs=None, input_shape=None,
                 n_bypass=0,
                 density='mog',
                 n_hiddens=(10, 10), impute_missing=True, seed=None,
                 n_filters=(), filter_sizes=3, pool_sizes=2,
                 n_rnn=0,
                 **density_opts):

        """Initialize a mixture density network with custom layers

        Parameters
        ----------
        n_inputs : int
            Total input dimensionality (data/summary stats)
        n_outputs : int
            Dimensionality of output (simulator parameters)
        input_shape : tuple
            Size to which data are reshaped before CNN or RNN
        n_bypass : int
            Number of elements at end of input which bypass CNN or RNN
        density : string
            Type of density condition on the network, can be 'mog' or 'maf'
        n_filters : list of ints
            Number of filters  per convolutional layer
        n_hiddens : list of ints
            Number of hidden units per fully connected layer
        n_rnn : int
            Number of RNN units (0 disables the RNN; the code compares
            ``n_rnn > 0``, so ``None`` is not accepted)
        filter_sizes : int or list of ints
            Convolution filter size(s); a single int is broadcast to all
            convolutional layers
        pool_sizes : int or list of ints
            Max-pool size(s); a single int is broadcast to all
            convolutional layers
        impute_missing : bool
            If set to True, learns replacement value for NaNs, otherwise those
            inputs are set to zero
        seed : int or None
            If provided, random number generator will be seeded
        density_opts : dict
            Options for the density estimator (e.g. n_components for 'mog')
        """
        # RNN and CNN branches are mutually exclusive.
        if n_rnn > 0 and len(n_filters) > 0:
            raise NotImplementedError
        assert isint(n_inputs) and isint(n_outputs)\
            and n_inputs > 0 and n_outputs > 0

        self.density = density.lower()
        self.impute_missing = impute_missing
        self.n_hiddens = list(n_hiddens)
        self.n_outputs, self.n_inputs = n_outputs, n_inputs
        self.n_bypass = n_bypass

        self.n_rnn = n_rnn

        # Broadcast scalar filter/pool sizes to one entry per conv layer.
        self.n_filters, self.filter_sizes, self.pool_sizes, n_cnn = \
            list(n_filters), filter_sizes, pool_sizes, len(n_filters)
        if type(self.filter_sizes) is int:
            self.filter_sizes = [self.filter_sizes for _ in range(n_cnn)]
        else:
            assert len(self.filter_sizes) >= n_cnn
        if type(self.pool_sizes) is int:
            self.pool_sizes = [self.pool_sizes for _ in range(n_cnn)]
        else:
            assert len(self.pool_sizes) >= n_cnn

        # Importance weights placeholder (one weight per sample).
        self.iws = tt.vector('iws', dtype=dtype)

        self.seed = seed
        if seed is not None:
            self.rng = np.random.RandomState(seed=seed)
        else:
            self.rng = np.random.RandomState()
        lasagne.random.set_rng(self.rng)

        self.input_shape = (n_inputs,) if input_shape is None else input_shape
        # Bypass elements are appended after the reshaped part, so shapes
        # must account for the full input.
        assert np.prod(self.input_shape) + self.n_bypass == self.n_inputs
        assert 1 <= len(self.input_shape) <= 3

        # params: output placeholder (batch, self.n_outputs)
        self.params = tensorN(2, name='params', dtype=dtype)

        # stats : input placeholder, (batch, self.n_inputs)
        self.stats = tensorN(2, name='stats', dtype=dtype)

        # compose layers; insertion order matters since last(self.layer)
        # is used as the incoming layer for each new one
        self.layer = collections.OrderedDict()

        # input layer, None indicates batch size not fixed at compile time
        self.layer['input'] = ll.InputLayer(
            (None, self.n_inputs), input_var=self.stats)

        # learn replacement values
        if self.impute_missing:
            self.layer['missing'] = \
                dl.ImputeMissingLayer(last(self.layer),
                                      n_inputs=(self.n_inputs,))
        else:
            self.layer['missing'] = \
                dl.ReplaceMissingLayer(last(self.layer),
                                       n_inputs=(self.n_inputs,))

        # split off the trailing bypass elements so they can skip the
        # CNN/RNN and be merged back in after flattening
        if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
            last_layer = last(self.layer)
            bypass_slice = slice(self.n_inputs - self.n_bypass, self.n_inputs)
            direct_slice = slice(0, self.n_inputs - self.n_bypass)
            self.layer['bypass'] = ll.SliceLayer(last_layer, bypass_slice)
            self.layer['direct'] = ll.SliceLayer(last_layer, direct_slice)

        # reshape inputs prior to RNN or CNN step
        if self.n_rnn > 0 or n_cnn > 0:

            if len(n_filters) > 0 and len(self.input_shape) == 2:  # 1 channel
                # 2D input to a CNN: insert a singleton channel axis.
                rs = (-1, 1, *self.input_shape)
            else:
                if self.n_rnn > 0:
                    assert len(self.input_shape) == 2  # time, dim
                else:
                    assert len(self.input_shape) == 3  # channel, row, col
                rs = (-1, *self.input_shape)

            # last layer is 'missing' or 'direct'
            self.layer['reshape'] = ll.ReshapeLayer(last(self.layer), rs)

        # recurrent neural net, input: (batch, sequence_length, num_inputs)
        if self.n_rnn > 0:
            self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                            only_return_final=True)

        # convolutional net, input: (batch, channels, rows, columns)
        if n_cnn > 0:
            for l in range(n_cnn):  # add layers
                # 'same' padding when not pooling, so spatial size is kept
                if self.pool_sizes[l] == 1:
                    padding = (self.filter_sizes[l] - 1) // 2
                else:
                    padding = 0
                self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                    name='c' + str(l + 1),
                    incoming=last(self.layer),
                    num_filters=self.n_filters[l],
                    filter_size=self.filter_sizes[l],
                    stride=(1, 1),
                    pad=padding,
                    untie_biases=False,
                    W=lasagne.init.GlorotUniform(),
                    b=lasagne.init.Constant(0.),
                    nonlinearity=lnl.rectify,
                    flip_filters=True,
                    convolution=tt.nnet.conv2d)

                if self.pool_sizes[l] > 1:
                    self.layer['pool_' + str(l + 1)] = ll.MaxPool2DLayer(
                        name='p' + str(l + 1),
                        incoming=last(self.layer),
                        pool_size=self.pool_sizes[l],
                        stride=None,
                        ignore_border=True)

        # flatten
        self.layer['flatten'] = ll.FlattenLayer(
            incoming=last(self.layer),
            outdim=2)

        # incorporate bypass inputs
        if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
            self.layer['bypass_merge'] = lasagne.layers.ConcatLayer(
                [self.layer['bypass'], last(self.layer)], axis=1)

        # density head: mixture of Gaussians or masked autoregressive flow
        if self.density == 'mog':
            self.init_mdn(**density_opts)
        elif self.density == 'maf':
            self.init_maf(**density_opts)
        else:
            raise NotImplementedError

        self.compile_funs()  # theano functions
示例#24
0
    def build_RNN(self,
                  n_hidden_list=(100, ),
                  bidirectional=False,
                  addDenseLayers=False,
                  seed=int(time.time()),
                  debug=False,
                  logger=logger_RNNtools):
        # some inspiration from http://colinraffel.com/talks/hammer2015recurrent.pdf

        # if debug:
        #     logger_RNNtools.debug('\nInputs:');
        #     logger_RNNtools.debug('  X.shape:    %s', self.X[0].shape)
        #     logger_RNNtools.debug('  X[0].shape: %s %s %s \n%s', self.X[0][0].shape, type(self.X[0][0]),
        #                           type(self.X[0][0][0]), self.X[0][0][:5])
        #
        #     logger_RNNtools.debug('Targets: ');
        #     logger_RNNtools.debug('  Y.shape:    %s', self.Y.shape)
        #     logger_RNNtools.debug('  Y[0].shape: %s %s %s \n%s', self.Y[0].shape, type(self.Y[0]), type(self.Y[0][0]),
        #                           self.Y[0][:5])
        #     logger_RNNtools.debug('Layers: ')

        # fix these at initialization because it allows for compiler opimizations
        num_output_units = self.num_output_units
        num_features = self.num_features
        batch_size = self.batch_size

        audio_inputs = self.audio_inputs_var
        audio_masks = self.audio_masks_var  #set MATRIX, not iMatrix!! Otherwise all mask calculations are done by CPU, and everything will be ~2x slowed down!! Also in general_tools.generate_masks()
        valid_indices = self.audio_valid_indices_var

        net = {}
        # net['l1_in_valid'] = L.InputLayer(shape=(batch_size, None), input_var=valid_indices)

        # shape = (batch_size, batch_max_seq_length, num_features)
        net['l1_in'] = L.InputLayer(shape=(batch_size, None, num_features),
                                    input_var=audio_inputs)
        # We could do this and set all input_vars to None, but that is slower -> fix batch_size and num_features at initialization
        # batch_size, n_time_steps, n_features = net['l1_in'].input_var.shape

        # This input will be used to provide the network with masks.
        # Masks are matrices of shape (batch_size, n_time_steps);
        net['l1_mask'] = L.InputLayer(shape=(batch_size, None),
                                      input_var=audio_masks)

        if debug:
            get_l_in = L.get_output(net['l1_in'])
            l_in_val = get_l_in.eval({net['l1_in'].input_var: self.X})
            # logger_RNNtools.debug(l_in_val)
            logger_RNNtools.debug('  l_in size: %s', l_in_val.shape)

            get_l_mask = L.get_output(net['l1_mask'])
            l_mask_val = get_l_mask.eval(
                {net['l1_mask'].input_var: self.masks})
            # logger_RNNtools.debug(l_in_val)
            logger_RNNtools.debug('  l_mask size: %s', l_mask_val.shape)

            n_batch, n_time_steps, n_features = net['l1_in'].input_var.shape
            logger_RNNtools.debug(
                "  n_batch: %s | n_time_steps: %s | n_features: %s", n_batch,
                n_time_steps, n_features)

        ## LSTM parameters
        # All gates have initializers for the input-to-gate and hidden state-to-gate
        # weight matrices, the cell-to-gate weight vector, the bias vector, and the nonlinearity.
        # The convention is that gates use the standard sigmoid nonlinearity,
        # which is the default for the Gate class.
        gate_parameters = L.recurrent.Gate(W_in=lasagne.init.Orthogonal(),
                                           W_hid=lasagne.init.Orthogonal(),
                                           b=lasagne.init.Constant(0.))
        cell_parameters = L.recurrent.Gate(
            W_in=lasagne.init.Orthogonal(),
            W_hid=lasagne.init.Orthogonal(),
            # Setting W_cell to None denotes that no cell connection will be used.
            W_cell=None,
            b=lasagne.init.Constant(0.),
            # By convention, the cell nonlinearity is tanh in an LSTM.
            nonlinearity=lasagne.nonlinearities.tanh)

        # generate layers of stacked LSTMs, possibly bidirectional
        net['l2_lstm'] = []

        for i in range(len(n_hidden_list)):
            n_hidden = n_hidden_list[i]

            if i == 0: input = net['l1_in']
            else: input = net['l2_lstm'][i - 1]

            nextForwardLSTMLayer = L.recurrent.LSTMLayer(
                input,
                n_hidden,
                # We need to specify a separate input for masks
                mask_input=net['l1_mask'],
                # Here, we supply the gate parameters for each gate
                ingate=gate_parameters,
                forgetgate=gate_parameters,
                cell=cell_parameters,
                outgate=gate_parameters,
                # We'll learn the initialization and use gradient clipping
                learn_init=True,
                grad_clipping=100.)
            net['l2_lstm'].append(nextForwardLSTMLayer)

            if bidirectional:
                input = net['l2_lstm'][-1]
                # Use backward LSTM
                # The "backwards" layer is the same as the first,
                # except that the backwards argument is set to True.
                nextBackwardLSTMLayer = L.recurrent.LSTMLayer(
                    input,
                    n_hidden,
                    ingate=gate_parameters,
                    mask_input=net['l1_mask'],
                    forgetgate=gate_parameters,
                    cell=cell_parameters,
                    outgate=gate_parameters,
                    learn_init=True,
                    grad_clipping=100.,
                    backwards=True)
                net['l2_lstm'].append(nextBackwardLSTMLayer)

                # if debug:
                #     # Backwards LSTM
                #     get_l_lstm_back = theano.function([net['l1_in'].input_var, net['l1_mask'].input_var],
                #                                       L.get_output(net['l2_lstm'][-1]))
                #     l_lstmBack_val = get_l_lstm_back(self.X, self.masks)
                #     logger_RNNtools.debug('  l_lstm_back size: %s', l_lstmBack_val.shape)

                # We'll combine the forward and backward layer output by summing.
                # Merge layers take in lists of layers to merge as input.
                # The output of l_sum will be of shape (n_batch, max_n_time_steps, n_features)
                net['l2_lstm'].append(
                    L.ElemwiseSumLayer(
                        [net['l2_lstm'][-2], net['l2_lstm'][-1]]))

        # we need to convert (batch_size, seq_length, num_features) to (batch_size * seq_length, num_features) because Dense networks can't deal with 2 unknown sizes
        net['l3_reshape'] = L.ReshapeLayer(net['l2_lstm'][-1],
                                           (-1, n_hidden_list[-1]))

        # if debug:
        #     get_l_reshape = theano.function([net['l1_in'].input_var, net['l1_mask'].input_var],
        #                                     L.get_output(net['l3_reshape']))
        #     l_reshape_val = get_l_reshape(self.X, self.masks)
        #     logger.debug('  l_reshape size: %s', l_reshape_val.shape)
        #
        # if debug:
        #     # Forwards LSTM
        #     get_l_lstm = theano.function([net['l1_in'].input_var, net['l1_mask'].input_var],
        #                                  L.get_output(net['l2_lstm'][-1]))
        #     l_lstm_val = get_l_lstm(self.X, self.masks)
        #     logger_RNNtools.debug('  l2_lstm size: %s', l_lstm_val.shape);

        if addDenseLayers:
            net['l4_dense'] = L.DenseLayer(
                net['l3_reshape'],
                nonlinearity=lasagne.nonlinearities.rectify,
                num_units=256)
            dropoutLayer = L.DropoutLayer(net['l4_dense'], p=0.3)
            net['l5_dense'] = L.DenseLayer(
                dropoutLayer,
                nonlinearity=lasagne.nonlinearities.rectify,
                num_units=64)
            # Now we can apply feed-forward layers as usual for classification
            net['l6_dense'] = L.DenseLayer(
                net['l5_dense'],
                num_units=num_output_units,
                nonlinearity=lasagne.nonlinearities.softmax)
        else:
            # Now we can apply feed-forward layers as usual for classification
            net['l6_dense'] = L.DenseLayer(
                net['l3_reshape'],
                num_units=num_output_units,
                nonlinearity=lasagne.nonlinearities.softmax)

        # # Now, the shape will be (n_batch * n_timesteps, num_output_units). We can then reshape to
        # # n_batch to get num_output_units values for each timestep from each sequence
        net['l7_out_flattened'] = L.ReshapeLayer(net['l6_dense'],
                                                 (-1, num_output_units))
        net['l7_out'] = L.ReshapeLayer(net['l6_dense'],
                                       (batch_size, -1, num_output_units))

        net['l7_out_valid_basic'] = L.SliceLayer(net['l7_out'],
                                                 indices=valid_indices,
                                                 axis=1)
        net['l7_out_valid'] = L.ReshapeLayer(
            net['l7_out_valid_basic'], (batch_size, -1, num_output_units))
        net['l7_out_valid_flattened'] = L.ReshapeLayer(
            net['l7_out_valid_basic'], (-1, num_output_units))

        if debug:
            get_l_out = theano.function(
                [net['l1_in'].input_var, net['l1_mask'].input_var],
                L.get_output(net['l7_out']))
            l_out = get_l_out(self.X, self.masks)

            # this only works for batch_size == 1
            get_l_out_valid = theano.function(
                [audio_inputs, audio_masks, valid_indices],
                L.get_output(net['l7_out_valid']))
            try:
                l_out_valid = get_l_out_valid(self.X, self.masks,
                                              self.valid_frames)
                logger_RNNtools.debug('\n\n\n  l_out: %s  | l_out_valid: %s',
                                      l_out.shape, l_out_valid.shape)
            except:
                logger_RNNtools.warning(
                    "batchsize not 1, get_valid not working")

        if debug: self.print_network_structure(net)
        self.network_lout = net['l7_out_flattened']
        self.network_lout_batch = net['l7_out']
        self.network_lout_valid = net['l7_out_valid']
        self.network_lout_valid_flattened = net['l7_out_valid_flattened']

        self.network = net
    def build_network(self, vocab_size, doc_var, query_var, docmask_var,
                      qmask_var, W_init):
        """Build the document/query reader graph.

        Embeds document and query tokens with a shared embedding matrix,
        runs bidirectional GRUs over both, scores every document position
        against a fixed-size query summary, and accumulates the
        softmax-normalised position scores into a vocabulary-sized
        distribution (repeated words sum their probabilities).

        Returns ``(final, l_doc, l_q)``: the symbolic probability
        expression plus the document and query output layers.
        """
        # Wrap the symbolic inputs as Lasagne layers.
        doc_in = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
        query_in = L.InputLayer(shape=(None, None, 1), input_var=query_var)
        doc_mask = L.InputLayer(shape=(None, None), input_var=docmask_var)
        query_mask = L.InputLayer(shape=(None, None), input_var=qmask_var)

        # One embedding matrix, shared between document and query.
        doc_embed = L.EmbeddingLayer(doc_in,
                                     input_size=vocab_size,
                                     output_size=EMBED_DIM,
                                     W=W_init)
        query_embed = L.EmbeddingLayer(query_in,
                                       input_size=vocab_size,
                                       output_size=EMBED_DIM,
                                       W=doc_embed.W)

        def gru(incoming, mask, backwards=False):
            # All four recurrent passes share the same hyper-parameters.
            return L.GRULayer(incoming, NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True,
                              backwards=backwards)

        # Document: keep the full bidirectional sequence (B x N x 2H).
        l_doc = L.concat([gru(doc_embed, doc_mask),
                          gru(doc_embed, doc_mask, backwards=True)], axis=2)

        # Query: summarise with the last forward state and the first
        # backward state, concatenated into one vector per batch item.
        fwd_q = gru(query_embed, query_mask)
        bkd_q = gru(query_embed, query_mask, backwards=True)
        l_q = L.ConcatLayer([L.SliceLayer(fwd_q, -1, 1),
                             L.SliceLayer(bkd_q, 0, 1)])

        d = L.get_output(l_doc)  # B x N x D
        q = L.get_output(l_q)  # B x D
        p = T.batched_dot(d, q)  # B x N: score per document position

        # Masked softmax: positions outside the mask keep a large
        # negative logit (-20) so they get ~zero probability.
        pm = T.nnet.softmax(
            T.set_subtensor(
                T.alloc(-20., p.shape[0], p.shape[1])[docmask_var.nonzero()],
                p[docmask_var.nonzero()]))

        # Scatter-add position probabilities into per-word vocabulary
        # scores; `index` gives the batch row for every position.
        index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
        final = T.inc_subtensor(
            T.alloc(0., p.shape[0], vocab_size)[index,
                                                T.flatten(doc_var, outdim=2)],
            pm)

        return final, l_doc, l_q
示例#26
0
    def __init__(self, full_length, output_size, meta_size, depth=2, encoder_size=64, decoder_size=64):
        """Build a recurrent encoder / dense decoder and compile its functions.

        Compiles three Theano functions:
          * ``_train_fn``   -- one adadelta step, returns the mean training loss
          * ``_loss_fn``    -- mean evaluation loss (deterministic pass)
          * ``_predict_fn`` -- deterministic (mu, log-variance) outputs

        :param full_length: length of the target sequence; the final dense
            layer emits ``2 * full_length`` values (mu and log-var halves).
        :param output_size: feature dimension of each input time step.
        :param meta_size: feature dimension of the metadata input.
        :param depth: number of dense decoder layers.
        :param encoder_size: width budget for the encoder; each recurrent
            layer uses ``encoder_size // 2`` units.
        :param decoder_size: width of each dense decoder layer.
        """

        latent_size = 16

        # Symbolic inputs: (batch, time, features) sequences plus targets
        # and per-position weights for the likelihood.
        input_var = TT.tensor3(dtype='float32')
        meta_var = TT.tensor3(dtype='float32')
        target_var = TT.matrix()
        cut_weights = TT.vector(dtype='float32')

        input_layer = layers.InputLayer((None, None, output_size), input_var=input_var)
        meta_layer = layers.InputLayer((None, None, meta_size), input_var=meta_var)
        meta_layer = layers.DropoutLayer(meta_layer, p=0.2)
        concat_input_layer = layers.ConcatLayer([input_layer, meta_layer], axis=-1)

        # Encoder: two stacked recurrent layers, then a dense bottleneck.
        # `//` keeps the layer width integral under both Python 2 and 3
        # (the original `/` relied on Py2 integer division).
        lstm_layer = layers.RecurrentLayer(concat_input_layer, encoder_size // 2, learn_init=True)
        lstm_layer = layers.RecurrentLayer(lstm_layer, encoder_size // 2, learn_init=True)

        lstm_layer = layers.ReshapeLayer(lstm_layer, (-1, encoder_size // 2))

        encoded = layers.DenseLayer(lstm_layer, latent_size)
        encoded = layers.batch_norm(encoded)

        # Decoder: `depth` batch-normalised dense layers.
        dense = encoded
        for _ in xrange(depth):
            dense = layers.DenseLayer(dense, decoder_size)
            dense = layers.batch_norm(dense)

        # Final linear layer emits mu and log-variance side by side;
        # slice the halves apart. NOTE(review): reshaping a
        # (-1, full_length) slice to (-1, full_length, full_length)
        # assumes the flattened batch is a multiple of full_length
        # (i.e. the encoder emits full_length steps per sequence) —
        # confirm against the callers.
        mu_and_logvar_x_layer = layers.DenseLayer(dense, full_length * 2, nonlinearity=nonlinearities.linear)

        mu_x_layer = layers.SliceLayer(mu_and_logvar_x_layer, slice(0, full_length), axis=1)
        mu_x_layer = layers.ReshapeLayer(mu_x_layer, (-1, full_length, full_length))
        logvar_x_layer = layers.SliceLayer(mu_and_logvar_x_layer, slice(full_length, None), axis=1)
        logvar_x_layer = layers.ReshapeLayer(logvar_x_layer, (-1, full_length, full_length))

        l2_norm = regularization.regularize_network_params(mu_and_logvar_x_layer, regularization.l2)

        # Training loss: stochastic pass (dropout and batch-norm active).
        loss = neg_log_likelihood(
            target_var,
            layers.get_output(mu_x_layer, deterministic=False),
            layers.get_output(logvar_x_layer, deterministic=False),
            cut_weights
        ) + 1e-4 * l2_norm

        # Evaluation loss: deterministic pass. BUGFIX: this previously used
        # deterministic=False, leaving dropout active and batch statistics
        # in use during evaluation — inconsistent with _predict_fn below.
        test_loss = neg_log_likelihood(
            target_var,
            layers.get_output(mu_x_layer, deterministic=True),
            layers.get_output(logvar_x_layer, deterministic=True),
            cut_weights
        ) + 1e-4 * l2_norm

        params = layers.get_all_params(mu_and_logvar_x_layer, trainable=True)
        param_updates = updates.adadelta(loss.mean(), params)

        self._train_fn = theano.function(
            [input_var, meta_var, target_var, cut_weights],
            updates=param_updates,
            outputs=loss.mean()
        )

        self._loss_fn = theano.function(
            [input_var, meta_var, target_var, cut_weights],
            outputs=test_loss.mean()
        )

        self._predict_fn = theano.function(
            [input_var, meta_var],
            outputs=[
                layers.get_output(mu_x_layer, deterministic=True),
                layers.get_output(logvar_x_layer, deterministic=True)
            ]
        )