示例#1
0
    def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                         n_hidden_con, **kwargs):
        """Build an attention-weighted summary of ``gru_con``.

        ``gru_con`` and ``condition`` are each projected to
        ``self.n_attention`` units without a nonlinearity.  The condition
        projection gets a new axis 1 via ``dimshuffle`` and is summed with
        the ``gru_con`` projection; the sum goes through ``tanh`` and a
        one-unit projection whose single output is sliced out along axis 2.
        Those scores are normalised with ``SequenceSoftmax`` (using
        ``self.in_con_mask``) and multiplied into ``gru_con``.

        NOTE(review): the ``in_con_mask``, ``batch_size``, ``n_hidden_con``
        and ``kwargs`` arguments are not read here -- the mask always comes
        from ``self.in_con_mask``.  Confirm that this is intended.

        Returns an ``ExpressionLayer`` that sums the weighted layer over
        axis 1; its declared output shape is the input shape without axis 1.
        """
        mask = self.in_con_mask
        n_att = self.n_attention

        context_proj = non_flattening_dense_layer(
            gru_con, mask, n_att, nonlinearity=None)
        query_proj = dimshuffle(
            DenseLayer(condition, n_att, nonlinearity=None), (0, 'x', 1))

        hidden = NonlinearityLayer(
            ElemwiseSumLayer([context_proj, query_proj]), T.tanh)
        score_column = non_flattening_dense_layer(
            hidden, mask, 1, nonlinearity=None)
        scores = SliceLayer(score_column, indices=0, axis=2)

        weights = SequenceSoftmax(scores, mask)
        weights = ForgetSizeLayer(dimshuffle(weights, (0, 1, 'x')))
        weighted = ElemwiseMergeLayer([weights, gru_con], T.mul)

        def _sum_over_axis1(x):
            return T.sum(x, axis=1)

        def _drop_axis1(shape):
            return (shape[0], ) + shape[2:]

        return ExpressionLayer(weighted, _sum_over_axis1, _drop_axis1)
    def get_embedding_layer(self, l_in, extra_vars):
        """Build English and Chinese embedding towers and switch between them.

        ``extra_vars[0]`` becomes the input variable of the language
        ``InputLayer``; the remaining entries are returned unchanged.

        For each language the embedding matrix is loaded with
        ``load_embeddings`` when the matching ``bilingual_*_embed_file``
        option is set; otherwise it is initialised with ``Normal()`` and
        sized by ``bilingual_embed_size``.  Each embedding is passed through
        a linear ``NINLayer`` with ``listener_cell_size`` units, wrapped in a
        pair of ``(0, 2, 1)`` dimshuffles.

        Returns:
            ``(l_merged, context_vars)`` where ``l_merged`` is the
            ``SwitchLayer`` over the two transformed embeddings.
        """
        language = extra_vars[0]
        context_vars = extra_vars[1:]
        opts = self.options

        id_tag = (self.id + '/') if self.id else ''

        l_lang = InputLayer(shape=(None, ),
                            input_var=language,
                            name=id_tag + 'lang_input')

        def resolve_init(embed_file):
            # Returns (W, embedding width) for one language.
            if embed_file:
                matrix = load_embeddings(embed_file, self.seq_vec)
                return matrix, matrix.shape[1]
            return Normal(), opts.bilingual_embed_size

        def build_tower(weights, embed_size, embed_name, transformed_name):
            # Embedding -> swap last two axes -> linear NIN -> swap back.
            embedded = EmbeddingLayer(l_in,
                                      input_size=len(self.seq_vec.tokens),
                                      output_size=embed_size,
                                      W=weights,
                                      name=embed_name)
            swapped = dimshuffle(embedded, (0, 2, 1))
            projected = NINLayer(swapped,
                                 num_units=opts.listener_cell_size,
                                 nonlinearity=None,
                                 name=transformed_name)
            return dimshuffle(projected, (0, 2, 1))

        # Both initialisers are resolved before any layer is created, and the
        # English tower is built before the Chinese one, so parameters are
        # created in the same order as before.
        en_embeddings, en_embed_size = resolve_init(
            opts.bilingual_en_embed_file)
        zh_embeddings, zh_embed_size = resolve_init(
            opts.bilingual_zh_embed_file)

        l_en_transformed = build_tower(
            en_embeddings, en_embed_size,
            id_tag + 'desc_embed_en',
            id_tag + 'desc_embed_en_transformed')
        l_zh_transformed = build_tower(
            zh_embeddings, zh_embed_size,
            id_tag + 'desc_embed_zh',
            id_tag + 'desc_embed_zh_transformed')

        l_merged = SwitchLayer(l_lang, [l_en_transformed, l_zh_transformed],
                               name=id_tag + 'desc_embed_switch')
        return (l_merged, context_vars)
示例#3
0
def broadcast_dot_layer(l_pred, l_targets, feature_dim, id_tag):
    """Multiply ``l_pred`` against ``l_targets`` and sum-pool over axis 1.

    ``l_pred`` gets a trailing 'x' axis and is wrapped in a
    ``ForgetSizeLayer`` on axis 2 before the elementwise product with
    ``l_targets``.  The product is pooled along axis 1 with ``T.sum`` and a
    pool size of ``feature_dim``, then reshaped with the spec
    ``([0], [2])``.

    Every layer name is ``id_tag`` followed by a fixed suffix.
    """
    def named(suffix):
        return id_tag + suffix

    pred_column = ForgetSizeLayer(
        dimshuffle(l_pred, (0, 1, 'x'), name=named('dot_broadcast')),
        axis=2,
        name=named('dot_nosize'))
    product = ElemwiseMergeLayer((pred_column, l_targets),
                                 T.mul,
                                 name=named('dot_elemwise_mul'))
    summed = FeaturePoolLayer(product,
                              pool_size=feature_dim,
                              axis=1,
                              pool_function=T.sum,
                              name=named('dot_pool'))
    return reshape(summed, ([0], [2]), name=named('broadcast_dot'))
示例#4
0
def broadcast_sub_layer(l_pred, l_targets, feature_dim, id_tag):
    """Return a layer computing ``l_pred - l_targets`` elementwise.

    ``l_pred`` first gets a trailing 'x' axis and is wrapped in a
    ``ForgetSizeLayer`` on axis 2.  ``feature_dim`` is accepted but not
    used.  Layer names are ``id_tag`` plus a fixed suffix.
    """
    pred_with_axis = dimshuffle(l_pred, (0, 1, 'x'),
                                name=id_tag + 'sub_broadcast')
    pred_unsized = ForgetSizeLayer(pred_with_axis,
                                   axis=2,
                                   name=id_tag + 'sub_nosize')
    operands = (pred_unsized, l_targets)
    return ElemwiseMergeLayer(operands, T.sub, name=id_tag + 'broadcast_sub')
示例#5
0
def apply_mask(layer_seq, layer_seq_mask):
    """Multiply a sequence layer elementwise by its mask layer.

    The mask gets a trailing 'x' axis via ``dimshuffle`` and is wrapped in a
    ``ForgetSizeLayer`` before being multiplied with ``layer_seq``.

    layer_seq: layer of shape (batch_size, length_seq, n_features)
    layer_seq_mask: layer of shape (batch_size, length_seq)
    """
    mask_with_axis = dimshuffle(layer_seq_mask, (0, 1, 'x'))
    mask_unsized = ForgetSizeLayer(mask_with_axis)
    return ElemwiseMergeLayer([mask_unsized, layer_seq], T.mul)
示例#6
0
def layer_context(layer_ctx,
                  ctx_nblayers,
                  ctx_nbfilters,
                  ctx_winlen,
                  hiddensize,
                  nonlinearity,
                  bn_axes=None,
                  bn_cnn_axes=None,
                  critic=False,
                  useLRN=True):
    """Stack convolutional and dense layers on top of ``layer_ctx``.

    The input gets a singleton axis inserted at position 1, then goes
    through ``ctx_nblayers`` ``Conv2DLayer``s (``ctx_nbfilters`` filters of
    size ``[ctx_winlen, 1]``, stride 1, ``pad='same'``).  Axis 1 is then
    moved to the end, the result is flattened to 3 dimensions, and two
    ``DenseLayer``s of ``hiddensize`` units are applied with
    ``num_leading_axes=2``.

    Normalisation:
      * batch norm after each conv layer when ``critic`` is false and
        ``bn_cnn_axes`` is not None;
      * ``LocalResponseNormalization2DLayer`` after each conv layer when
        both ``critic`` and ``useLRN`` are true;
      * batch norm after each dense layer when ``critic`` is false and
        ``bn_axes`` is not None.

    Returns the last layer of the stack.
    """
    # These switches do not change between iterations.
    cnn_batch_norm = (not critic) and (bn_cnn_axes is not None)
    fc_batch_norm = (not critic) and (bn_axes is not None)
    local_response_norm = critic and useLRN

    net = ll.dimshuffle(layer_ctx, [0, 'x', 1, 2],
                        name='ctx.dimshuffle_to_2DCNN')

    cnn_suffix = '_CNN{}x{}x{}'.format(ctx_nbfilters, ctx_winlen, 1)
    for layer_no in xrange(1, ctx_nblayers + 1):
        net = ll.Conv2DLayer(net,
                             num_filters=ctx_nbfilters,
                             filter_size=[ctx_winlen, 1],
                             stride=1,
                             pad='same',
                             nonlinearity=nonlinearity,
                             name='ctx.l' + str(layer_no) + cnn_suffix)
        if cnn_batch_norm:
            net = ll.batch_norm(net, axes=bn_cnn_axes)
        if local_response_norm:
            net = ll.LocalResponseNormalization2DLayer(net)

    net = ll.dimshuffle(net, [0, 2, 3, 1], name='ctx.dimshuffle_back')
    net = ll.flatten(net, outdim=3, name='ctx.flatten')

    # Dense layer numbering continues after the conv layers.
    fc_suffix = '_FC{}'.format(hiddensize)
    for layer_no in (ctx_nblayers + 1, ctx_nblayers + 2):
        net = ll.DenseLayer(net,
                            hiddensize,
                            nonlinearity=nonlinearity,
                            num_leading_axes=2,
                            name='ctx.l' + str(layer_no) + fc_suffix)
        if fc_batch_norm:
            net = ll.batch_norm(net, axes=bn_axes)

    return net
示例#7
0
def build_cnn():
    """Build a small character-level CNN and print test-value shapes.

    The input is an int64 3-tensor declared as ``(None, 10, 100)``.  It is
    embedded with the matrix from ``create_char_embedding_matrix()``
    (102 rows, 101 columns), reshaped to ``(-1, 100, 101)`` and transposed
    to ``(-1, 101, 100)``.  Two ``Conv1DLayer`` branches (50 filters each,
    filter sizes 2 and 3, stride 1) are max-pooled, flattened to
    ``(-1, 50)`` and concatenated along axis 1; the result is reshaped to
    ``(-1, 10, 100)`` and returned.

    The shape printouts read ``tag.test_value``; they presumably require
    Theano's ``compute_test_value`` mode to be enabled -- TODO confirm.

    ``print(...)`` is called with a single argument each time so that the
    output is the same under the Python 2 print statement and the Python 3
    print function.
    """
    data_size = (None, 10, 100)

    input_var = T.tensor3(name="input", dtype='int64')

    # NOTE(review): the upper bound of randint is exclusive, so this test
    # value is all zeros -- confirm that is what was intended.
    values = np.array(np.random.randint(0, 1, (5, 10, 100)))
    input_var.tag.test_value = values
    input_layer = L.InputLayer(data_size, input_var=input_var)

    W = create_char_embedding_matrix()

    embed_layer = L.EmbeddingLayer(input_layer,
                                   input_size=102,
                                   output_size=101,
                                   W=W)

    reshaped = L.reshape(embed_layer, (-1, 100, 101))
    dim_shuffle = L.dimshuffle(reshaped, (0, 2, 1))
    print(L.get_output(dim_shuffle).tag.test_value.shape)

    # Branch 1: filter size 2; pool size 99 presumably spans the whole conv
    # output (100 - 2 + 1 with the default padding) -- TODO confirm.
    conv_layer_1 = L.Conv1DLayer(dim_shuffle, 50, 2, 1)

    print(L.get_output(conv_layer_1).tag.test_value.shape)
    print("TEST")
    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=99)
    print(L.get_output(pool_layer_1).tag.test_value.shape)
    reshape_conv_1 = L.reshape(pool_layer_1, (-1, 50))

    # Branch 2: filter size 3 with pool size 98, built on the same input.
    conv_layer_2 = L.Conv1DLayer(dim_shuffle, 50, 3, 1)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=98)
    reshape_conv_2 = L.reshape(pool_layer_2, (-1, 50))

    merge_layer = L.ConcatLayer([reshape_conv_1, reshape_conv_2], 1)
    print(L.get_output(merge_layer).tag.test_value.shape)
    reshape_output = L.reshape(merge_layer, (-1, 10, 100))
    print(L.get_output(reshape_output).tag.test_value.shape)

    return reshape_output
示例#8
0
    def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                         n_hidden_con, **kwargs):
        """Return ``gru_con`` summed over axis 1 with attention weights.

        Steps, all using ``self.in_con_mask`` as the mask:
          1. project ``gru_con`` and ``condition`` to ``self.n_attention``
             units (no nonlinearity);
          2. give the ``condition`` projection a new axis 1 and add it to
             the ``gru_con`` projection, then apply ``tanh``;
          3. project to one unit and slice index 0 of axis 2 to get one
             score per position;
          4. normalise the scores with ``SequenceSoftmax``;
          5. multiply the weights into ``gru_con`` and sum over axis 1.

        NOTE(review): the ``in_con_mask`` parameter is shadowed by
        ``self.in_con_mask`` and, like ``batch_size``, ``n_hidden_con`` and
        ``kwargs``, is never read -- confirm this is deliberate.
        """
        # Steps 1-2: shared attention space.
        keys = non_flattening_dense_layer(
            gru_con, self.in_con_mask, self.n_attention, nonlinearity=None)
        query = DenseLayer(condition, self.n_attention, nonlinearity=None)
        query = dimshuffle(query, (0, 'x', 1))
        energies = NonlinearityLayer(ElemwiseSumLayer([keys, query]), T.tanh)

        # Step 3: one scalar score per position.
        energies = non_flattening_dense_layer(
            energies, self.in_con_mask, 1, nonlinearity=None)
        energies = SliceLayer(energies, indices=0, axis=2)

        # Step 4: masked normalisation.
        alphas = SequenceSoftmax(energies, self.in_con_mask)

        # Step 5: weight and reduce.
        alphas = dimshuffle(alphas, (0, 1, 'x'))
        rep = ElemwiseMergeLayer([ForgetSizeLayer(alphas), gru_con], T.mul)
        return ExpressionLayer(
            rep,
            lambda weighted: T.sum(weighted, axis=1),
            lambda shape: (shape[0],) + shape[2:])
    def get_conv_input(self, sidx, tidx, avg=False):
        """Assemble the input layer for the convolution.

        The embedding layers of every feature in ``self.args.source_feats``
        are combined (summed when ``self.args.lex == 'mix'``, otherwise
        concatenated on axis 2).  A position indicator of length
        ``window_size`` -- 1 at the centre, 0 elsewhere -- is repeated along
        axis 0 ``sidx.shape[0]`` times and concatenated on axis 2.  When
        ``self.args.pos_emb`` is set, an 8-dimensional embedding of the
        indicator cast to int64 is concatenated as well; its weight is
        excluded from regularisation.

        Args:
            sidx, tidx: passed through to each feature's ``get_emb_layer``;
                ``sidx.shape[0]`` also sets how often the indicator repeats.
            avg: selects the ``'_avg'`` name suffix and ``Constant()``
                initialisation for the position embedding.

        Returns:
            The concatenated layer with its last two axes swapped.

        NOTE(review): the indicator has ``2 * (window_size // 2) + 1``
        entries, so this presumably assumes an odd ``window_size``.
        """
        suf = '_avg' if avg else ''
        window = self.args.window_size

        feat_embs = [
            self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
            for name in self.args.source_feats
        ]

        # TODO: change the meaning
        if self.args.lex == 'mix':
            concat_emb = L.ElemwiseSumLayer(feat_embs)
        else:
            concat_emb = L.concat(feat_embs, axis=2)

        # Floor division keeps the pad widths integral even when true
        # division is in effect (Python 3 or `from __future__ import
        # division`); for ints under Python 2 it equals the old `/`.
        half = window // 2
        pos = np.array([0] * half + [1] + [0] * half).astype(
            theano.config.floatX)
        post = theano.shared(pos[np.newaxis, :, np.newaxis], borrow=True)
        posl = L.InputLayer(
            (None, window, 1),
            input_var=T.extra_ops.repeat(post, sidx.shape[0], axis=0))

        # Collect the pieces first so the concat layer is built only once.
        conv_parts = [concat_emb, posl]

        if self.args.pos_emb:
            posint = L.flatten(
                L.ExpressionLayer(posl, lambda x: T.cast(x, 'int64')))
            pos_emb = L.EmbeddingLayer(
                posint,
                window,
                8,
                name='epos' + suf,
                W=Normal(0.01) if not avg else Constant())
            pos_emb.params[pos_emb.W].remove('regularizable')
            conv_parts.append(pos_emb)

        conv_in = L.concat(conv_parts, axis=2)

        # Swap the last two axes for the convolution input.
        return L.dimshuffle(conv_in, (0, 2, 1))
示例#10
0
def broadcast_sub_layer(l_pred, l_targets, feature_dim, id_tag):
    """Build the elementwise difference ``l_pred - l_targets``.

    Before the subtraction ``l_pred`` receives a trailing 'x' axis through
    ``dimshuffle`` and is passed through ``ForgetSizeLayer(axis=2)``.
    ``feature_dim`` is not used.  All layer names start with ``id_tag``.
    """
    def named(suffix):
        return id_tag + suffix

    l_forget = ForgetSizeLayer(
        dimshuffle(l_pred, (0, 1, 'x'), name=named('sub_broadcast')),
        axis=2,
        name=named('sub_nosize'))
    return ElemwiseMergeLayer((l_forget, l_targets),
                              T.sub,
                              name=named('broadcast_sub'))
示例#11
0
    def get_char2word(self, ic, avg=False):
        """Build word representations from character indices.

        ``ic`` is embedded with an ``EmbeddingLayer`` (``self.args.vc``
        rows, ``self.args.nc`` columns) whose weight is excluded from
        regularisation.  The rest depends on ``self.args.char_model``:

        * ``'CNN'``: for every ``n`` in ``self.args.ngrams`` a
          ``Conv2DLayer`` with ``self.args.nf`` filters of size ``(1, n)``
          is max-pooled over ``(1, max_len - n + 1)``, flattened to three
          dimensions and transposed; the per-n results are concatenated on
          axis 2.
        * ``'LSTM'``: a forward and a backward ``LSTMLayer`` with
          ``self.args.nw // 2`` units each run over the embeddings reshaped
          to ``(-1, max_len, nc)``, masked where ``ic != 0``; the final
          states are concatenated and reshaped to ``(-1, sw, nw)``.

        Args:
            ic: input layer of character indices.
            avg: when true, layers get the ``'_avg'`` name suffix, use
                ``Constant()`` initialisers, and ``set_zero`` is applied to
                both LSTMs.

        Returns:
            The word-level layer, or ``None`` (implicitly) when
            ``char_model`` is neither ``'CNN'`` nor ``'LSTM'``.
        """
        suf = '_avg' if avg else ''
        ec = L.EmbeddingLayer(
            ic,
            self.args.vc,
            self.args.nc,
            name='ec' + suf,
            W=HeNormal() if not avg else Constant())
        ec.params[ec.W].remove('regularizable')

        if self.args.char_model == 'CNN':
            # Move the embedding axis to position 1 for Conv2DLayer.
            lds = L.dimshuffle(ec, (0, 3, 1, 2))
            ls = []
            for n in self.args.ngrams:
                lconv = L.Conv2DLayer(
                    lds,
                    self.args.nf, (1, n),
                    untie_biases=True,
                    W=HeNormal('relu') if not avg else Constant(),
                    name='conv_%d' % n + suf)
                # Pool window chosen by the original author to match the
                # conv output length, max_len - n + 1.
                lpool = L.MaxPool2DLayer(
                    lconv, (1, self.args.max_len - n + 1))
                lpool = L.flatten(lpool, outdim=3)
                lpool = L.dimshuffle(lpool, (0, 2, 1))
                ls.append(lpool)
            return L.concat(ls, axis=2)

        elif self.args.char_model == 'LSTM':
            # Mask layer: true wherever the character index is non-zero.
            ml = L.ExpressionLayer(ic, lambda x: T.neq(x, 0))
            ml = L.reshape(ml, (-1, self.args.max_len))

            gate_params = L.recurrent.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal())
            cell_params = L.recurrent.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal(),
                                           W_cell=None,
                                           nonlinearity=tanh)

            lstm_in = L.reshape(
                ec, (-1, self.args.max_len, self.args.nc))

            # Floor division keeps num_units an int even under true
            # division (Python 3 / `from __future__ import division`).
            half_units = self.args.nw // 2

            # Settings shared by both directions.
            lstm_kwargs = dict(
                mask_input=ml,
                grad_clipping=10.,
                learn_init=True,
                peepholes=False,
                precompute_input=True,
                ingate=gate_params,
                forgetgate=gate_params,
                cell=cell_params,
                outgate=gate_params,
                only_return_final=True)

            lstm_f = L.LSTMLayer(lstm_in,
                                 half_units,
                                 name='forward' + suf,
                                 **lstm_kwargs)
            lstm_b = L.LSTMLayer(lstm_in,
                                 half_units,
                                 backwards=True,
                                 name='backward' + suf,
                                 **lstm_kwargs)
            remove_reg(lstm_f)
            remove_reg(lstm_b)
            if avg:
                set_zero(lstm_f)
                set_zero(lstm_b)
            xc = L.concat([lstm_f, lstm_b], axis=1)
            xc = L.reshape(xc, (-1, self.args.sw, self.args.nw))
            return xc
示例#12
0
    def _build_net(self, emb_char_filter_size=5, emb_dropout=True, **kwargs):
        """Build the embedding, encoding and projection layers of the network.

        Word embeddings (given as inputs) are concatenated with character
        embeddings obtained from a Conv1D + max over axis 2, passed through a
        dense projection and a ``HighwayLayer``, extended with two extra
        features (``bin_feat`` and the ``WeightedFeatureLayer`` output for
        the context, constant 1 padding for the question), encoded with a
        forward and a backward LSTM, and finally projected with a tanh
        ``DenseLayer``.

        The question branch reuses the context branch's parameters for the
        character embedding, character convolution, dense projection,
        highway layer and both LSTMs.  The final tanh projections
        (``l_c_proj`` / ``l_q_proj``) are each given their own
        ``proj_init()`` matrix.

        Args:
            emb_char_filter_size: ``filter_size`` of the character
                ``Conv1DLayer``s.
            emb_dropout: when true, ``LL.dropout`` is applied to both
                embeddings after the ``WeightedFeatureLayer`` is built.
            **kwargs: not read in this method.

        Returns:
            ``(l_c_proj, l_z_hat)``: the projected context layer and the
            ``BatchedDotLayer`` combining the projected question with the
            masked-softmax weights ``l_alpha``.

        Side effects:
            Sets ``self.batch_size`` and ``self.context_len`` to symbolic
            shape entries of ``self.mask_context_var``.
        """

        # Symbolic sizes taken from the input variables; used in reshapes.
        batch_size = self.mask_context_var.shape[0]
        context_len = self.mask_context_var.shape[1]
        question_len = self.question_var.shape[1]
        context_word_len = self.context_char_var.shape[2]
        question_word_len = self.question_char_var.shape[2]

        self.batch_size = batch_size
        self.context_len = context_len
        ''' Inputs and word embeddings'''

        l_context_char = LL.InputLayer(shape=(None, None, None),
                                       input_var=self.context_char_var)
        l_question_char = LL.InputLayer(shape=(None, None, None),
                                        input_var=self.question_char_var)

        l_c_mask = LL.InputLayer(shape=(None, None),
                                 input_var=self.mask_context_var)
        l_q_mask = LL.InputLayer(shape=(None, None),
                                 input_var=self.mask_question_var)

        l_c_char_mask = LL.InputLayer(shape=(None, None, None),
                                      input_var=self.mask_context_char_var)
        l_q_char_mask = LL.InputLayer(shape=(None, None, None),
                                      input_var=self.mask_question_char_var)

        l_c_emb = LL.InputLayer(shape=(None, None, self.emb_size),
                                input_var=self.context_var)
        l_q_emb = LL.InputLayer(shape=(None, None, self.emb_size),
                                input_var=self.question_var)

        if self.train_unk:
            l_c_unk_mask = LL.InputLayer(shape=(None, None),
                                         input_var=self.mask_context_unk_var)
            l_q_unk_mask = LL.InputLayer(shape=(None, None),
                                         input_var=self.mask_question_unk_var)

            l_c_emb = TrainUnkLayer(l_c_emb,
                                    l_c_unk_mask,
                                    output_size=self.emb_size,
                                    W=self.word_embeddings[0])

            # The question layer reuses the context layer's W.
            l_q_emb = TrainUnkLayer(l_q_emb,
                                    l_q_unk_mask,
                                    output_size=self.emb_size,
                                    W=l_c_emb.W)

        # Only the context embedding is wrapped here, not the question.
        if self.negative:
            l_c_emb = TrainNAWLayer(l_c_emb,
                                    l_c_mask,
                                    output_size=self.emb_size)
        ''' Char-embeddings '''

        # (batch_size x context_len x context_word_len x emb_char_size)
        l_c_char_emb = LL.EmbeddingLayer(l_context_char,
                                         input_size=self.alphabet_size,
                                         output_size=self.emb_char_size)

        # Shares the embedding matrix with the context branch.
        l_q_char_emb = LL.EmbeddingLayer(l_question_char,
                                         input_size=self.alphabet_size,
                                         output_size=self.emb_char_size,
                                         W=l_c_char_emb.W)

        # here I do multiplication of character embeddings with masks,
        # because I want to pad them with constant zeros

        l_c_char_mask = ForgetSizeLayer(
            LL.dimshuffle(l_c_char_mask, (0, 1, 2, 'x')))
        l_q_char_mask = ForgetSizeLayer(
            LL.dimshuffle(l_q_char_mask, (0, 1, 2, 'x')))

        l_c_char_emb = LL.ElemwiseMergeLayer([l_c_char_emb, l_c_char_mask],
                                             T.mul)
        l_q_char_emb = LL.ElemwiseMergeLayer([l_q_char_emb, l_q_char_mask],
                                             T.mul)

        # convolutions

        # Merge the batch and word axes, then swap the last two axes so the
        # character-embedding axis comes before the character-position axis.
        l_c_char_emb = LL.dimshuffle(
            LL.reshape(l_c_char_emb, (batch_size * context_len,
                                      context_word_len, self.emb_char_size)),
            (0, 2, 1))
        l_c_char_conv = LL.Conv1DLayer(l_c_char_emb,
                                       num_filters=self.num_emb_char_filters,
                                       filter_size=emb_char_filter_size,
                                       nonlinearity=L.nonlinearities.tanh,
                                       pad=self.conv)
        # (batch_size * context_len x num_filters x context_word_len + filter_size - 1)
        # NOTE(review): the length above presumably assumes a 'full' pad;
        # the actual value depends on self.conv -- confirm.

        # Max over axis 2, then restore the (batch, word) axes.
        l_c_char_emb = LL.ExpressionLayer(l_c_char_conv,
                                          lambda X: X.max(2),
                                          output_shape='auto')
        l_c_char_emb = LL.reshape(
            l_c_char_emb, (batch_size, context_len, self.num_emb_char_filters))

        l_q_char_emb = LL.dimshuffle(
            LL.reshape(l_q_char_emb, (batch_size * question_len,
                                      question_word_len, self.emb_char_size)),
            (0, 2, 1))
        # Same convolution as the context branch: W and b are shared.
        l_q_char_conv = LL.Conv1DLayer(l_q_char_emb,
                                       num_filters=self.num_emb_char_filters,
                                       filter_size=emb_char_filter_size,
                                       nonlinearity=L.nonlinearities.tanh,
                                       W=l_c_char_conv.W,
                                       b=l_c_char_conv.b,
                                       pad=self.conv)
        # (batch_size * question_len x num_filters x question_word_len + filter_size - 1)

        l_q_char_emb = LL.ExpressionLayer(l_q_char_conv,
                                          lambda X: X.max(2),
                                          output_shape='auto')
        l_q_char_emb = LL.reshape(
            l_q_char_emb,
            (batch_size, question_len, self.num_emb_char_filters))
        ''' Concatenating both embeddings '''

        l_c_emb = LL.concat([l_c_emb, l_c_char_emb], axis=2)
        l_q_emb = LL.concat([l_q_emb, l_q_char_emb], axis=2)

        # originally I had dropout here
        ''' Highway layer allowing for interaction between embeddings '''

        # Flatten to 2 dimensions for the dense projection and highway
        # layer, then reshape back to 3 dimensions.
        l_c_P = LL.reshape(l_c_emb,
                           (batch_size * context_len,
                            self.emb_size + self.num_emb_char_filters))
        l_c_P = LL.DenseLayer(l_c_P,
                              num_units=self.rec_size,
                              b=None,
                              nonlinearity=None)

        l_c_high = HighwayLayer(l_c_P)
        l_c_emb = LL.reshape(l_c_high,
                             (batch_size, context_len, self.rec_size))

        l_q_P = LL.reshape(l_q_emb,
                           (batch_size * question_len,
                            self.emb_size + self.num_emb_char_filters))
        # Projection weights shared with the context branch.
        l_q_P = LL.DenseLayer(l_q_P,
                              num_units=self.rec_size,
                              W=l_c_P.W,
                              b=None,
                              nonlinearity=None)

        # Highway parameters shared with the context branch.
        l_q_high = HighwayLayer(l_q_P,
                                W1=l_c_high.W1,
                                b1=l_c_high.b1,
                                W2=l_c_high.W2,
                                b2=l_c_high.b2)
        l_q_emb = LL.reshape(l_q_high,
                             (batch_size, question_len, self.rec_size))
        ''' Calculating wiq features from https://arxiv.org/abs/1703.04816 '''

        l_weighted_feat = WeightedFeatureLayer(
            [l_c_emb, l_q_emb, l_c_mask, l_q_mask])  # batch_size x context_len
        l_weighted_feat = LL.dimshuffle(l_weighted_feat, (0, 1, 'x'))

        # batch_size x context_len
        l_bin_feat = LL.InputLayer(shape=(None, None),
                                   input_var=self.bin_feat_var)
        l_bin_feat = LL.dimshuffle(l_bin_feat, (0, 1, 'x'))
        ''' Dropout at the embeddings '''

        if emb_dropout:
            print('Using dropout after wiq calculation.')
            l_c_emb = LL.dropout(l_c_emb)
            l_q_emb = LL.dropout(l_q_emb)
        ''' Here we concatenate wiq features to embeddings'''

        # both features are concatenated to the embeddings
        # for the question we fix the features to 1
        l_c_emb = LL.concat([l_c_emb, l_bin_feat, l_weighted_feat], axis=2)
        # Pads two entries of value 1 at the end of the last axis so the
        # question has the same feature width as the context.
        l_q_emb = LL.pad(l_q_emb,
                         width=[(0, 2)],
                         val=L.utils.floatX(1),
                         batch_ndim=2)
        ''' Context and question encoding using the same BiLSTM for both '''

        # output shape is (batch_size x context_len x rec_size)
        l_c_enc_forw = LL.LSTMLayer(l_c_emb,
                                    num_units=self.rec_size,
                                    grad_clipping=100,
                                    mask_input=l_c_mask)

        l_c_enc_back = LL.LSTMLayer(l_c_emb,
                                    num_units=self.rec_size,
                                    grad_clipping=100,
                                    mask_input=l_c_mask,
                                    backwards=True)

        # output shape is (batch_size x question_len x rec_size)
        # Every gate below reuses the parameters of the forward context LSTM.
        l_q_enc_forw = LL.LSTMLayer(
            l_q_emb,
            num_units=self.rec_size,
            grad_clipping=100,
            mask_input=l_q_mask,
            ingate=LL.Gate(W_in=l_c_enc_forw.W_in_to_ingate,
                           W_hid=l_c_enc_forw.W_hid_to_ingate,
                           W_cell=l_c_enc_forw.W_cell_to_ingate,
                           b=l_c_enc_forw.b_ingate),
            forgetgate=LL.Gate(W_in=l_c_enc_forw.W_in_to_forgetgate,
                               W_hid=l_c_enc_forw.W_hid_to_forgetgate,
                               W_cell=l_c_enc_forw.W_cell_to_forgetgate,
                               b=l_c_enc_forw.b_forgetgate),
            outgate=LL.Gate(W_in=l_c_enc_forw.W_in_to_outgate,
                            W_hid=l_c_enc_forw.W_hid_to_outgate,
                            W_cell=l_c_enc_forw.W_cell_to_outgate,
                            b=l_c_enc_forw.b_outgate),
            cell=LL.Gate(W_in=l_c_enc_forw.W_in_to_cell,
                         W_hid=l_c_enc_forw.W_hid_to_cell,
                         W_cell=None,
                         b=l_c_enc_forw.b_cell,
                         nonlinearity=L.nonlinearities.tanh))

        # Same sharing scheme, against the backward context LSTM.
        l_q_enc_back = LL.LSTMLayer(
            l_q_emb,
            num_units=self.rec_size,
            grad_clipping=100,
            mask_input=l_q_mask,
            backwards=True,
            ingate=LL.Gate(W_in=l_c_enc_back.W_in_to_ingate,
                           W_hid=l_c_enc_back.W_hid_to_ingate,
                           W_cell=l_c_enc_back.W_cell_to_ingate,
                           b=l_c_enc_back.b_ingate),
            forgetgate=LL.Gate(W_in=l_c_enc_back.W_in_to_forgetgate,
                               W_hid=l_c_enc_back.W_hid_to_forgetgate,
                               W_cell=l_c_enc_back.W_cell_to_forgetgate,
                               b=l_c_enc_back.b_forgetgate),
            outgate=LL.Gate(W_in=l_c_enc_back.W_in_to_outgate,
                            W_hid=l_c_enc_back.W_hid_to_outgate,
                            W_cell=l_c_enc_back.W_cell_to_outgate,
                            b=l_c_enc_back.b_outgate),
            cell=LL.Gate(W_in=l_c_enc_back.W_in_to_cell,
                         W_hid=l_c_enc_back.W_hid_to_cell,
                         W_cell=None,
                         b=l_c_enc_back.b_cell,
                         nonlinearity=L.nonlinearities.tanh))

        # batch_size x context_len  x 2*rec_size
        l_c_enc = LL.concat([l_c_enc_forw, l_c_enc_back], axis=2)
        # batch_size x question_len x 2*rec_size
        l_q_enc = LL.concat([l_q_enc_forw, l_q_enc_back], axis=2)

        def proj_init():
            # Two stacked identity matrices, shape (2 * rec_size, rec_size):
            # the initial projection adds the forward and backward halves.
            return np.vstack([
                np.eye(self.rec_size, dtype=theano.config.floatX),
                np.eye(self.rec_size, dtype=theano.config.floatX)
            ])

        # this is H from the paper, shape: (batch_size * context_len x
        # rec_size)
        l_c_proj = LL.reshape(l_c_enc,
                              (batch_size * context_len, 2 * self.rec_size))
        l_c_proj = LL.DenseLayer(l_c_proj,
                                 num_units=self.rec_size,
                                 W=proj_init(),
                                 b=None,
                                 nonlinearity=L.nonlinearities.tanh)

        # this is Z from the paper, shape: (batch_size * question_len x
        # rec_size)
        # Unlike the layers above, this projection gets its own W from a
        # fresh proj_init() call instead of reusing l_c_proj.W.
        l_q_proj = LL.reshape(l_q_enc,
                              (batch_size * question_len, 2 * self.rec_size))
        l_q_proj = LL.DenseLayer(l_q_proj,
                                 num_units=self.rec_size,
                                 W=proj_init(),
                                 b=None,
                                 nonlinearity=L.nonlinearities.tanh)
        ''' Additional, weighted question encoding (alphas from paper) '''

        l_alpha = LL.DenseLayer(
            l_q_proj,  # batch_size * question_len x 1
            num_units=1,
            b=None,
            nonlinearity=None)

        # batch_size x question_len
        l_alpha = MaskedSoftmaxLayer(
            LL.reshape(l_alpha, (batch_size, question_len)), l_q_mask)

        # batch_size x rec_size
        l_z_hat = BatchedDotLayer([
            LL.reshape(l_q_proj, (batch_size, question_len, self.rec_size)),
            l_alpha
        ])

        return l_c_proj, l_z_hat
示例#13
0
    def additional_layer(self, idx_layer, emb_layer, avg=False):
        suf = '_avg' if avg else ''
        if self.name == 'char':
            if self.args.char_model == 'cnn':
                lds = L.dimshuffle(emb_layer,
                                   (0, 3, 1, 2))  # (100, 16, 26, 32)
                ls = []
                for n in self.args.ngrams:
                    lconv = L.Conv2DLayer(
                        lds,
                        self.args.conv_dim,
                        (1, n),
                        untie_biases=False,
                        # W=HeNormal('relu') if not avg else Constant(),
                        W=GlorotNormal('relu') if not avg else Constant(),
                        name='conv_%d' % n + suf)  # (100, 64/4, 26, 32-n+1)

                    lpool = L.MaxPool2DLayer(lconv,
                                             (1, self.args.max_word_len - n +
                                              1))  # (100, 64, 26, 1)
                    lpool = L.flatten(lpool, outdim=3)  # (100, 16, 26)
                    lpool = L.dimshuffle(lpool, (0, 2, 1))  # (100, 26, 16)
                    ls.append(lpool)
                xc = L.concat(ls, axis=2, name='echar_concat')  # (100, 26, 64)
                # additional
                # xc = L.DenseLayer(xc, self.args.embw_dim, nonlinearity=None, name='echar_affine', num_leading_axes=2,
                # W=HeNormal() if not avg else Constant()) # (100, 26, 100)
                return xc
            elif self.args.char_model == 'lstm':
                ml = L.ExpressionLayer(
                    idx_layer,
                    lambda x: T.neq(x, 0))  # mask layer (100, 24, 32)
                ml = L.reshape(ml, (-1, self.args.max_word_len))  # (1500, 32)

                gate_params = L.recurrent.Gate(W_in=Orthogonal(),
                                               W_hid=Orthogonal())
                cell_params = L.recurrent.Gate(W_in=Orthogonal(),
                                               W_hid=Orthogonal(),
                                               W_cell=None,
                                               nonlinearity=tanh)

                lstm_in = L.reshape(
                    emb_layer,
                    (-1, self.args.max_word_len,
                     self.config['char']['emb_dim']))  # (1500, 32, 16)
                lstm_f = L.LSTMLayer(
                    lstm_in,
                    32,
                    mask_input=ml,
                    grad_clipping=10.,
                    learn_init=True,
                    peepholes=False,
                    precompute_input=True,
                    ingate=gate_params,
                    forgetgate=gate_params,
                    cell=cell_params,
                    outgate=gate_params,
                    # unroll_scan=True,
                    only_return_final=True,
                    name='forward' + suf)  # (1500, 32)
                lstm_b = L.LSTMLayer(
                    lstm_in,
                    32,
                    mask_input=ml,
                    grad_clipping=10.,
                    learn_init=True,
                    peepholes=False,
                    precompute_input=True,
                    ingate=gate_params,
                    forgetgate=gate_params,
                    cell=cell_params,
                    outgate=gate_params,
                    # unroll_scan=True,
                    only_return_final=True,
                    backwards=True,
                    name='backward' + suf)  # (1500, 32)
                remove_reg(lstm_f)
                remove_reg(lstm_b)
                if avg:
                    set_zero(lstm_f)
                    set_zero(lstm_b)
                xc = L.concat([lstm_f, lstm_b], axis=1)  # (1500, 64)
                if self.args.lstm_tagger:
                    xc = L.reshape(
                        xc, (-1, self.args.max_sent_len, 64))  # (100, 161, 64)
                elif self.args.trans_tagger:
                    xc = L.reshape(
                        xc, (-1, self.args.window_size, 64))  # (100, 15, 64)
                else:
                    xc = L.reshape(xc, (-1, 26, 64))  # (100, 26, 64)
                return xc

        elif self.name == 'morph':
            # idx (100, 26/161, 16)  emb (100, 26/161, 16, 32)
            if self.args.morph_model == 'max':
                xm = L.MaxPool2DLayer(
                    emb_layer,
                    (self.args.max_morph_len, 1))  # (100, 26/161, 1, 32)
                # xm = L.reshape(xm, (-1, 26, self.config['morph']['emb_dim'])) # (100, 26/161, 32)
                xm = L.flatten(xm, outdim=3)  # (100, 26/161, 32)
                # xm = L.ExpressionLayer(emb_layer, lambda x: T.max(x, 2))
            elif self.args.morph_model == 'avg':
                mask = L.ExpressionLayer(
                    idx_layer, lambda x: T.neq(x, 0))  # (100, 26, 16)
                mask = L.dimshuffle(mask, (0, 1, 2, 'x'))  # (100, 26, 16, 1)
                mask = L.ExpressionLayer(mask, lambda x: T.extra_ops.repeat(
                    x, self.config['morph']['emb_dim'], 3))  # (100, 26, 16, 1)
                xm = L.ElemwiseMergeLayer([
                    emb_layer, mask
                ], lambda x, m: T.sum(x * m, 2) / T.sum(m, 2))  # (100, 26, 32)
                # xm = L.reshape(xm, (-1, self.args.feat_shape, self.config['morph']['emb_dim'])) # (100, 26, 32)
            return xm
        else:
            return emb_layer
示例#14
0
    def _get_l_out(self, input_vars):
        """Build the speaker network and return its output and input layers.

        The last two entries of ``input_vars`` are taken as the
        previous-output variable and the mask variable; all earlier entries
        are passed to ``self.color_vec.get_input_layer``.

        Returns:
            A pair ``(l_out, input_layers)``: ``l_out`` is the softmax layer
            reshaped to ``(-1, max_len - 1, number of tokens)`` and
            ``input_layers`` is the color input layers followed by the
            previous-output and mask input layers.
        """
        opts = self.options
        check_options(opts)

        if self.id:
            id_tag = self.id + '/'
        else:
            id_tag = ''

        def tagged(suffix):
            # Every layer name carries the optional "<id>/" prefix.
            return id_tag + suffix

        color_input_vars = input_vars[:-2]
        prev_output_var, mask_var = input_vars[-2:]

        # Objects without a ``context_len`` attribute use a context of 1.
        if hasattr(self, 'context_len'):
            context_len = self.context_len
        else:
            context_len = 1

        l_color_repr, color_inputs = self.color_vec.get_input_layer(
            color_input_vars,
            recurrent_length=self.seq_vec.max_len - 1,
            cell_size=opts.speaker_cell_size,
            context_len=context_len,
            id=self.id)

        # The NIN stack runs with the last two axes swapped; they are swapped
        # back once the stack is done.
        l_hidden_color = dimshuffle(l_color_repr, (0, 2, 1))
        for depth in range(opts.speaker_hidden_color_layers):
            l_hidden_color = NINLayer(
                l_hidden_color,
                num_units=opts.speaker_cell_size,
                nonlinearity=NONLINEARITIES[opts.speaker_nonlinearity],
                name=tagged('hidden_color%d' % (depth + 1)))
        l_hidden_color = dimshuffle(l_hidden_color, (0, 2, 1))

        # Previous-token input and its embedding.
        l_prev_out = InputLayer(
            shape=(None, self.seq_vec.max_len - 1),
            input_var=prev_output_var,
            name=tagged('prev_input'))
        l_prev_embed = EmbeddingLayer(
            l_prev_out,
            input_size=len(self.seq_vec.tokens),
            output_size=opts.speaker_cell_size,
            name=tagged('prev_embed'))
        l_in = ConcatLayer(
            [l_hidden_color, l_prev_embed], axis=2, name=tagged('color_prev'))
        l_mask_in = InputLayer(
            shape=(None, self.seq_vec.max_len - 1),
            input_var=mask_var,
            name=tagged('mask_input'))

        # Keyword arguments shared by every recurrent layer.
        cell = CELLS[opts.speaker_cell]
        cell_kwargs = dict(
            mask_input=None if opts.speaker_no_mask else l_mask_in,
            grad_clipping=opts.speaker_grad_clipping,
            num_units=opts.speaker_cell_size,
        )
        if opts.speaker_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(opts.speaker_forget_bias))
        if opts.speaker_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                opts.speaker_nonlinearity]

        # All recurrent layers except the last are optionally followed by
        # dropout; the last one feeds the output stack directly.
        l_rec_drop = l_in
        for idx in range(1, opts.speaker_recurrent_layers):
            l_rec = cell(l_rec_drop, name=tagged('rec%d' % idx), **cell_kwargs)
            l_rec_drop = (
                DropoutLayer(l_rec,
                             p=opts.speaker_dropout,
                             name=tagged('rec%d_drop' % idx))
                if opts.speaker_dropout > 0.0 else l_rec)
        l_rec = cell(l_rec_drop,
                     name=tagged('rec%d' % opts.speaker_recurrent_layers),
                     **cell_kwargs)

        # Collapse to 2 dimensions for the dense stack.
        l_hidden_out = ReshapeLayer(
            l_rec, (-1, opts.speaker_cell_size), name=tagged('reshape'))
        for depth in range(opts.speaker_hidden_out_layers):
            l_hidden_out = DenseLayer(
                l_hidden_out,
                num_units=opts.speaker_cell_size,
                nonlinearity=NONLINEARITIES[opts.speaker_nonlinearity],
                name=tagged('hidden_out%d' % (depth + 1)))
        l_softmax = DenseLayer(
            l_hidden_out,
            num_units=len(self.seq_vec.tokens),
            nonlinearity=softmax,
            name=tagged('softmax'))

        # Restore the (batch, step, token) layout.
        l_out = ReshapeLayer(
            l_softmax,
            (-1, self.seq_vec.max_len - 1, len(self.seq_vec.tokens)),
            name=tagged('out'))

        return l_out, color_inputs + [l_prev_out, l_mask_in]
示例#15
0
    def _get_l_out(self, input_vars):
        """Build the listener network that scores each context item.

        ``input_vars[0]`` is the description (token sequence) variable; the
        remaining entries are passed to ``self.color_vec.get_input_layer``.

        Returns:
            A pair ``(l_scores, input_layers)`` where ``l_scores`` is the
            softmax layer applied on top of ``broadcast_dot_layer`` and
            ``input_layers`` is the description input layer followed by the
            context input layers.
        """
        opts = self.options
        check_options(opts)

        if self.id:
            id_tag = self.id + '/'
        else:
            id_tag = ''

        def maybe_dropout(layer, suffix):
            # Wrap ``layer`` in dropout only when a positive rate is set.
            if opts.listener_dropout > 0.0:
                return DropoutLayer(layer,
                                    p=opts.listener_dropout,
                                    name=id_tag + suffix)
            return layer

        input_var, context_vars = input_vars[0], input_vars[1:]

        l_in = InputLayer(
            shape=(None, self.seq_vec.max_len),
            input_var=input_var,
            name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(
            l_in,
            input_size=len(self.seq_vec.tokens),
            output_size=opts.listener_cell_size,
            name=id_tag + 'desc_embed')

        # Context repr has shape (batch_size, seq_len, context_len * repr_size)
        l_context_repr, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            recurrent_length=self.seq_vec.max_len,
            cell_size=opts.listener_cell_size,
            context_len=self.context_len,
            id=self.id)
        # Split the last axis into (context_len, repr_size), then bring the
        # repr axis to position 1 for the NIN stack.
        l_context_repr = reshape(
            l_context_repr,
            ([0], [1], self.context_len, self.color_vec.output_size))
        l_hidden_context = dimshuffle(
            l_context_repr, (0, 3, 1, 2), name=id_tag + 'shuffle_in')
        for depth in range(opts.listener_hidden_color_layers):
            l_hidden_context = NINLayer(
                l_hidden_context,
                num_units=opts.listener_cell_size,
                nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
                b=Constant(0.1),
                name=id_tag + 'hidden_context%d' % (depth + 1))

        # Mean over the context axis, then drop that (now size-1) axis.
        l_pool = FeaturePoolLayer(
            l_hidden_context,
            pool_size=self.context_len,
            axis=3,
            pool_function=T.mean,
            name=id_tag + 'pool')
        l_pool_squeezed = reshape(
            l_pool, ([0], [1], [2]), name=id_tag + 'pool_squeezed')
        l_pool_shuffle = dimshuffle(
            l_pool_squeezed, (0, 2, 1), name=id_tag + 'shuffle_out')
        l_concat = ConcatLayer(
            [l_pool_shuffle, l_in_embed],
            axis=2,
            name=id_tag + 'concat_inp_context')

        # Keyword arguments shared by both recurrent layers.
        cell = CELLS[opts.listener_cell]
        cell_kwargs = dict(
            grad_clipping=opts.listener_grad_clipping,
            num_units=opts.listener_cell_size,
        )
        if opts.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(opts.listener_forget_bias))
        if opts.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                opts.listener_nonlinearity]

        l_rec1 = cell(l_concat, name=id_tag + 'rec1', **cell_kwargs)
        l_rec1_drop = maybe_dropout(l_rec1, 'rec1_drop')
        # Only the final state of the second recurrent layer is kept.
        l_rec2 = cell(l_rec1_drop,
                      name=id_tag + 'rec2',
                      only_return_final=True,
                      **cell_kwargs)
        l_rec2_drop = maybe_dropout(l_rec2, 'rec2_drop')

        l_rec2_proj = NINLayer(
            l_rec2_drop,
            num_units=opts.listener_cell_size,
            nonlinearity=None,
            name=id_tag + 'rec2_dense')

        # Context is fed into the RNN as one copy for each time step; just use
        # the first time step for output.
        # Input shape: (batch_size, repr_size, seq_len, context_len)
        # Output shape: (batch_size, repr_size, context_len)
        l_context_nonrec = SliceLayer(
            l_hidden_context,
            indices=0,
            axis=2,
            name=id_tag + 'context_nonrec')
        l_pool_nonrec = SliceLayer(
            l_pool_squeezed, indices=0, axis=2, name=id_tag + 'pool_nonrec')

        # Output shape: (batch_size, repr_size, context_len)
        l_sub = broadcast_sub_layer(
            l_pool_nonrec,
            l_context_nonrec,
            feature_dim=opts.listener_cell_size,
            id_tag=id_tag)
        # Output shape: (batch_size, repr_size * 2, context_len)
        # NOTE(review): this layer reuses the name of the earlier concat
        # layer ('concat_inp_context') -- confirm whether that is intended.
        l_concat_sub = ConcatLayer(
            [l_context_nonrec, l_sub],
            axis=1,
            name=id_tag + 'concat_inp_context')
        # Output shape: (batch_size, cell_size, context_len)
        l_hidden = NINLayer(
            l_concat_sub,
            num_units=opts.listener_cell_size,
            nonlinearity=None,
            name=id_tag + 'hidden')
        l_hidden_drop = maybe_dropout(l_hidden, 'hidden_drop')

        l_dot = broadcast_dot_layer(
            l_rec2_proj,
            l_hidden_drop,
            feature_dim=opts.listener_cell_size,
            id_tag=id_tag)
        # No bias layer is applied to the dot product (a BiasLayer named
        # 'dot_bias' was disabled by the original author).
        l_dot_clipped = NonlinearityLayer(
            l_dot,
            nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
            name=id_tag + 'dot_clipped')
        l_scores = NonlinearityLayer(
            l_dot_clipped, nonlinearity=softmax, name=id_tag + 'scores')

        return l_scores, [l_in] + context_inputs
示例#16
0
    def _get_l_out(self, input_vars):
        """Build a listener network ending in a dense softmax over contexts.

        ``input_vars[0]`` is the description (token sequence) variable; the
        remaining entries are passed to ``self.color_vec.get_input_layer``.

        Returns:
            A pair ``(l_scores, input_layers)`` where ``l_scores`` is a dense
            softmax layer with ``self.context_len`` units and
            ``input_layers`` is the description input layer followed by the
            context input layers.
        """
        opts = self.options
        check_options(opts)

        if self.id:
            id_tag = self.id + '/'
        else:
            id_tag = ''

        def maybe_dropout(layer, suffix):
            # Wrap ``layer`` in dropout only when a positive rate is set.
            if opts.listener_dropout > 0.0:
                return DropoutLayer(layer,
                                    p=opts.listener_dropout,
                                    name=id_tag + suffix)
            return layer

        input_var, context_vars = input_vars[0], input_vars[1:]

        l_in = InputLayer(
            shape=(None, self.seq_vec.max_len),
            input_var=input_var,
            name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(
            l_in,
            input_size=len(self.seq_vec.tokens),
            output_size=opts.listener_cell_size,
            name=id_tag + 'desc_embed')

        # Context repr has shape (batch_size, seq_len, context_len * repr_size)
        l_context_repr, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            recurrent_length=self.seq_vec.max_len,
            cell_size=opts.listener_cell_size,
            context_len=self.context_len,
            id=self.id)

        # The NIN stack runs with the last two axes swapped; they are swapped
        # back before concatenating with the token embeddings.
        l_hidden_context = dimshuffle(l_context_repr, (0, 2, 1))
        for depth in range(opts.listener_hidden_color_layers):
            l_hidden_context = NINLayer(
                l_hidden_context,
                num_units=opts.listener_cell_size,
                nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
                name=id_tag + 'hidden_context%d' % (depth + 1))
        l_hidden_context = dimshuffle(l_hidden_context, (0, 2, 1))
        l_concat = ConcatLayer(
            [l_hidden_context, l_in_embed],
            axis=2,
            name=id_tag + 'concat_inp_context')

        # Keyword arguments shared by both recurrent layers.
        cell = CELLS[opts.listener_cell]
        cell_kwargs = dict(
            grad_clipping=opts.listener_grad_clipping,
            num_units=opts.listener_cell_size,
        )
        if opts.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(opts.listener_forget_bias))
        if opts.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                opts.listener_nonlinearity]

        # Two stacked recurrent layers, each optionally followed by dropout.
        l_top = l_concat
        for rec_name in ('rec1', 'rec2'):
            l_top = cell(l_top, name=id_tag + rec_name, **cell_kwargs)
            l_top = maybe_dropout(l_top, rec_name + '_drop')

        l_hidden = DenseLayer(
            l_top,
            num_units=opts.listener_cell_size,
            nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
            name=id_tag + 'hidden')
        l_hidden_drop = maybe_dropout(l_hidden, 'hidden_drop')

        l_scores = DenseLayer(
            l_hidden_drop,
            num_units=self.context_len,
            nonlinearity=softmax,
            name=id_tag + 'scores')

        return l_scores, [l_in] + context_inputs
示例#17
0
    def __init__(self,
                 insize,
                 vocoder,
                 hiddensize=256,
                 nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                 ctx_nblayers=1,
                 ctx_nbfilters=2,
                 ctx_winlen=21,
                 nbcnnlayers=8,
                 nbfilters=16,
                 spec_freqlen=5,
                 noise_freqlen=5,
                 windur=0.025,
                 bn_axes=None,
                 noisesize=100):
        """Build the network: noisy input, context layers, per-feature branches.

        A uniform-noise layer of ``noisesize`` features is concatenated to the
        input, the result goes through ``layer_context``, and one output
        branch is built for each vocoder feature whose size is positive
        (f0, spectrum, noise, vuv, in that order). The branches are
        concatenated on axis 2 and handed to ``self.init_finish``.

        ``bn_axes`` defaults to ``[0, 1]`` when ``None``. ``noise_freqlen`` is
        stored on the instance but not otherwise used in this constructor.
        """
        if bn_axes is None:
            bn_axes = [0, 1]
        model.Model.__init__(self, insize, vocoder, hiddensize)

        # Context-layer hyper-parameters.
        self._ctx_nblayers = ctx_nblayers
        self._ctx_nbfilters = ctx_nbfilters
        self._ctx_winlen = ctx_winlen

        # Convolutional-branch hyper-parameters.
        self._nbcnnlayers = nbcnnlayers
        self._nbfilters = nbfilters
        self._spec_freqlen = spec_freqlen
        self._noise_freqlen = noise_freqlen
        self._windur = windur

        # Always odd (2*k + 1). The 0.005 divisor is presumably the frame
        # shift in seconds -- TODO confirm.
        winlen = int(0.5 * self._windur / 0.005) * 2 + 1

        net_input = ll.InputLayer(shape=(None, None, insize),
                                  input_var=self._input_values,
                                  name='ctx.input')
        noise_input = UniformNoiseLayer(net_input,
                                        noisesize,
                                        name='noise.input')
        # TODO Put the noise later on
        net_input = ll.ConcatLayer((net_input, noise_input),
                                   axis=2,
                                   name='concat.input')

        # NOTE(review): a literal [0, 1] is passed here rather than the
        # ``bn_axes`` argument -- confirm whether that is intended.
        self._layer_ctx = layer_context(net_input,
                                        ctx_nblayers=self._ctx_nblayers,
                                        ctx_nbfilters=self._ctx_nbfilters,
                                        ctx_winlen=self._ctx_winlen,
                                        hiddensize=self._hiddensize,
                                        nonlinearity=nonlinearity,
                                        bn_axes=[0, 1],
                                        bn_cnn_axes=[0, 2, 3])

        def blstm_projection(tag, size_fn, grad_clipping=50):
            # One forward/backward LSTM pair over the context layer, joined
            # on axis 2, followed by a linear projection. ``size_fn`` is
            # called only when the projection is created.
            branch = self._layer_ctx
            for layi in xrange(1):
                layerstr = '{}_l{}_BLSTM{}'.format(tag, 1 + layi,
                                                   self._hiddensize)
                lstm_fwd = models_basic.layer_LSTM(
                    branch,
                    self._hiddensize,
                    nonlinearity,
                    backwards=False,
                    grad_clipping=grad_clipping,
                    name=layerstr + '.fwd')
                lstm_bck = models_basic.layer_LSTM(
                    branch,
                    self._hiddensize,
                    nonlinearity,
                    backwards=True,
                    grad_clipping=grad_clipping,
                    name=layerstr + '.bck')
                branch = ll.ConcatLayer((lstm_fwd, lstm_bck),
                                        axis=2,
                                        name=layerstr + '.concat')
            return ll.DenseLayer(branch,
                                 num_units=size_fn(),
                                 nonlinearity=None,
                                 num_leading_axes=2,
                                 name=tag + '_lout_projection')

        layers_toconcat = []

        if vocoder.f0size() > 0:
            # F0 - BLSTM layer
            # TODO Replace by CNN ?? It didn't work well, maybe didn't work well with WGAN loss, but f0 is not more on WGAN loss
            layers_toconcat.append(blstm_projection('f0', vocoder.f0size))

        if vocoder.specsize() > 0:
            # Amplitude spectrum - 2D Gated Conv layers
            spec_dense = ll.DenseLayer(self._layer_ctx,
                                       vocoder.specsize(),
                                       nonlinearity=nonlinearity,
                                       num_leading_axes=2,
                                       name='spec_projection')
            layer_spec_proj = ll.batch_norm(spec_dense, axes=bn_axes)
            # Insert a singleton axis at position 1 for the 2D convolutions.
            layer_spec = ll.dimshuffle(layer_spec_proj, [0, 'x', 1, 2],
                                       name='spec_dimshuffle')
            for layi in xrange(nbcnnlayers):
                layerstr = 'spec_l{}_GC{}x{}x{}'.format(
                    1 + layi, self._nbfilters, winlen, self._spec_freqlen)
                gated = layer_GatedConv2DLayer(layer_spec,
                                               self._nbfilters,
                                               [winlen, self._spec_freqlen],
                                               stride=1,
                                               pad='same',
                                               nonlinearity=nonlinearity,
                                               name=layerstr)
                layer_spec = ll.batch_norm(gated)
            # Single-filter convolution, then move that axis last and
            # flatten back to 3 dimensions.
            layer_spec = ll.Conv2DLayer(layer_spec,
                                        1, [winlen, self._spec_freqlen],
                                        pad='same',
                                        nonlinearity=None,
                                        name='spec_lout_2DC')
            # NOTE(review): same layer name as the dimshuffle above.
            layer_spec = ll.dimshuffle(layer_spec, [0, 2, 3, 1],
                                       name='spec_dimshuffle')
            layer_spec = ll.flatten(layer_spec, outdim=3, name='spec_flatten')
            layers_toconcat.append(layer_spec)

        if vocoder.noisesize() > 0:
            # Noise - stack of fully connected layers (at least one).
            layer_noise = self._layer_ctx
            for layi in xrange(np.max((1, int(np.ceil(nbcnnlayers / 2))))):
                layerstr = 'noise_l{}_FC{}'.format(1 + layi, hiddensize)
                layer_noise = ll.DenseLayer(layer_noise,
                                            num_units=hiddensize,
                                            nonlinearity=nonlinearity,
                                            num_leading_axes=2,
                                            name=layerstr)
            # The PML vocoder gets a sigmoid output, any other a linear one.
            # sig is best among nonlin_saturatedsigmoid nonlin_tanh_saturated nonlin_tanh_bysigmoid
            if isinstance(vocoder, vocoders.VocoderPML):
                noise_out_nonlinearity = lasagne.nonlinearities.sigmoid
            else:
                noise_out_nonlinearity = None
            layer_noise = ll.DenseLayer(layer_noise,
                                        num_units=vocoder.nm_size,
                                        nonlinearity=noise_out_nonlinearity,
                                        num_leading_axes=2,
                                        name='lo_noise')
            layers_toconcat.append(layer_noise)

        if vocoder.vuvsize() > 0:
            # VUV - BLSTM layer
            layers_toconcat.append(blstm_projection('vuv', vocoder.vuvsize))

        layer = ll.ConcatLayer(layers_toconcat, axis=2, name='lout.concat')

        # Has to be called at the end of the __init__ to print out the
        # architecture, get the trainable params, etc.
        self.init_finish(layer)
示例#18
0
    def __init__(
            self,
            input_shape,
            output_dim,
            hidden_sizes,
            conv_filters,
            conv_filter_sizes,
            conv_strides,
            conv_pads,
            hidden_W_init=LI.GlorotUniform(),
            hidden_b_init=LI.Constant(0.),
            output_W_init=LI.GlorotUniform(),
            output_b_init=LI.Constant(0.),
            # conv_W_init=LI.GlorotUniform(), conv_b_init=LI.Constant(0.),
            hidden_nonlinearity=LN.rectify,
            output_nonlinearity=LN.softmax,
            name=None,
            input_var=None):
        """Build a conv stack, a dense stack and a dense output layer.

        For 2- or 3-element ``input_shape`` the network takes a flattened
        input of ``np.prod(input_shape)`` features and reshapes it
        internally; any other length is used as the input layer shape
        directly (after a leading ``None`` batch axis).

        One ``Conv2DLayer`` is created per entry of the zipped ``conv_*``
        sequences, then one ``DenseLayer`` per entry of ``hidden_sizes``,
        then the output ``DenseLayer`` with ``output_dim`` units. Layer names
        are prefixed with ``name + "_"`` when ``name`` is given.

        The input layer, output layer and input variable are stored on
        ``self._l_in``, ``self._l_out`` and ``self._input_var``.
        """
        prefix = "" if name is None else name + "_"

        rank = len(input_shape)
        if rank in (2, 3):
            # Flat input that is reshaped into an image-like tensor.
            l_in = L.InputLayer(shape=(None, np.prod(input_shape)),
                                input_var=input_var)
            if rank == 3:
                l_hid = L.reshape(l_in, ([0], ) + input_shape)
                # Move the last axis to position 1 (theano ordering).
                l_hid = L.dimshuffle(l_hid, (0, 3, 1, 2))
            else:
                # Add a leading axis of size 1 after the batch axis.
                l_hid = L.reshape(l_in, ([0], 1) + input_shape)
        else:
            l_in = L.InputLayer(shape=(None, ) + input_shape,
                                input_var=input_var)
            l_hid = l_in

        conv_specs = zip(conv_filters, conv_filter_sizes, conv_strides,
                         conv_pads)
        for idx, (n_filters, filter_size, stride, pad) in enumerate(
                conv_specs):
            l_hid = L.Conv2DLayer(
                l_hid,
                num_filters=n_filters,
                filter_size=filter_size,
                stride=(stride, stride),
                pad=pad,
                nonlinearity=hidden_nonlinearity,
                name="%sconv_hidden_%d" % (prefix, idx),
                convolution=wrapped_conv,
            )

        for idx, n_units in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(
                l_hid,
                num_units=n_units,
                nonlinearity=hidden_nonlinearity,
                name="%shidden_%d" % (prefix, idx),
                W=hidden_W_init,
                b=hidden_b_init,
            )

        self._l_in = l_in
        self._l_out = L.DenseLayer(
            l_hid,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name="%soutput" % (prefix, ),
            W=output_W_init,
            b=output_b_init,
        )
        self._input_var = l_in.input_var
示例#19
0
    def _get_l_out(self, input_vars, multi_utt='ignored'):
        """Build the listener network that scores each context item.

        ``input_vars[0]`` is the description (token sequence) variable; the
        remaining entries are passed to ``self.color_vec.get_input_layer``.
        ``multi_utt`` is accepted but not used by this method.

        Returns:
            A pair ``(l_scores, input_layers)`` where ``l_scores`` is the
            softmax layer applied on top of ``broadcast_dot_layer`` and
            ``input_layers`` is the description input layer followed by the
            context input layers.
        """
        opts = self.options
        check_options(opts)

        if self.id:
            id_tag = self.id + '/'
        else:
            id_tag = ''

        def named(suffix):
            # Every layer name carries the optional "<id>/" prefix.
            return id_tag + suffix

        def dropout_if_enabled(layer, suffix):
            # Wrap ``layer`` in dropout only when a positive rate is set.
            if opts.listener_dropout > 0.0:
                return DropoutLayer(layer, p=opts.listener_dropout, name=named(suffix))
            return layer

        input_var, context_vars = input_vars[0], input_vars[1:]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len),
                          input_var=input_var,
                          name=named('desc_input'))
        l_in_embed = EmbeddingLayer(l_in,
                                    input_size=len(self.seq_vec.tokens),
                                    output_size=opts.listener_cell_size,
                                    name=named('desc_embed'))

        # Context repr has shape (batch_size, seq_len, context_len * repr_size)
        l_context_repr, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            recurrent_length=self.seq_vec.max_len,
            cell_size=opts.listener_cell_size,
            context_len=self.context_len,
            id=self.id
        )
        # Split the last axis into (context_len, repr_size), then bring the
        # repr axis to position 1 for the NIN stack.
        split_shape = ([0], [1], self.context_len, self.color_vec.output_size)
        l_context_repr = reshape(l_context_repr, split_shape)
        l_hidden_context = dimshuffle(l_context_repr, (0, 3, 1, 2), name=named('shuffle_in'))
        for depth in range(opts.listener_hidden_color_layers):
            l_hidden_context = NINLayer(
                l_hidden_context,
                num_units=opts.listener_cell_size,
                nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
                b=Constant(0.1),
                name=named('hidden_context%d' % (depth + 1)))

        # Mean over the context axis, then drop that (now size-1) axis.
        l_pool = FeaturePoolLayer(l_hidden_context,
                                  pool_size=self.context_len,
                                  axis=3,
                                  pool_function=T.mean,
                                  name=named('pool'))
        l_pool_squeezed = reshape(l_pool, ([0], [1], [2]), name=named('pool_squeezed'))
        l_pool_shuffle = dimshuffle(l_pool_squeezed, (0, 2, 1), name=named('shuffle_out'))
        l_concat = ConcatLayer([l_pool_shuffle, l_in_embed],
                               axis=2,
                               name=named('concat_inp_context'))

        # Keyword arguments shared by both recurrent layers.
        cell = CELLS[opts.listener_cell]
        cell_kwargs = dict(
            grad_clipping=opts.listener_grad_clipping,
            num_units=opts.listener_cell_size,
        )
        if opts.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(b=Constant(opts.listener_forget_bias))
        if opts.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[opts.listener_nonlinearity]

        l_rec1 = cell(l_concat, name=named('rec1'), **cell_kwargs)
        l_rec1_drop = dropout_if_enabled(l_rec1, 'rec1_drop')
        # Only the final state of the second recurrent layer is kept.
        l_rec2 = cell(l_rec1_drop, name=named('rec2'), only_return_final=True, **cell_kwargs)
        l_rec2_drop = dropout_if_enabled(l_rec2, 'rec2_drop')

        l_rec2_proj = NINLayer(l_rec2_drop,
                               num_units=opts.listener_cell_size,
                               nonlinearity=None,
                               name=named('rec2_dense'))

        # Context is fed into the RNN as one copy for each time step; just use
        # the first time step for output.
        # Input shape: (batch_size, repr_size, seq_len, context_len)
        # Output shape: (batch_size, repr_size, context_len)
        l_context_nonrec = SliceLayer(l_hidden_context,
                                      indices=0,
                                      axis=2,
                                      name=named('context_nonrec'))
        l_pool_nonrec = SliceLayer(l_pool_squeezed,
                                   indices=0,
                                   axis=2,
                                   name=named('pool_nonrec'))

        # Output shape: (batch_size, repr_size, context_len)
        l_sub = broadcast_sub_layer(l_pool_nonrec,
                                    l_context_nonrec,
                                    feature_dim=opts.listener_cell_size,
                                    id_tag=id_tag)
        # Output shape: (batch_size, repr_size * 2, context_len)
        # NOTE(review): this layer reuses the name of the earlier concat
        # layer ('concat_inp_context') -- confirm whether that is intended.
        l_concat_sub = ConcatLayer([l_context_nonrec, l_sub],
                                   axis=1,
                                   name=named('concat_inp_context'))
        # Output shape: (batch_size, cell_size, context_len)
        l_hidden = NINLayer(l_concat_sub,
                            num_units=opts.listener_cell_size,
                            nonlinearity=None,
                            name=named('hidden'))
        l_hidden_drop = dropout_if_enabled(l_hidden, 'hidden_drop')

        l_dot = broadcast_dot_layer(l_rec2_proj,
                                    l_hidden_drop,
                                    feature_dim=opts.listener_cell_size,
                                    id_tag=id_tag)
        # No bias layer is applied to the dot product (a BiasLayer named
        # 'dot_bias' was disabled by the original author).
        l_dot_clipped = NonlinearityLayer(
            l_dot,
            nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
            name=named('dot_clipped'))
        l_scores = NonlinearityLayer(l_dot_clipped, nonlinearity=softmax, name=named('scores'))

        return l_scores, [l_in] + context_inputs
示例#20
0
    def _get_l_out(self, input_vars, multi_utt='ignored'):
        """Assemble the listener network and return its output and inputs.

        The first entry of `input_vars` feeds the description input layer;
        the remaining entries are handed to `get_embedding_layer`.
        `multi_utt` is not read anywhere in this method.

        Returns a pair `(l_scores, input_layers)`: the softmax output layer
        with `self.context_len` units, and a list holding the description
        input layer followed by the context input layers returned by
        `self.color_vec.get_input_layer`.
        """
        check_options(self.options)
        opts = self.options
        prefix = (self.id + '/') if self.id else ''

        def maybe_dropout(layer, label):
            # Dropout is only inserted when a positive rate is configured;
            # otherwise the layer is passed through untouched.
            if opts.listener_dropout > 0.0:
                return DropoutLayer(layer, p=opts.listener_dropout,
                                    name=prefix + label)
            return layer

        desc_var, extra_vars = input_vars[0], input_vars[1:]

        l_desc = InputLayer(shape=(None, self.seq_vec.max_len),
                            input_var=desc_var,
                            name=prefix + 'desc_input')
        l_embedded, context_vars = self.get_embedding_layer(l_desc, extra_vars)

        # Context repr has shape (batch_size, seq_len, context_len * repr_size)
        l_context, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            recurrent_length=self.seq_vec.max_len,
            cell_size=opts.listener_cell_size,
            context_len=self.context_len,
            id=self.id
        )

        # Swap the last two axes before the NIN stack and swap them back
        # afterwards, so the concatenation below sees the original layout.
        l_context = dimshuffle(l_context, (0, 2, 1))
        for depth in range(1, opts.listener_hidden_color_layers + 1):
            l_context = NINLayer(
                l_context,
                num_units=opts.listener_cell_size,
                nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
                name=prefix + 'hidden_context%d' % depth)
        l_context = dimshuffle(l_context, (0, 2, 1))

        l_joined = ConcatLayer([l_context, l_embedded], axis=2,
                               name=prefix + 'concat_inp_context')

        # Keyword arguments shared by both recurrent layers.
        make_cell = CELLS[opts.listener_cell]
        cell_kwargs = dict(grad_clipping=opts.listener_grad_clipping,
                           num_units=opts.listener_cell_size)
        if opts.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(opts.listener_forget_bias))
        if opts.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = \
                NONLINEARITIES[opts.listener_nonlinearity]

        l_rec = make_cell(l_joined, name=prefix + 'rec1', **cell_kwargs)
        l_rec = maybe_dropout(l_rec, 'rec1_drop')
        l_rec = make_cell(l_rec, name=prefix + 'rec2', **cell_kwargs)
        l_rec = maybe_dropout(l_rec, 'rec2_drop')

        l_hidden = DenseLayer(
            l_rec,
            num_units=opts.listener_cell_size,
            nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
            name=prefix + 'hidden')
        l_hidden = maybe_dropout(l_hidden, 'hidden_drop')

        l_scores = DenseLayer(l_hidden, num_units=self.context_len,
                              nonlinearity=softmax,
                              name=prefix + 'scores')

        return l_scores, [l_desc] + context_inputs
示例#21
0
    def build_critic(self,
                     critic_input_var,
                     condition_var,
                     vocoder,
                     ctxsize,
                     nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                     postlayers_nb=6,
                     use_LSweighting=True,
                     LSWGANtransfreqcutoff=4000,
                     LSWGANtranscoef=1.0 / 8.0,
                     use_WGAN_incnoisefeature=False):
        """Assemble the critic network.

        The spectral slice of the feature input (and, when
        `use_WGAN_incnoisefeature` is set and the vocoder reports a positive
        noise size, the noise slice as well) goes through a stack of gated
        2D convolutions. The resulting branches are concatenated on axis 2
        with the context branch built by `layer_context`, then passed
        through `postlayers_nb` dense layers and a final linear single-unit
        projection.

        Args:
            critic_input_var: variable bound to the feature input layer.
            condition_var: variable bound to the context input layer.
            vocoder: object providing `featuressize()`, `f0size()`,
                `specsize()`, `noisesize()` and `fs`.
            ctxsize: size of the last axis of the context input.
            nonlinearity: activation used by the convolutional, context and
                dense layers (the final projection is linear).
            postlayers_nb: number of dense layers after the concatenation.
            use_LSweighting: when true, multiply each sliced branch by
                weights computed with `nonlin_sigmoidparm`.
            LSWGANtransfreqcutoff: frequency passed to `sp.freq2fwspecidx`
                to locate the transition of those weights.
            LSWGANtranscoef: coefficient passed to `nonlin_sigmoidparm`.
            use_WGAN_incnoisefeature: whether to add the noise branch.

        Returns:
            The list `[output_layer, feature_input_layer,
            context_input_layer]`.
        """
        use_lrn = False  # TODO

        critic_in = ll.InputLayer(shape=(None, None, vocoder.featuressize()),
                                  input_var=critic_input_var,
                                  name='input')

        # Always odd by construction (2 * k + 1).
        # NOTE(review): 0.005 is presumably the frame shift in seconds - confirm.
        win_len = int(0.5 * self._windur / 0.005) * 2 + 1

        def gated_stack(net, depth, prefix, freq_len):
            # Chain `depth` gated convolutions, each optionally followed by
            # local response normalization.
            for idx in xrange(depth):
                label = '{}{}_GC{}x{}x{}'.format(
                    prefix, 1 + idx, self._nbfilters, win_len, freq_len)
                # strides>1 make the first two Conv layers pyramidal.
                # Increase patches' effects here and there, bad.
                net = layer_GatedConv2DLayer(net,
                                             self._nbfilters,
                                             [win_len, freq_len],
                                             pad='same',
                                             nonlinearity=nonlinearity,
                                             name=label)
                if use_lrn:
                    net = ll.LocalResponseNormalization2DLayer(net)
            return net

        branches = []

        # Amplitude spectrum
        # Assumed feature order along the last axis: f0, spectrum, noise.
        spec_range = slice(vocoder.f0size(),
                           vocoder.f0size() + vocoder.specsize())
        net = ll.SliceLayer(critic_in,
                            indices=spec_range,
                            axis=2,
                            name='spec_slice')

        if use_LSweighting:  # Using weighted WGAN+LS
            print(
                'WGAN Weighted LS - critic - SPEC (trans cutoff {}Hz)'.format(
                    LSWGANtransfreqcutoff))
            # The transition index comes from freq2fwspecidx (an earlier
            # variant scaled the cutoff by the spectrum size directly).
            spec_bins = np.arange(vocoder.specsize(),
                                  dtype=theano.config.floatX)
            spec_trans = sp.freq2fwspecidx(LSWGANtransfreqcutoff, vocoder.fs,
                                           vocoder.specsize())
            spec_weights = nonlin_sigmoidparm(spec_bins, spec_trans,
                                              LSWGANtranscoef)
            spec_shared = theano.shared(value=np.asarray(spec_weights),
                                        name='wganls_spec_weights_')
            net = CstMulLayer(net,
                              cstW=spec_shared,
                              name='cstdot_wganls_weights')

        # Insert a singleton channel axis for the 2D convolutions, then move
        # the channels last and flatten back to three dimensions.
        net = ll.dimshuffle(net, [0, 'x', 1, 2], name='spec_dimshuffle')
        net = gated_stack(net, self._nbcnnlayers, 'spec_l',
                          self._spec_freqlen)
        net = ll.dimshuffle(net, [0, 2, 3, 1], name='spec_dimshuffle')
        branches.append(ll.flatten(net, outdim=3, name='spec_flatten'))

        # Add noise in critic
        if use_WGAN_incnoisefeature and vocoder.noisesize() > 0:
            noise_range = slice(
                vocoder.f0size() + vocoder.specsize(),
                vocoder.f0size() + vocoder.specsize() + vocoder.noisesize())
            net = ll.SliceLayer(critic_in,
                                indices=noise_range,
                                axis=2,
                                name='nm_slice')

            if use_LSweighting:  # Using weighted WGAN+LS
                print('WGAN Weighted LS - critic - NM (trans cutoff {}Hz)'.
                      format(LSWGANtransfreqcutoff))
                noise_bins = np.arange(vocoder.noisesize(),
                                       dtype=theano.config.floatX)
                noise_trans = sp.freq2fwspecidx(LSWGANtransfreqcutoff,
                                                vocoder.fs,
                                                vocoder.noisesize())
                noise_weights = nonlin_sigmoidparm(noise_bins, noise_trans,
                                                   LSWGANtranscoef)
                noise_shared = theano.shared(
                    value=np.asarray(noise_weights),
                    name='wganls_spec_weights_')
                net = CstMulLayer(net,
                                  cstW=noise_shared,
                                  name='cstdot_wganls_weights')

            net = ll.dimshuffle(net, [0, 'x', 1, 2], name='nm_dimshuffle')
            # At least one layer, otherwise derived from half the spectral
            # stack depth.
            noise_depth = np.max((1, int(np.ceil(self._nbcnnlayers / 2))))
            net = gated_stack(net, noise_depth, 'nm_l', self._noise_freqlen)
            net = ll.dimshuffle(net, [0, 2, 3, 1], name='nm_dimshuffle')
            branches.append(ll.flatten(net, outdim=3, name='nm_flatten'))

        # Add the contexts
        ctx_in = ll.InputLayer(shape=(None, None, ctxsize),
                               input_var=condition_var,
                               name='ctx_input')
        branches.append(
            layer_context(ctx_in,
                          ctx_nblayers=self._ctx_nblayers,
                          ctx_nbfilters=self._ctx_nbfilters,
                          ctx_winlen=self._ctx_winlen,
                          hiddensize=self._hiddensize,
                          nonlinearity=nonlinearity,
                          bn_axes=None,
                          bn_cnn_axes=None,
                          critic=True,
                          useLRN=use_lrn))

        # Concatenate the features analysis with the contexts...
        net = ll.ConcatLayer(branches, axis=2, name='ctx_features.concat')

        # ... and finalize with a common FC network
        for idx in xrange(postlayers_nb):
            label = 'post.l{}_FC{}'.format(1 + idx, self._hiddensize)
            net = ll.DenseLayer(net,
                                self._hiddensize,
                                nonlinearity=nonlinearity,
                                num_leading_axes=2,
                                name=label)

        # output layer (linear): no nonlinearity on the projection
        net = ll.DenseLayer(net,
                            1,
                            nonlinearity=None,
                            num_leading_axes=2,
                            name='projection')
        return [net, critic_in, ctx_in]