Example #1
    def get_context(self, conv_in, avg=False):
        suf = '_avg' if avg else ''

        conv_out = []
        # for n in [2,3,4,5,6,7,8,9]:
        # for n in [2,3,4,5]:
        for n in self.args.context_ngrams:
            conv = conv_in
            for i in range(self.args.conv_layers):
                conv = L.Conv1DLayer(
                    conv,
                    128,
                    n,
                    name='conv_window_%d(%d)%s' % (n, i, suf),
                    # W=HeNormal('relu') if not avg else Constant()) # (100, 128, 15-n+1)
                    W=GlorotNormal('relu')
                    if not avg else Constant())  # (100, 128, 15-n+1)

            conv = L.MaxPool1DLayer(
                conv, self.args.window_size -
                (n - 1) * self.args.conv_layers)  # (100, 128, 1)
            conv = L.flatten(conv, 2)  # (100, 128)
            conv_out.append(conv)

        x = L.concat(conv_out, axis=1)  # (100, 1024)

        return x
Example #2
 def _buildConv(self):
     layer = layers.InputLayer(shape=(None, 3, 32, 32), input_var=self.X)
     layer = layers.DropoutLayer(layer, p=0.2)
     layer = maxoutConv(layer,
                        num_filters=32 * 5,
                        ds=5,
                        filter_size=(5, 5),
                        stride=(1, 1),
                        pad='same')
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = maxoutConv(layer,
                        num_filters=32 * 5,
                        ds=5,
                        filter_size=(5, 5),
                        stride=(1, 1),
                        pad='same')
     layer = layers.flatten(layer, outdim=2)  # the flatten layer is optional; DenseLayer flattens its input automatically
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = layers.DenseLayer(layer,
                               num_units=256,
                               W=init.GlorotUniform(),
                               b=init.Constant(0.),
                               nonlinearity=nonlinearities.rectify)
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = layers.DenseLayer(layer,
                               num_units=10,
                               W=init.GlorotUniform(),
                               b=init.Constant(0.),
                               nonlinearity=nonlinearities.softmax)
     return layer
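For context, here is a minimal sketch of how a graph like the one returned by `_buildConv` is typically compiled into training and prediction functions. The stand-in network, the variable names (`X`, `y`), and the choice of adam plus cross-entropy are illustrative assumptions, not part of the original class.

# Minimal sketch (assumptions noted above): compiling a Lasagne graph into callables.
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers, nonlinearities

X = T.tensor4('X')   # (batch, 3, 32, 32) images
y = T.ivector('y')   # integer class labels

# Stand-in for the graph returned by _buildConv(); any Lasagne graph built on X works the same way.
net = layers.InputLayer((None, 3, 32, 32), input_var=X)
net = layers.Conv2DLayer(net, num_filters=32, filter_size=(3, 3), pad='same',
                         nonlinearity=nonlinearities.rectify)
net = layers.DenseLayer(layers.flatten(net, 2), num_units=10,
                        nonlinearity=nonlinearities.softmax)

train_out = layers.get_output(net)                     # stochastic pass (dropout active)
test_out = layers.get_output(net, deterministic=True)  # deterministic pass

loss = lasagne.objectives.categorical_crossentropy(train_out, y).mean()
params = layers.get_all_params(net, trainable=True)
updates = lasagne.updates.adam(loss, params)

train_fn = theano.function([X, y], loss, updates=updates)
predict_fn = theano.function([X], T.argmax(test_out, axis=1))

xb = np.random.rand(8, 3, 32, 32).astype(theano.config.floatX)
yb = np.random.randint(0, 10, size=8).astype('int32')
print(train_fn(xb, yb))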
Example #3
def build_triamese_inception(inputlist, imgh=50, imgw=50):
    """
    'triamese' (one branch for each view, feeding a fully-connected network),
    model using a slightly modified set of Google inception modules
    """
    input_var_x, input_var_u, input_var_v = \
        inputlist[0], inputlist[1], inputlist[2]
    net = {}
    # Input layer
    tshape = (None, 1, imgw, imgh)
    net['input_x'] = InputLayer(shape=tshape, input_var=input_var_x)
    net['input_u'] = InputLayer(shape=tshape, input_var=input_var_u)
    net['input_v'] = InputLayer(shape=tshape, input_var=input_var_v)

    # nfilters: (pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5)
    nfilters = [32, 64, 96, 128, 16, 32]
    net.update(build_inception_module('inc_x1', net['input_x'], nfilters))
    net.update(build_inception_module('inc_u1', net['input_u'], nfilters))
    net.update(build_inception_module('inc_v1', net['input_v'], nfilters))

    net['dense_x'] = DenseLayer(
        dropout(flatten(net['inc_x1/output']), p=.5),
        num_units=100, nonlinearity=lasagne.nonlinearities.rectify)
    net['dense_u'] = DenseLayer(
        dropout(flatten(net['inc_u1/output']), p=.5),
        num_units=100, nonlinearity=lasagne.nonlinearities.rectify)
    net['dense_v'] = DenseLayer(
        dropout(flatten(net['inc_v1/output']), p=.5),
        num_units=100, nonlinearity=lasagne.nonlinearities.rectify)

    # Concatenate the parallel inputs
    net['concat'] = ConcatLayer((net['dense_x'],
                                 net['dense_u'],
                                 net['dense_v']))

    # And, finally, the 11-unit output layer with 50% dropout on its inputs:
    net['output_prob'] = DenseLayer(
        dropout(net['concat'], p=.5),
        num_units=11,
        nonlinearity=lasagne.nonlinearities.softmax)

    logger.info("n-parameters: {}".format(
        lasagne.layers.count_params(net['output_prob']))
    )
    return net['output_prob']
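A hedged usage sketch for `build_triamese_inception`; it assumes `build_inception_module`, the module-level `logger`, and the Lasagne imports used above are available, and the tensor names are illustrative.

import theano
import theano.tensor as T
import lasagne

# One tensor4 per view, matching the (None, 1, 50, 50) input shape above.
input_x = T.tensor4('x_view')
input_u = T.tensor4('u_view')
input_v = T.tensor4('v_view')

output_layer = build_triamese_inception([input_x, input_u, input_v], imgh=50, imgw=50)

probs = lasagne.layers.get_output(output_layer, deterministic=True)
predict_fn = theano.function([input_x, input_u, input_v], probs)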
Example #4
def build_triamese_inception(inputlist, imgh=50, imgw=50):
    """
    'triamese' (one branch for each view, feeding a fully-connected network),
    model using a slightly modified set of Google inception modules
    """
    input_var_x, input_var_u, input_var_v = \
        inputlist[0], inputlist[1], inputlist[2]
    net = {}
    # Input layer
    tshape = (None, 1, imgw, imgh)
    net['input_x'] = InputLayer(shape=tshape, input_var=input_var_x)
    net['input_u'] = InputLayer(shape=tshape, input_var=input_var_u)
    net['input_v'] = InputLayer(shape=tshape, input_var=input_var_v)

    # nfilters: (pool_proj, 1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5)
    nfilters = [32, 64, 96, 128, 16, 32]
    net.update(build_inception_module('inc_x1', net['input_x'], nfilters))
    net.update(build_inception_module('inc_u1', net['input_u'], nfilters))
    net.update(build_inception_module('inc_v1', net['input_v'], nfilters))

    net['dense_x'] = DenseLayer(
        dropout(flatten(net['inc_x1/output']), p=.5),
        num_units=100, nonlinearity=lasagne.nonlinearities.rectify)
    net['dense_u'] = DenseLayer(
        dropout(flatten(net['inc_u1/output']), p=.5),
        num_units=100, nonlinearity=lasagne.nonlinearities.rectify)
    net['dense_v'] = DenseLayer(
        dropout(flatten(net['inc_v1/output']), p=.5),
        num_units=100, nonlinearity=lasagne.nonlinearities.rectify)

    # Concatenate the parallel inputs
    net['concat'] = ConcatLayer((net['dense_x'],
                                 net['dense_u'],
                                 net['dense_v']))

    # And, finally, the 11-unit output layer with 50% dropout on its inputs:
    net['output_prob'] = DenseLayer(
        dropout(net['concat'], p=.5),
        num_units=11,
        nonlinearity=lasagne.nonlinearities.softmax)

    print("n-parameters: ", lasagne.layers.count_params(net['output_prob']))
    return net['output_prob']
Example #5
def build_cnn(input):
    #data_size = (None,103,130)  # Batch size x Img Channels x Height x Width

    #input_var = T.tensor3(name = "input",dtype='int64')
    input_var = input

    #values = np.array(np.random.randint(0,102,(1,9,50)))

    #input_var.tag.test_value = values
    #number sentences x words x characters
    input_layer = L.InputLayer((None,9,50), input_var=input)

    W = create_char_embedding_matrix()

    embed_layer = L.EmbeddingLayer(input_layer, input_size=103,output_size=101, W=W)
    #print "EMBED", L.get_output(embed_layer).tag.test_value.shape
    reshape_embed = L.reshape(embed_layer,(-1,50,101))
    #print "reshap embed", L.get_output(reshape_embed).tag.test_value.shape
    conv_layer_1 = L.Conv1DLayer(reshape_embed, 55, 2)
    conv_layer_2 = L.Conv1DLayer(reshape_embed, 55, 3)
    #print "TEST"
    #print "Convolution Layer 1", L.get_output(conv_layer_1).tag.test_value.shape
    #print "Convolution Layer 2", L.get_output(conv_layer_2).tag.test_value.shape

    #flatten_conv_1 = L.flatten(conv_layer_1,3)
    #flatten_conv_2 = L.flatten(conv_layer_2,3)

    #reshape_max_1 = L.reshape(flatten_conv_1,(-1,49))
    #reshape_max_2 = L.reshape(flatten_conv_2, (-1,48))

    #print "OUTPUT Flatten1", L.get_output(flatten_conv_1).tag.test_value.shape
    #print "OUTPUT Flatten2", L.get_output(flatten_conv_2).tag.test_value.shape

    #print "OUTPUT reshape_max_1", L.get_output(reshape_max_1).tag.test_value.shape
    #print "OUTPUT reshape_max_2", L.get_output(reshape_max_2).tag.test_value.shape

    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=54)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=53)


    #print "OUTPUT POOL1", L.get_output(pool_layer_1).tag.test_value.shape
    #print "OUTPUT POOL2",L.get_output(pool_layer_2).tag.test_value.shape

    merge_layer = L.ConcatLayer([pool_layer_1, pool_layer_2], 1)

    flatten_merge = L.flatten(merge_layer, 2)
    reshape_merge = L.reshape(flatten_merge, (1,9,110))
    print(L.get_output(reshape_embed).shape)
    #print L.get_output(reshape_merge).tag.test_value.shape

    return reshape_merge, char_index_lookup
Example #6
    def get_conv_input(self, sidx, tidx, avg=False):
        suf = '_avg' if avg else ''

        feat_embs = [
            self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
            for name in self.args.source_feats
        ]

        # TODO: change the meaning
        if self.args.lex == 'mix':
            concat_emb = L.ElemwiseSumLayer(feat_embs)  # (100, 15, 256)
        else:
            concat_emb = L.concat(feat_embs, axis=2)  # (100, 15, 256+100)

        pos = np.array([0] * (self.args.window_size // 2) + [1] + [0] *
                       (self.args.window_size // 2)).astype(
                           theano.config.floatX)
        post = theano.shared(pos[np.newaxis, :, np.newaxis],
                             borrow=True)  # (1, 15, 1)
        posl = L.InputLayer(
            (None, self.args.window_size, 1),
            input_var=T.extra_ops.repeat(post, sidx.shape[0],
                                         axis=0))  # (100, 15, 1)
        conv_in = L.concat([concat_emb, posl], axis=2)  # (100, 15, 256+1)

        if self.args.pos_emb:
            posint = L.flatten(
                L.ExpressionLayer(posl,
                                  lambda x: T.cast(x, 'int64')))  # (100, 15)
            pos_emb = L.EmbeddingLayer(
                posint,
                self.args.window_size,
                8,
                name='epos' + suf,
                W=Normal(0.01) if not avg else Constant())  # (100, 15, 8)
            pos_emb.params[pos_emb.W].remove('regularizable')
            conv_in = L.concat([concat_emb, posl, pos_emb],
                               axis=2)  # (100, 15, 256+1+8)

        # # squeeze
        # if self.args.squeeze:
        #     conv_in = L.DenseLayer(conv_in, num_units=self.args.squeeze, name='squeeze'+suf, num_leading_axes=2,
        #                     W=HeNormal('relu')) # (100, 15, 256)

        conv_in = L.dimshuffle(conv_in, (0, 2, 1))  # (100, 256+1, 15)

        return conv_in
Example #7
def _build(X):
    layer = layers.InputLayer(shape=(None, 1, 28, 28), input_var=X)
    layer = layers.Conv2DLayer(layer,
                               num_filters=32,
                               filter_size=(5, 5),
                               stride=(1, 1),
                               pad='same',
                               untie_biases=False,
                               W=init.GlorotUniform(),
                               b=init.Constant(0.),
                               nonlinearity=nonlinearities.rectify)
    visual1 = layers.get_output(layer)
    layer = layers.MaxPool2DLayer(layer,
                                  pool_size=(2, 2),
                                  stride=None,
                                  pad=(0, 0),
                                  ignore_border=False)
    layer = layers.Conv2DLayer(layer,
                               num_filters=32,
                               filter_size=(5, 5),
                               stride=(1, 1),
                               pad='same',
                               untie_biases=False,
                               W=init.GlorotUniform(),
                               b=init.Constant(0.),
                               nonlinearity=nonlinearities.rectify)
    visual2 = layers.get_output(layer)
    layer = layers.MaxPool2DLayer(layer,
                                  pool_size=(2, 2),
                                  stride=None,
                                  pad=(0, 0),
                                  ignore_border=False)
    layer = layers.flatten(layer, outdim=2)
    layer = layers.DropoutLayer(layer, p=0.5)
    layer = layers.DenseLayer(layer,
                              num_units=256,
                              W=init.GlorotUniform(),
                              b=init.Constant(0.),
                              nonlinearity=nonlinearities.rectify)
    layer = layers.DropoutLayer(layer, p=0.5)
    layer = layers.DenseLayer(layer,
                              num_units=10,
                              W=init.GlorotUniform(),
                              b=init.Constant(0.),
                              nonlinearity=nonlinearities.softmax)
    return layer, visual1, visual2
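A small sketch of how the `(layer, visual1, visual2)` triple returned by `_build` can be used to inspect the intermediate feature maps; it assumes the `layers`/`init`/`nonlinearities` imports used above are in scope, and the variable name `X` is an assumption.

import theano
import theano.tensor as T
from lasagne import layers

X = T.tensor4('X')                       # (batch, 1, 28, 28) MNIST-style input
network, visual1, visual2 = _build(X)

probs = layers.get_output(network, deterministic=True)
# One compiled function returns class probabilities plus both conv feature maps.
inspect_fn = theano.function([X], [probs, visual1, visual2])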
Example #8
def layer_context(layer_ctx,
                  ctx_nblayers,
                  ctx_nbfilters,
                  ctx_winlen,
                  hiddensize,
                  nonlinearity,
                  bn_axes=None,
                  bn_cnn_axes=None,
                  critic=False,
                  useLRN=True):

    layer_ctx = ll.dimshuffle(layer_ctx, [0, 'x', 1, 2],
                              name='ctx.dimshuffle_to_2DCNN')
    for layi in xrange(ctx_nblayers):
        layerstr = 'ctx.l' + str(1 + layi) + '_CNN{}x{}x{}'.format(
            ctx_nbfilters, ctx_winlen, 1)
        layer_ctx = ll.Conv2DLayer(layer_ctx,
                                   num_filters=ctx_nbfilters,
                                   filter_size=[ctx_winlen, 1],
                                   stride=1,
                                   pad='same',
                                   nonlinearity=nonlinearity,
                                   name=layerstr)
        if not critic and (not bn_cnn_axes is None):
            layer_ctx = ll.batch_norm(layer_ctx, axes=bn_cnn_axes)
        # layer_ctx = ll.batch_norm(layer_GatedConv2DLayer(layer_ctx, ctx_nbfilters, [ctx_winlen,1], stride=1, pad='same', nonlinearity=nonlinearity, name=layerstr))
        if critic and useLRN:
            layer_ctx = ll.LocalResponseNormalization2DLayer(layer_ctx)
    layer_ctx = ll.dimshuffle(layer_ctx, [0, 2, 3, 1],
                              name='ctx.dimshuffle_back')
    layer_ctx = ll.flatten(layer_ctx, outdim=3, name='ctx.flatten')

    for layi in xrange(2):
        layerstr = 'ctx.l' + str(1 + ctx_nblayers +
                                 layi) + '_FC{}'.format(hiddensize)
        layer_ctx = ll.DenseLayer(layer_ctx,
                                  hiddensize,
                                  nonlinearity=nonlinearity,
                                  num_leading_axes=2,
                                  name=layerstr)
        if not critic and (not bn_axes is None):
            layer_ctx = ll.batch_norm(layer_ctx, axes=bn_axes)

    return layer_ctx
Example #9
def build_model(input_var):
    layer = layers.InputLayer(shape=(None, 3, 224, 224), input_var=input_var)
    layer = layers.Conv2DLayer(layer, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same')
    layer = layers.MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2), pad=(0, 0), ignore_border=False)
    layer = layers.Conv2DLayer(layer, num_filters=128, filter_size=(3, 3), stride=(1, 1), pad='same')
    layer = layers.MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2), pad=(0, 0), ignore_border=False)
    layer = layers.Conv2DLayer(layer, num_filters=256, filter_size=(3, 3), stride=(1, 1), pad='same')
    layer = layers.MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2), pad=(0, 0), ignore_border=False)
    layer = layers.Conv2DLayer(layer, num_filters=512, filter_size=(3, 3), stride=(1, 1), pad='same')
    layer = layers.MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2), pad=(0, 0), ignore_border=False)
    layer = layers.Conv2DLayer(layer, num_filters=512, filter_size=(3, 3), stride=(1, 1), pad='same')
    layer = layers.MaxPool2DLayer(layer, pool_size=(3, 3), stride=(2, 2), pad=(0, 0), ignore_border=False)
    layer = layers.flatten(layer, outdim=2)
    layer = layers.DenseLayer(layer, num_units=4096, nonlinearity=nonlinearities.rectify)
    layer = layers.DropoutLayer(layer, p=0.5)
    layer = layers.DenseLayer(layer, num_units=4096, nonlinearity=nonlinearities.rectify)
    layer = layers.DropoutLayer(layer, p=0.5)
    layer = layers.DenseLayer(layer, num_units=2, nonlinearity=nonlinearities.softmax)
    return layer
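A brief usage sketch for `build_model`, assuming the `layers`/`nonlinearities` imports it relies on are in scope; the variable name is illustrative.

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')          # (batch, 3, 224, 224)
network = build_model(input_var)

print("parameters:", lasagne.layers.count_params(network))

probs = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], T.argmax(probs, axis=1))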
Example #10
 def _build(self):
     layer = layers.InputLayer(shape=(None, 3, 32, 32), input_var=self.X)
     layer = nin(layer,
                 conv_filters=192,
                 filter_size=(5, 5),
                 pad=2,
                 cccp1_filters=160,
                 cccp2_filters=96)
     layer = layers.Pool2DLayer(layer,
                                pool_size=(3, 3),
                                stride=2,
                                pad=(0, 0),
                                ignore_border=False,
                                mode='max')
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = nin(layer,
                 conv_filters=192,
                 filter_size=(5, 5),
                 pad=2,
                 cccp1_filters=192,
                 cccp2_filters=192)
     layer = layers.Pool2DLayer(layer,
                                pool_size=(3, 3),
                                stride=2,
                                ignore_border=False,
                                mode='average_exc_pad')
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = nin(layer,
                 conv_filters=192,
                 filter_size=(3, 3),
                 pad=1,
                 cccp1_filters=192,
                 cccp2_filters=10)
     layer = layers.Pool2DLayer(layer,
                                pool_size=(8, 8),
                                stride=1,
                                ignore_border=False,
                                mode='average_exc_pad')
     layer = layers.flatten(layer, outdim=2)
     layer = layers.NonlinearityLayer(layer,
                                      nonlinearity=nonlinearities.softmax)
     return layer
Example #11
 def _build(self):
     layer = layers.InputLayer(shape=(None, 3, 112, 112), input_var=self.X)
     layer = layers.Conv2DLayer(layer, num_filters=64, filter_size=(5, 5), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.MaxPool2DLayer(layer, pool_size=(2, 2), stride=None, pad=(0, 0), ignore_border=False)
     layer = layers.Conv2DLayer(layer, num_filters=64, filter_size=(5, 5), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.MaxPool2DLayer(layer, pool_size=(8, 8), stride=None, pad=(0, 0), ignore_border=False)
     layer = layers.flatten(layer, outdim=2)  # the flatten layer is optional; DenseLayer flattens its input automatically
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = layers.DenseLayer(layer, num_units=2048,
                               W=init.GlorotUniform(), b=init.Constant(0.),
                               nonlinearity=nonlinearities.rectify)
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = layers.DenseLayer(layer, num_units=2,
                               W=init.GlorotUniform(), b=init.Constant(0.),
                               nonlinearity=nonlinearities.softmax)
     return layer
Example #12
def _create_network(available_actions_num, input_shape, visual_input_var, n_variables, variables_input_var):

    dqn = InputLayer(shape=[None, input_shape.frames, input_shape.y, input_shape.x], input_var=visual_input_var)

    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8], stride=[4, 4],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4], stride=[2, 2],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))

    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3],
                      nonlinearity=rectify, W=GlorotUniform("relu"),
                      b=Constant(.1))
    if n_variables > 0:
        variables_layer = InputLayer(shape=[None, n_variables], input_var=variables_input_var)
        dqn = ConcatLayer((flatten(dqn), variables_layer))
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))

    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)
    return dqn
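A hedged sketch of evaluating this DQN head and picking greedy actions; it assumes `_create_network` and its Lasagne imports are in scope. The `InputShape` namedtuple is a hypothetical stand-in for whatever object the caller passes as `input_shape`, and the concrete sizes are illustrative.

from collections import namedtuple
import theano
import theano.tensor as T
from lasagne.layers import get_output

InputShape = namedtuple('InputShape', ['frames', 'y', 'x'])   # hypothetical stand-in

visual_input_var = T.tensor4('frames')        # (batch, frames, y, x)
variables_input_var = T.matrix('game_vars')   # (batch, n_variables)

dqn = _create_network(available_actions_num=6,
                      input_shape=InputShape(frames=4, y=84, x=84),
                      visual_input_var=visual_input_var,
                      n_variables=2,
                      variables_input_var=variables_input_var)

q_values = get_output(dqn)
greedy_action = T.argmax(q_values, axis=1)
act_fn = theano.function([visual_input_var, variables_input_var], greedy_action)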
Example #13
def create_nn():

	'''
	Returns the compiled theano functions (train, test) and the 'KerasNet' network.

	Uses adam with its default values: learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08.

	Input to the NN is (batch_size, 3, 32, 32) together with the corresponding one-hot class targets (batch_size, nb_classes).
	'''

	l_in = InputLayer((batch_size,3,32,32))
	l_in_bn = BatchNormLayer(l_in)
	
	conv1 = Conv2DLayer(l_in_bn,pad='same',num_filters=64,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx64x32x32
	conv1_1 = Conv2DLayer(conv1,pad='same',num_filters=64,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx64x32x32
	conv1_mp = MaxPool2DLayer(conv1_1,pool_size=(2,2)) #Bx64x16x16
	conv1_do = dropout(conv1_mp,p=0.25)

	conv2 = Conv2DLayer(conv1_do,pad='same',num_filters=128,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx128x16x16
	conv2_1 = Conv2DLayer(conv2,pad='same',num_filters=128,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx128x16x16
	conv2_mp = MaxPool2DLayer(conv2_1,pool_size=(2,2)) #Bx128x8x8
	conv2_do = dropout(conv2_mp,p=0.25)

	flat = flatten(conv2_do,2) #Bx8192
	fc = DenseLayer(flat,num_units=512,nonlinearity=lasagne.nonlinearities.rectify) #Bx512
	fc_do = dropout(fc, p=0.5) 
	network = DenseLayer(fc_do, num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax	) #Bxnb_classes

	net_output = lasagne.layers.get_output(network)
	true_output = T.matrix()

	all_params = lasagne.layers.get_all_params(network,trainable=True)
	loss = T.mean(lasagne.objectives.categorical_crossentropy(net_output,true_output))
	updates = lasagne.updates.adam(loss,all_params)

	train = theano.function(inputs= [l_in.input_var,true_output] , outputs=[net_output,loss], updates = updates)
	test = theano.function(inputs= [l_in.input_var], outputs= [net_output])

	return train,test,network
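A short usage sketch for `create_nn`. Because the compiled `train` function takes `T.matrix()` targets, labels are one-hot encoded here; `batch_size` and `nb_classes` refer to the module-level globals the function already relies on.

import numpy as np
import theano

train, test, network = create_nn()

xb = np.random.rand(batch_size, 3, 32, 32).astype(theano.config.floatX)
labels = np.random.randint(0, nb_classes, size=batch_size)
yb = np.eye(nb_classes, dtype=theano.config.floatX)[labels]   # one-hot targets

net_out, loss = train(xb, yb)
(probs,) = test(xb)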
Example #14
def build_network(W,
                  number_unique_tags,
                  longest_word,
                  longest_sentence,
                  input_var=None):
    print("Building network ...")

    input_layer = L.InputLayer((None, longest_sentence, longest_word),
                               input_var=input_var)

    embed_layer = L.EmbeddingLayer(input_layer,
                                   input_size=103,
                                   output_size=101,
                                   W=W)

    reshape_embed = L.reshape(embed_layer, (-1, longest_word, 101))

    conv_layer_1 = L.Conv1DLayer(reshape_embed, longest_word, 2)
    conv_layer_2 = L.Conv1DLayer(reshape_embed, longest_word, 3)

    pool_layer_1 = L.MaxPool1DLayer(conv_layer_1, pool_size=longest_word - 1)
    pool_layer_2 = L.MaxPool1DLayer(conv_layer_2, pool_size=longest_word - 2)

    merge_layer = L.ConcatLayer([pool_layer_1, pool_layer_2], 1)
    flatten_merge = L.flatten(merge_layer, 2)
    reshape_merge = L.reshape(flatten_merge,
                              (-1, longest_sentence, int(longest_word * 2)))

    l_re = lasagne.layers.RecurrentLayer(
        reshape_merge,
        N_HIDDEN,
        nonlinearity=lasagne.nonlinearities.sigmoid,
        mask_input=None)
    l_out = lasagne.layers.DenseLayer(
        l_re, number_unique_tags, nonlinearity=lasagne.nonlinearities.softmax)

    print "DONE BUILDING NETWORK"
    return l_out
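A hedged usage sketch for `build_network`; it assumes the module-level `N_HIDDEN` and lasagne imports the function relies on are in scope. The (103, 101) embedding shape matches the `EmbeddingLayer` above, while the tag count and sentence/word lengths are illustrative assumptions.

import numpy as np
import theano.tensor as T

input_var = T.itensor3('char_ids')   # (batch, sentence length, word length)
W_emb = np.random.uniform(-0.05, 0.05, size=(103, 101)).astype('float32')

l_out = build_network(W_emb,
                      number_unique_tags=17,
                      longest_word=50,
                      longest_sentence=9,
                      input_var=input_var)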
Example #15
 def _build(self):
     layer = layers.InputLayer(shape=(None, 1, 28, 28), input_var=self.X)
     layer = layers.Conv2DLayer(layer, num_filters=128, filter_size=(1, 1), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.Conv2DLayer(layer, num_filters=128, filter_size=(1, 1), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.Pool2DLayer(layer, pool_size=(2, 2), stride=None, pad=(0, 0), ignore_border=False,
                                mode='average_exc_pad')
     layer = layers.Conv2DLayer(layer, num_filters=512, filter_size=(1, 1), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.Conv2DLayer(layer, num_filters=512, filter_size=(1, 1), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.Pool2DLayer(layer, pool_size=(2, 2), stride=None, pad=(0, 0), ignore_border=False,
                                mode='average_exc_pad')
     layer = layers.Conv2DLayer(layer, num_filters=2048, filter_size=(1, 1), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.Conv2DLayer(layer, num_filters=2048, filter_size=(1, 1), stride=(1, 1), pad='same',
                                untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.),
                                nonlinearity=nonlinearities.rectify)
     layer = layers.Pool2DLayer(layer, pool_size=(2, 2), stride=None, pad=(0, 0), ignore_border=False,
                                mode='max')
     layer = layers.flatten(layer, outdim=2)  # the flatten layer is optional; DenseLayer flattens its input automatically
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = layers.DenseLayer(layer, num_units=256,
                               W=init.GlorotUniform(), b=init.Constant(0.),
                               nonlinearity=nonlinearities.rectify)
     layer = layers.DropoutLayer(layer, p=0.5)
     layer = layers.DenseLayer(layer, num_units=10,
                               W=init.GlorotUniform(), b=init.Constant(0.),
                               nonlinearity=nonlinearities.softmax)
     return layer
Example #16
		drop = dropout(pool,0.2)

		return drop

	l_in = InputLayer(shape=(None, 3, 32, 32))  # assumed CIFAR-style (batch, 3, 32, 32) input
	inputNorm = BatchNormLayer(l_in)
	input_drop = dropout(inputNorm,0.2)

	## The network has 3 sets of conv and maxout networks. 
	set1 = get_multiple_block(input_drop,num_filt=32,k=3,justheconv=1)
	set2 = get_multiple_block(set1,num_filt=48,k=2)
	set3 = get_multiple_block(set2,num_filt=80)
	set4 = get_multiple_block(set3,num_filt=128, pooling_size=(8,8))

	# Dense Layers follow.
	h_flat = flatten(set4)

	## 5 Way Max-Out Layer (DenseMaxout)

	'''
	Reference - https://github.com/fchollet/keras/pull/3128
	'''
	
	h_dense = []
	for _ in xrange(5):
		h_dense.append( DenseLayer(h_flat,500,W = lasagne.init.GlorotUniform(), nonlinearity = lasagne.nonlinearities.linear))
	
	h17 = ElemwiseMergeLayer(h_dense, merge_function=T.maximum)

	h17 = BatchNormLayer(h17)
	h17_drop = dropout(h17,0.2)
Example #17
    def __init__(self,
                 gate_controllers,
                 channels,
                 gate_nonlinearities=nonlinearities.sigmoid,
                 bias_init=init.Constant(),
                 weight_init=init.Normal(),
                 **kwargs):
        """
        An overly generic interface for a one-step gate, stacked gates, or a gate applier.
        If several channels are given, stacks them for quicker execution. 
        
        gate_controllers - a single layer or a list/tuple of such
            layers that the gate depends on (for most RNNs, that's the input and the previous memory state)
        
        channels - a single layer or integer or a list/tuple of layers/integers
            if a layer, that defines a layer that should be multiplied by the gate output
            if an integer - that defines a number of units of a gate -- and these are the units to be returned
            
        gate_nonlinearities - a single function or a list of such(channel-wise), 
            - defining nonlinearities for gates on corresponding channels
        
        bias_init - an initializer or a list (channel-wise) of initializers for bias(b) parameters 
            - (None, lasagne.init, theano variable or numpy array) 
            - None means no bias
        weight_init - an initializer OR a list of initializers (channel-wise) 
            - OR a list of lists of initializers (channel, controller) 
            - (lasagne.init, theano variable or numpy array) 
        """
        
        self.channels = check_list(channels)
        self.gate_controllers = check_list(gate_controllers)

        # check channel types
        for chl in self.channels:
            assert is_layer(chl) or (type(chl) == int)

        # separate layers from non-layers
        self.channel_layers = list(filter(is_layer, self.channels))
        self.channel_ints = [v for v in self.channels if not is_layer(v)]

        # flatten layers to 2 dimensions
        for i in range(len(self.channel_layers)):
            layer = self.channel_layers[i]
            if type(layer) == int:
                continue

            lname = layer.name or ""
            if len(layer.output_shape) != 2:
                warn("One of the channels (name='%s') has an input dimension of %s and will be flattened." % (
                    lname, layer.output_shape))
                self.channel_layers[i] = flatten(layer,
                                                 outdim=2,
                                                 name=lname)
                assert len(self.channel_layers[i].output_shape) == 2

        # flatten layers to 2 dimensions
        for i in range(len(self.gate_controllers)):
            layer = self.gate_controllers[i]
            lname = layer.name or ""
            if len(layer.output_shape) != 2:
                warn("One of the gate controllers (name='%s') has an input dimension of %s and will be flattened." % (
                    lname, layer.output_shape))
                self.gate_controllers[i] = flatten(layer,
                                                   outdim=2,
                                                   name=lname)
                assert len(self.gate_controllers[i].output_shape) == 2

        # initialize merge layer
        incomings = self.channel_layers + self.gate_controllers

        # default name
        kwargs["name"] = kwargs.get("name", "YetAnother" + self.__class__.__name__)
        
        output_names = ["%s.channel.%i"%(kwargs["name"],i) for i in range(len(self.channels))]

        
        # determine whether or not user defined a fixed batch size
        batch_sizes = [chl.output_shape[0] for chl in filter(is_layer, self.channels)]
        batch_size = reduce(lambda a,b: a or b, batch_sizes,None)

        
        output_shapes = [ chl.output_shape if is_layer(chl) else (batch_size,chl) for chl in self.channels]
        output_shapes = OrderedDict(zip(output_names,output_shapes))
        
        output_dtypes = [ get_layer_dtype(chl) for chl in self.channels]
        output_dtypes = OrderedDict(zip(output_names,output_dtypes))
        
        
        super(GateLayer, self).__init__(incomings, 
                                        output_shapes=output_shapes,
                                        output_dtypes=output_dtypes,
                                        **kwargs)

        # nonlinearities
        self.gate_nonlinearities = check_list(gate_nonlinearities)
        self.gate_nonlinearities = [(nl if (nl is not None) else (lambda v: v))
                                    for nl in self.gate_nonlinearities]
        # must be either one common nonlinearity or one per channel
        assert len(self.gate_nonlinearities) in (1, len(self.channels))

        if len(self.gate_nonlinearities) == 1:
            self.gate_nonlinearities *= len(self.channels)

        # cast bias init to a list
        bias_init = check_list(bias_init)
        assert len(bias_init) in (1, len(self.channels))
        if len(bias_init) == 1:
            bias_init *= len(self.channels)

        # cast weight init to a list of lists [channel][controller]
        weight_init = check_list(weight_init)
        assert len(weight_init) in (1, len(self.channels))
        if len(weight_init) == 1:
            weight_init *= len(self.channels)

        for i in range(len(self.channels)):
            weight_init[i] = check_list(weight_init[i])
            assert len(weight_init[i]) in (1, len(self.gate_controllers))
            if len(weight_init[i]) == 1:
                weight_init[i] *= len(self.gate_controllers)

        self.gate_b = []  # a list of biases for channels
        self.gate_W = [list() for _ in self.gate_controllers]  # a list of lists of weights [controller][channel]
        
        for chl_i, (channel, b_init, channel_w_inits) in enumerate(zip(self.channels,
                                                                       bias_init,
                                                                       weight_init
                                                                       )):

            if is_layer(channel):
                channel_name = channel.name or "chl" + str(chl_i)
                channel_n_units = channel.output_shape[1]
            else:
                channel_name = "chl" + str(chl_i)
                channel_n_units = channel

            # add bias
            if b_init is not None:
                self.gate_b.append(
                    self.add_param(
                        spec=b_init,
                        shape=(channel_n_units,),
                        name="b_%s" % (channel_name)
                    )
                )
            else:
                self.gate_b.append(T.zeros((channel_n_units,)))

            # add weights
            for ctrl_i, (controller, w_init) in enumerate(zip(self.gate_controllers,
                                                              channel_w_inits
                                                              )):
                ctrl_name = controller.name or "ctrl" + str(ctrl_i)
                # add bias
                self.gate_W[ctrl_i].append(
                    self.add_param(
                        spec=w_init,
                        shape=(controller.output_shape[1], channel_n_units),
                        name="W_%s_%s" % (ctrl_name, channel_name)
                    ))

        # a list where i-th element contains weights[i-th_gate_controller] for all outputs stacked
        self.gate_W_stacked = [T.concatenate(weights, axis=1) for weights in self.gate_W]
        # a list of biases for the respective outputs stacked
        self.gate_b_stacked = T.concatenate(self.gate_b)
Example #18
    def __init__(self,
                 gate_controllers,
                 channels,
                 gate_nonlinearities=nonlinearities.sigmoid,
                 bias_init=init.Constant(),
                 weight_init=init.Normal(),
                 **kwargs):

        
        self.channels = check_list(channels)
        self.gate_controllers = check_list(gate_controllers)

        # check channel types
        for chl in self.channels:
            assert is_layer(chl) or (type(chl) == int)

        # separate layers from non-layers
        self.channel_layers = list(filter(is_layer, self.channels))
        self.channel_ints = [v for v in self.channels if not is_layer(v)]

        # flatten layers to 2 dimensions
        for i in range(len(self.channel_layers)):
            layer = self.channel_layers[i]
            if type(layer) == int:
                continue

            lname = layer.name or ""
            if len(layer.output_shape) != 2:
                warn("One of the channels (name='%s') has an input dimension of %s and will be flattened." % (
                    lname, layer.output_shape))
                self.channel_layers[i] = flatten(layer,
                                                 outdim=2,
                                                 name=lname)
                assert len(self.channel_layers[i].output_shape) == 2

        # flatten layers to 2 dimensions
        for i in range(len(self.gate_controllers)):
            layer = self.gate_controllers[i]
            lname = layer.name or ""
            if len(layer.output_shape) != 2:
                warn("One of the gate controllers (name='%s') has an input dimension of %s and will be flattened." % (
                    lname, layer.output_shape))
                self.gate_controllers[i] = flatten(layer,
                                                   outdim=2,
                                                   name=lname)
                assert len(self.gate_controllers[i].output_shape) == 2

        # initialize merge layer
        incomings = self.channel_layers + self.gate_controllers

        # default name
        kwargs["name"] = kwargs.get("name", "YetAnother" + self.__class__.__name__)
        
        output_names = ["%s.channel.%i"%(kwargs["name"],i) for i in range(len(self.channels))]

        
        # determine whether or not user defined a fixed batch size
        batch_sizes = [chl.output_shape[0] for chl in filter(is_layer, self.channels)]
        batch_size = reduce(lambda a,b: a or b, batch_sizes,None)

        
        output_shapes = [ chl.output_shape if is_layer(chl) else (batch_size,chl) for chl in self.channels]
        output_shapes = OrderedDict(zip(output_names,output_shapes))
        
        output_dtypes = [ get_layer_dtype(chl) for chl in self.channels]
        output_dtypes = OrderedDict(zip(output_names,output_dtypes))
        
        
        super(GateLayer, self).__init__(incomings, 
                                        output_shapes=output_shapes,
                                        output_dtypes=output_dtypes,
                                        **kwargs)

        # nonlinearities
        self.gate_nonlinearities = check_list(gate_nonlinearities)
        self.gate_nonlinearities = [(nl if (nl is not None) else (lambda v: v))
                                    for nl in self.gate_nonlinearities]
        # must be either one common nonlinearity or one per channel
        assert len(self.gate_nonlinearities) in (1, len(self.channels))

        if len(self.gate_nonlinearities) == 1:
            self.gate_nonlinearities *= len(self.channels)

        # cast bias init to a list
        bias_init = check_list(bias_init)
        assert len(bias_init) in (1, len(self.channels))
        if len(bias_init) == 1:
            bias_init *= len(self.channels)

        # cast weight init to a list of lists [channel][controller]
        weight_init = check_list(weight_init)
        assert len(weight_init) in (1, len(self.channels))
        if len(weight_init) == 1:
            weight_init *= len(self.channels)

        for i in range(len(self.channels)):
            weight_init[i] = check_list(weight_init[i])
            assert len(weight_init[i]) in (1, len(self.gate_controllers))
            if len(weight_init[i]) == 1:
                weight_init[i] *= len(self.gate_controllers)

        self.gate_b = []  # a list of biases for channels
        self.gate_W = [list() for _ in self.gate_controllers]  # a list of lists of weights [controller][channel]
        
        for chl_i, (channel, b_init, channel_w_inits) in enumerate(zip(self.channels,
                                                                       bias_init,
                                                                       weight_init
                                                                       )):

            if is_layer(channel):
                channel_name = channel.name or "chl" + str(chl_i)
                channel_n_units = channel.output_shape[1]
            else:
                channel_name = "chl" + str(chl_i)
                channel_n_units = channel

            # add bias
            if b_init is not None:
                self.gate_b.append(
                    self.add_param(
                        spec=b_init,
                        shape=(channel_n_units,),
                        name="b_%s" % (channel_name)
                    )
                )
            else:
                self.gate_b.append(T.zeros((channel_n_units,)))

            # add weights
            for ctrl_i, (controller, w_init) in enumerate(zip(self.gate_controllers,
                                                              channel_w_inits
                                                              )):
                ctrl_name = controller.name or "ctrl" + str(ctrl_i)
                # add bias
                self.gate_W[ctrl_i].append(
                    self.add_param(
                        spec=w_init,
                        shape=(controller.output_shape[1], channel_n_units),
                        name="W_%s_%s" % (ctrl_name, channel_name)
                    ))

        # a list where i-th element contains weights[i-th_gate_controller] for all outputs stacked
        self.gate_W_stacked = [T.concatenate(weights, axis=1) for weights in self.gate_W]
        # a list of biases for the respective outputs stacked
        self.gate_b_stacked = T.concatenate(self.gate_b)
Example #19
    def get_char2word(self, ic, avg=False):
        suf = '_avg' if avg else ''
        ec = L.EmbeddingLayer(
            ic,
            self.args.vc,
            self.args.nc,
            name='ec' + suf,
            W=HeNormal() if not avg else Constant())  # (100, 24, 32, 16)
        ec.params[ec.W].remove('regularizable')

        if self.args.char_model == 'CNN':
            lds = L.dimshuffle(ec, (0, 3, 1, 2))  # (100, 16, 24, 32)
            ls = []
            for n in self.args.ngrams:
                lconv = L.Conv2DLayer(
                    lds,
                    self.args.nf, (1, n),
                    untie_biases=True,
                    W=HeNormal('relu') if not avg else Constant(),
                    name='conv_%d' % n + suf)  # (100, 64/4, 24, 32-n+1)
                lpool = L.MaxPool2DLayer(
                    lconv, (1, self.args.max_len - n + 1))  # (100, 64, 24, 1)
                lpool = L.flatten(lpool, outdim=3)  # (100, 16, 24)
                lpool = L.dimshuffle(lpool, (0, 2, 1))  # (100, 24, 16)
                ls.append(lpool)
            xc = L.concat(ls, axis=2)  # (100, 24, 64)
            return xc

        elif self.args.char_model == 'LSTM':
            ml = L.ExpressionLayer(
                ic, lambda x: T.neq(x, 0))  # mask layer (100, 24, 32)
            ml = L.reshape(ml, (-1, self.args.max_len))  # (2400, 32)

            gate_params = L.recurrent.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal())
            cell_params = L.recurrent.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal(),
                                           W_cell=None,
                                           nonlinearity=tanh)

            lstm_in = L.reshape(
                ec, (-1, self.args.max_len, self.args.nc))  # (2400, 32, 16)
            lstm_f = L.LSTMLayer(
                lstm_in,
                self.args.nw // 2,
                mask_input=ml,
                grad_clipping=10.,
                learn_init=True,
                peepholes=False,
                precompute_input=True,
                ingate=gate_params,
                forgetgate=gate_params,
                cell=cell_params,
                outgate=gate_params,
                # unroll_scan=True,
                only_return_final=True,
                name='forward' + suf)  # (2400, 64)
            lstm_b = L.LSTMLayer(
                lstm_in,
                self.args.nw // 2,
                mask_input=ml,
                grad_clipping=10.,
                learn_init=True,
                peepholes=False,
                precompute_input=True,
                ingate=gate_params,
                forgetgate=gate_params,
                cell=cell_params,
                outgate=gate_params,
                # unroll_scan=True,
                only_return_final=True,
                backwards=True,
                name='backward' + suf)  # (2400, 64)
            remove_reg(lstm_f)
            remove_reg(lstm_b)
            if avg:
                set_zero(lstm_f)
                set_zero(lstm_b)
            xc = L.concat([lstm_f, lstm_b], axis=1)  # (2400, 128)
            xc = L.reshape(xc,
                           (-1, self.args.sw, self.args.nw))  # (100, 24, 256)
            return xc
Example #20
    def __init__(self, emb_dim, rnn_dim, hid_dim, vocab_size, context,
                 cell='lstm', add_dense=True, dropout_p=0.2, depth=1,
                 prod=False, **cell_args):
        self.emb_dim = emb_dim
        self.rnn_dim = rnn_dim
        self.hid_dim = hid_dim
        self.vocab_size = vocab_size
        self.context = context
        self.cell = cell
        self.add_dense = add_dense
        self.depth = depth
        self.cell_args = cell_args

        self.prod = prod

        # Input is integer matrices (batch_size, seq_length)
        input_layer = InputLayer(shape=(None, context * 2),
                                 input_var=T.imatrix())
        self.emb_W = np.random.uniform(size=(vocab_size, emb_dim),
                                       low=-0.05,
                                       high=0.05).astype(np.float32)
        emb = EmbeddingLayer(input_layer, input_size=vocab_size,
                             output_size=emb_dim, W=self.emb_W)
        batch_size, _ = input_layer.input_var.shape
        rnn_shape = (batch_size, context * 2, rnn_dim)
        rnn = bid_layer(
            emb, rnn_dim, batch_size, rnn_shape, cell=cell,
            add_dense=add_dense, dropout_p=dropout_p, depth=depth, **cell_args)
        # time distributed dense
        output_shape = (batch_size, context * 2, hid_dim)
        rnn = ReshapeLayer(rnn, (-1, rnn_dim))
        rnn = DenseLayer(rnn, num_units=hid_dim)
        rnn = ReshapeLayer(dropout(rnn, p=dropout_p), output_shape)
        # flatten
        rnn = flatten(rnn)
        self.output = DenseLayer(
            rnn, num_units=vocab_size, nonlinearity=softmax)

        # Don't compile train and test functions in production mode
        if not prod:
            # T.nnet.categorical_crossentropy allows representing the true dist
            # as an integer vector (implicitly casting it to a one-hot matrix)
            lr, targets = T.fscalar('lr'), T.ivector('targets')
            pred = get_output(self.output)
            loss = T.nnet.categorical_crossentropy(pred, targets).mean()
            params = get_all_params(self.output, trainable=True)
            updates = lasagne.updates.rmsprop(loss, params, lr)
            print("Compiling training function")
            self._train = theano.function(
                [input_layer.input_var, targets, lr],
                loss, updates=updates, allow_input_downcast=True)

            test_pred = get_output(self.output, deterministic=True)
            test_loss = T.nnet.categorical_crossentropy(test_pred, targets).mean()
            test_acc = accuracy(test_pred, targets)

            print("Compiling test function")
            self._test = theano.function(
                [input_layer.input_var, targets],
                [test_loss, test_acc], allow_input_downcast=True)

        print("Compiling predict function")
        if prod:
            pred = get_output(self.output, deterministic=True)
        else:
            pred = test_pred
        self._predict = theano.function(
            [input_layer.input_var], pred, allow_input_downcast=True)
Example #21
def multihead_attention(input_sequence, query,
                        key_sequence=None, mask_input=None,
                        num_heads=1,key_size=None,value_size=None,
                        attn_class=DotAttentionLayer, name='multihead_attn',
                        **kwargs):
    """
    A convenience function that computes num_heads attention "heads" in parallel and concatenates them.
    Each "head" applies its own linear transformation to the input sequence, query, and keys.
    
    :param attn_class: what kind of attention layer to apply in multi-headed mode (Attention or DotAttention)
    :param num_heads: the number of parallel "heads"
    :param key_size: num units in attention query and key, defaults to key_sequence.shape[-1]
    :param value_size: num units in attention values, defaults to input_sequence.shape[-1] 
    
    :param input_sequence: sequence of inputs to be processed with attention
    :type input_sequence: lasagne.layers.Layer with shape [batch,seq_length,units]

    :param query: single time-step state of decoder that is used as query (usually custom layer or lstm/gru/rnn hid)  
        If it matches input_sequence one-step size, query is used as is. 
        Otherwise, DotAttention is performed from DenseLayer(query,input_units,nonlinearity=None). 
    :type query: lasagne.layers.Layer with shape [batch,units]
    
    :param key_sequence: a sequence of keys to compute dot_product with. By default, uses input_sequence instead.
    :type key_sequence: lasagne.layers.Layer with shape [batch,seq_length,units] or None

    :param mask_input: mask for input_sequence (like other lasagne masks). Default is no mask
    :type mask_input: lasagne.layers.Layer with shape [batch,seq_length]

    
    Heavily inspired by https://arxiv.org/abs/1706.03762 and http://bit.ly/2vsYX0R
    """
    assert len(input_sequence.output_shape) == 3, "input_sequence must be 3-dimensional (batch,time,units)"
    assert len(query.output_shape) == 2, "query must be 2-dimensional for a single tick (batch,units)"
    assert mask_input is None or len(
        mask_input.output_shape) == 2, "mask_input must be 2-dimensional (batch,time) or None"
    assert key_sequence is None or len(key_sequence.output_shape) == 3, "key_sequence must be 3-dimensional " \
                                                                        "of shape (batch,time,units) or None"

    key_sequence = key_sequence or input_sequence
    key_size = key_size or key_sequence.output_shape[-1]
    value_size = value_size or input_sequence.output_shape[-1]

    def make_broadcasted_heads(incoming,head_size,name=None):
        ndim = len(incoming.output_shape)
        assert ndim in (2,3), "incoming must be 2-dimensional (query) or 3-dimensional (key or value)"

        heads = DenseLayer(incoming,head_size*num_heads,nonlinearity=None,
                           num_leading_axes=ndim-1,name=name)    #[batch,time,head_size*num_heads]

        if ndim == 3:
            heads = reshape(heads,([0],[1],head_size,num_heads), name=name)    #[batch,time,head_size,num_heads]
            broadcasted_heads = BroadcastLayer(heads, (0, 3), name=name)         #[batch*heads,time,head_size]

        else: #ndim == 2
            heads = reshape(heads, ([0], head_size, num_heads), name=name)  # [batch,head_size,num_heads]
            broadcasted_heads = BroadcastLayer(heads, (0, 2), name=name)    # [batch*heads, head_size]

        return broadcasted_heads

    query_heads = make_broadcasted_heads(query, key_size,name=name + "_query_heads")

    value_heads = make_broadcasted_heads(input_sequence, value_size, name=name + "_value_heads")

    if key_sequence is not None:
        key_heads = make_broadcasted_heads(key_sequence, key_size, name=name + "_key_heads")
    else:
        key_heads = None

    if mask_input is not None:
        mask_heads  = UpcastLayer(mask_input,broadcast_layer=query_heads)
    else:
        mask_heads = None

    attn_heads  = attn_class(value_heads,query_heads,key_sequence=key_heads,
                             mask_input=mask_heads,name=name,**kwargs)  #[batch*heads,value_size]

    attn_vectors = UnbroadcastLayer(attn_heads['attn'],broadcast_layer=query_heads) #[batch,value,heads]

    attn_vectors = flatten(attn_vectors,outdim=2)

    attn_probs = reshape(attn_heads['probs'],(-1,num_heads,[1]))   #[batch,head,probs]

    return {'attn':  attn_vectors,  #[batch, value*heads]
            'probs': attn_probs}
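A hedged usage sketch for `multihead_attention`; it assumes `DotAttentionLayer` and the broadcast helpers used above are importable from the same module, and the layer sizes are illustrative.

import theano.tensor as T
from lasagne.layers import InputLayer

encoder_seq = InputLayer((None, None, 128), input_var=T.tensor3('enc'))   # [batch, time, units]
decoder_state = InputLayer((None, 256), input_var=T.matrix('dec'))        # [batch, units]

attn = multihead_attention(encoder_seq, decoder_state,
                           num_heads=4, key_size=64, name='mha')

context_vector = attn['attn']    # [batch, value_size * num_heads]
attention_probs = attn['probs']  # [batch, num_heads, seq_length]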
Example #22
    def get_actor(self, sidx, tidx, valid, avg=False):
        suf = '_avg' if avg else ''
        feat_embs = [
            self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
            for name in self.args.source_feats
        ]

        x = L.concat(feat_embs, axis=2)  # (100, 26, 256+32+32+...)
        if self.args.squeeze:
            x = L.DenseLayer(x,
                             num_units=self.args.squeeze,
                             name='h0' + suf,
                             num_leading_axes=2,
                             W=HeNormal('relu'))  # (100, 26, 256)

        x = L.flatten(x)  # (100, 26*256)

        h1 = L.DenseLayer(x,
                          num_units=self.args.nh1,
                          name='h1' + suf,
                          W=HeNormal('relu'))  # (100, 512)

        h1 = L.dropout(h1, self.args.dropout)

        taggers = {}
        if self.args.aux_tagger:
            hids = [h1]
            for name in self.args.target_feats:
                hid = L.DenseLayer(h1,
                                   256,
                                   name='hid-%s%s' % (name, suf),
                                   W=HeNormal('relu'))  # (100, 512)
                hids.append(hid)
                hid = L.dropout(hid, self.args.dropout)
                # h1 = L.dropout(h1, self.args.dropout)
                taggers[name] = L.DenseLayer(hid,
                                             len(self.manager.feats[name].map),
                                             name='tagger-%s' % name,
                                             W=HeNormal(),
                                             nonlinearity=softmax)  # (100, 25)
            h1 = L.concat(hids, axis=1)

        h2 = L.DenseLayer(h1,
                          num_units=self.args.nh2,
                          name='h2' + suf,
                          W=HeNormal('relu'))  # (100, 256)

        h2 = L.dropout(h2, self.args.dropout)
        h3y = L.DenseLayer(h2,
                           num_units=self.args.nh3,
                           name='h3y' + suf,
                           W=HeNormal(),
                           nonlinearity=softmax)  # (100, 4) num of actions
        h3s = L.concat(
            [h2, h3y], axis=1
        )  # (100, 256+4+4); this way it shouldn't output <UNK> if it's not SHIFT
        h3z = L.DenseLayer(h2,
                           num_units=self.args.size['label'],
                           name='h3z' + suf,
                           W=HeNormal(),
                           nonlinearity=softmax)  # (100, 25) number of labels

        if avg:
            set_all_zero([h3y, h3z] + taggers.values())

        return h3y, h3z, taggers
Example #23
def test_memory(game_title='SpaceInvaders-v0',
                n_parallel_games=3,
                replay_seq_len=2,
                ):
    """
    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None,) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    
    memory_dict = OrderedDict([])
    
    
    ###Window
    window_size = 3

    # prev state input
    prev_window = InputLayer((None, window_size) + tuple(observation_reshape.output_shape[1:]),
                             name="previous window state")
    

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")
    
    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None,) + window.output_shape[2:])

    
    memory_dict[window] = prev_window
    
    ###Stack
    #prev stack
    stack_w,stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape,stack_w,name="prev_stack")
    stack_controls = DenseLayer(observation_reshape,3,
                              nonlinearity=lasagne.nonlinearities.softmax,
                              name="prev_stack")
    prev_stack = InputLayer((None,stack_h,stack_w),
                             name="previous stack state")
    stack = StackAugmentation(stack_inputs,prev_stack, stack_controls)
    memory_dict[stack] = prev_stack
    
    stack_top = lasagne.layers.SliceLayer(stack,0,1)

    
    ###RNN preset
    
    prev_rnn = InputLayer((None,16),
                             name="previous RNN state")
    new_rnn = RNNCell(prev_rnn,observation_reshape)
    memory_dict[new_rnn] = prev_rnn
    
    ###GRU preset
    prev_gru = InputLayer((None,16),
                             name="previous GRUcell state")
    new_gru = GRUCell(prev_gru,observation_reshape)
    memory_dict[new_gru] = prev_gru
    
    ###GRUmemorylayer
    prev_gru1 = InputLayer((None,15),
                             name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15,observation_reshape,prev_gru1)
    memory_dict[new_gru1] = prev_gru1
    
    #LSTM with peepholes
    prev_lstm0_cell = InputLayer((None,13),
                             name="previous LSTMCell hidden state [with peepholes]")
    
    prev_lstm0_out = InputLayer((None,13),
                             name="previous LSTMCell output state [with peepholes]")

    new_lstm0_cell,new_lstm0_out = LSTMCell(prev_lstm0_cell,prev_lstm0_out,
                                            input_or_inputs = observation_reshape,
                                            peepholes=True,name="newLSTM1 [with peepholes]")
    
    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out


    #LSTM without peepholes
    prev_lstm1_cell = InputLayer((None,14),
                             name="previous LSTMCell hidden state [no peepholes]")
    
    prev_lstm1_out = InputLayer((None,14),
                             name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell,new_lstm1_out = LSTMCell(prev_lstm1_cell,prev_lstm1_out,
                                            input_or_inputs = observation_reshape,
                                            peepholes=False,name="newLSTM1 [no peepholes]")
    
    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out
    
    ##concat everything
    
    for i in [flatten(window_max),stack_top,new_rnn,new_gru,new_gru1]:
        print(i.output_shape)
    all_memory = concat([flatten(window_max),stack_top,new_rnn,new_gru,new_gru1,new_lstm0_out,new_lstm1_out,])
    
    
    

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc


    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer,
                  memory_dict,
                  q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]"""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [np.zeros((batch_size,) + tuple(mem.output_shape[1:]),
                                      dtype='float32')
                             for mem in agent.agent_states]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and ads them into the pool
        throwing the old ones away entirely for simplicity"""

        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor, is_alive_tensor, preceding_memory_states)

    # load the first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on
    # random batches drawn from it (see the sketch after this example).
    # ### Training via experience replay

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, leaving the rewards unscaled works well enough, so no scaling is applied here


    elwise_mse_loss = qlearning.get_elementwise_objective(q_values_sequence,
                                                          env.actions[0],
                                                          scaled_reward_seq,
                                                          env.is_alive,
                                                          gamma_or_gammas=0.99, )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10 ** -4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward], updates=updates)
    evaluation_fun = theano.function([], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " % (
            epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Exemplo n.º 24
    def additional_layer(self, idx_layer, emb_layer, avg=False):
        suf = '_avg' if avg else ''
        if self.name == 'char':
            if self.args.char_model == 'cnn':
                lds = L.dimshuffle(emb_layer,
                                   (0, 3, 1, 2))  # (100, 16, 26, 32)
                ls = []
                for n in self.args.ngrams:
                    lconv = L.Conv2DLayer(
                        lds,
                        self.args.conv_dim,
                        (1, n),
                        untie_biases=False,
                        # W=HeNormal('relu') if not avg else Constant(),
                        W=GlorotNormal('relu') if not avg else Constant(),
                        name='conv_%d' % n + suf)  # (100, 64/4, 26, 32-n+1)

                    lpool = L.MaxPool2DLayer(lconv,
                                             (1, self.args.max_word_len - n +
                                              1))  # (100, 64, 26, 1)
                    lpool = L.flatten(lpool, outdim=3)  # (100, 16, 26)
                    lpool = L.dimshuffle(lpool, (0, 2, 1))  # (100, 26, 16)
                    ls.append(lpool)
                xc = L.concat(ls, axis=2, name='echar_concat')  # (100, 26, 64)
                # additional
                # xc = L.DenseLayer(xc, self.args.embw_dim, nonlinearity=None, name='echar_affine', num_leading_axes=2,
                # W=HeNormal() if not avg else Constant()) # (100, 26, 100)
                return xc
            elif self.args.char_model == 'lstm':
                ml = L.ExpressionLayer(
                    idx_layer,
                    lambda x: T.neq(x, 0))  # mask layer (100, 24, 32)
                ml = L.reshape(ml, (-1, self.args.max_word_len))  # (1500, 32)

                gate_params = L.recurrent.Gate(W_in=Orthogonal(),
                                               W_hid=Orthogonal())
                cell_params = L.recurrent.Gate(W_in=Orthogonal(),
                                               W_hid=Orthogonal(),
                                               W_cell=None,
                                               nonlinearity=tanh)

                lstm_in = L.reshape(
                    emb_layer,
                    (-1, self.args.max_word_len,
                     self.config['char']['emb_dim']))  # (1500, 32, 16)
                lstm_f = L.LSTMLayer(
                    lstm_in,
                    32,
                    mask_input=ml,
                    grad_clipping=10.,
                    learn_init=True,
                    peepholes=False,
                    precompute_input=True,
                    ingate=gate_params,
                    forgetgate=gate_params,
                    cell=cell_params,
                    outgate=gate_params,
                    # unroll_scan=True,
                    only_return_final=True,
                    name='forward' + suf)  # (1500, 32)
                lstm_b = L.LSTMLayer(
                    lstm_in,
                    32,
                    mask_input=ml,
                    grad_clipping=10.,
                    learn_init=True,
                    peepholes=False,
                    precompute_input=True,
                    ingate=gate_params,
                    forgetgate=gate_params,
                    cell=cell_params,
                    outgate=gate_params,
                    # unroll_scan=True,
                    only_return_final=True,
                    backwards=True,
                    name='backward' + suf)  # (1500, 32)
                remove_reg(lstm_f)
                remove_reg(lstm_b)
                if avg:
                    set_zero(lstm_f)
                    set_zero(lstm_b)
                xc = L.concat([lstm_f, lstm_b], axis=1)  # (1500, 64)
                if self.args.lstm_tagger:
                    xc = L.reshape(
                        xc, (-1, self.args.max_sent_len, 64))  # (100, 161, 64)
                elif self.args.trans_tagger:
                    xc = L.reshape(
                        xc, (-1, self.args.window_size, 64))  # (100, 15, 64)
                else:
                    xc = L.reshape(xc, (-1, 26, 64))  # (100, 26, 64)
                return xc

        elif self.name == 'morph':
            # idx (100, 26/161, 16)  emb (100, 26/161, 16, 32)
            if self.args.morph_model == 'max':
                xm = L.MaxPool2DLayer(
                    emb_layer,
                    (self.args.max_morph_len, 1))  # (100, 26/161, 1, 32)
                # xm = L.reshape(xm, (-1, 26, self.config['morph']['emb_dim'])) # (100, 26/161, 32)
                xm = L.flatten(xm, outdim=3)  # (100, 26/161, 32)
                # xm = L.ExpressionLayer(emb_layer, lambda x: T.max(x, 2))
            elif self.args.morph_model == 'avg':
                mask = L.ExpressionLayer(
                    idx_layer, lambda x: T.neq(x, 0))  # (100, 26, 16)
                mask = L.dimshuffle(mask, (0, 1, 2, 'x'))  # (100, 26, 16, 1)
                mask = L.ExpressionLayer(mask, lambda x: T.extra_ops.repeat(
                    x, self.config['morph']['emb_dim'], 3))  # (100, 26, 16, 1)
                xm = L.ElemwiseMergeLayer([
                    emb_layer, mask
                ], lambda x, m: T.sum(x * m, 2) / T.sum(m, 2))  # (100, 26, 32)
                # xm = L.reshape(xm, (-1, self.args.feat_shape, self.config['morph']['emb_dim'])) # (100, 26, 32)
            return xm
        else:
            return emb_layer
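For reference, a stripped-down, self-contained version of the char-CNN branch above, built only from standard Lasagne layers; the vocabulary size, word/character counts and filter widths are made up, and the shape comments follow the same convention as the snippet above.

import theano.tensor as T
import lasagne.layers as L

char_idx = T.itensor3('char_idx')                                 # (batch, words, chars)
l_in = L.InputLayer((None, 26, 32), input_var=char_idx)
l_emb = L.EmbeddingLayer(l_in, input_size=100, output_size=16)    # (batch, 26, 32, 16)
l_emb = L.dimshuffle(l_emb, (0, 3, 1, 2))                         # (batch, 16, 26, 32)

ngram_outs = []
for n in (3, 5):
    l_conv = L.Conv2DLayer(l_emb, num_filters=32, filter_size=(1, n))   # (batch, 32, 26, 32-n+1)
    l_pool = L.MaxPool2DLayer(l_conv, pool_size=(1, 32 - n + 1))        # (batch, 32, 26, 1)
    l_pool = L.flatten(l_pool, outdim=3)                                # (batch, 32, 26)
    ngram_outs.append(L.dimshuffle(l_pool, (0, 2, 1)))                  # (batch, 26, 32)

l_char = L.concat(ngram_outs, axis=2)                             # (batch, 26, 64) word representations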
Exemplo n.º 25
    def build_critic(self,
                     critic_input_var,
                     condition_var,
                     vocoder,
                     ctxsize,
                     nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                     postlayers_nb=6,
                     use_LSweighting=True,
                     LSWGANtransfreqcutoff=4000,
                     LSWGANtranscoef=1.0 / 8.0,
                     use_WGAN_incnoisefeature=False):

        useLRN = False  # TODO

        layer_critic = ll.InputLayer(shape=(None, None,
                                            vocoder.featuressize()),
                                     input_var=critic_input_var,
                                     name='input')

        winlen = int(0.5 * self._windur / 0.005) * 2 + 1

        layerstoconcats = []

        # Amplitude spectrum
        layer = ll.SliceLayer(layer_critic,
                              indices=slice(
                                  vocoder.f0size(),
                                  vocoder.f0size() + vocoder.specsize()),
                              axis=2,
                              name='spec_slice')  # Assumed feature order

        if use_LSweighting:  # Using weighted WGAN+LS
            print(
                'WGAN Weighted LS - critic - SPEC (trans cutoff {}Hz)'.format(
                    LSWGANtransfreqcutoff))
            # wganls_spec_weights_ = nonlin_sigmoidparm(np.arange(vocoder.specsize(), dtype=theano.config.floatX),  int(LSWGANtransfreqcutoff*vocoder.specsize()), LSWGANtranscoef)
            wganls_spec_weights_ = nonlin_sigmoidparm(
                np.arange(vocoder.specsize(), dtype=theano.config.floatX),
                sp.freq2fwspecidx(LSWGANtransfreqcutoff, vocoder.fs,
                                  vocoder.specsize()), LSWGANtranscoef)
            wganls_weights = theano.shared(
                value=np.asarray(wganls_spec_weights_),
                name='wganls_spec_weights_')
            layer = CstMulLayer(layer,
                                cstW=wganls_weights,
                                name='cstdot_wganls_weights')

        layer = ll.dimshuffle(layer, [0, 'x', 1, 2], name='spec_dimshuffle')
        for layi in xrange(self._nbcnnlayers):
            layerstr = 'spec_l' + str(1 + layi) + '_GC{}x{}x{}'.format(
                self._nbfilters, winlen, self._spec_freqlen)
            # strides>1 make the first two Conv layers pyramidal. Increase patches' effects here and there, bad.
            layer = layer_GatedConv2DLayer(layer,
                                           self._nbfilters,
                                           [winlen, self._spec_freqlen],
                                           pad='same',
                                           nonlinearity=nonlinearity,
                                           name=layerstr)
            if useLRN: layer = ll.LocalResponseNormalization2DLayer(layer)
        layer = ll.dimshuffle(layer, [0, 2, 3, 1], name='spec_dimshuffle')
        layer_spec = ll.flatten(layer, outdim=3, name='spec_flatten')
        layerstoconcats.append(layer_spec)

        if use_WGAN_incnoisefeature and vocoder.noisesize(
        ) > 0:  # Add noise in critic
            layer = ll.SliceLayer(layer_critic,
                                  indices=slice(
                                      vocoder.f0size() + vocoder.specsize(),
                                      vocoder.f0size() + vocoder.specsize() +
                                      vocoder.noisesize()),
                                  axis=2,
                                  name='nm_slice')

            if use_LSweighting:  # Using weighted WGAN+LS
                print('WGAN Weighted LS - critic - NM (trans cutoff {}Hz)'.
                      format(LSWGANtransfreqcutoff))
                # wganls_spec_weights_ = nonlin_sigmoidparm(np.arange(vocoder.noisesize(), dtype=theano.config.floatX),  int(LSWGANtransfreqcutoff*vocoder.noisesize()), LSWGANtranscoef)
                wganls_spec_weights_ = nonlin_sigmoidparm(
                    np.arange(vocoder.noisesize(), dtype=theano.config.floatX),
                    sp.freq2fwspecidx(LSWGANtransfreqcutoff, vocoder.fs,
                                      vocoder.noisesize()), LSWGANtranscoef)
                wganls_weights = theano.shared(
                    value=np.asarray(wganls_spec_weights_),
                    name='wganls_spec_weights_')
                layer = CstMulLayer(layer,
                                    cstW=wganls_weights,
                                    name='cstdot_wganls_weights')

            layer = ll.dimshuffle(layer, [0, 'x', 1, 2], name='nm_dimshuffle')
            for layi in xrange(np.max(
                (1, int(np.ceil(self._nbcnnlayers / 2))))):
                layerstr = 'nm_l' + str(1 + layi) + '_GC{}x{}x{}'.format(
                    self._nbfilters, winlen, self._noise_freqlen)
                layer = layer_GatedConv2DLayer(layer,
                                               self._nbfilters,
                                               [winlen, self._noise_freqlen],
                                               pad='same',
                                               nonlinearity=nonlinearity,
                                               name=layerstr)
                if useLRN: layer = ll.LocalResponseNormalization2DLayer(layer)
            layer = ll.dimshuffle(layer, [0, 2, 3, 1], name='nm_dimshuffle')
            layer_bndnm = ll.flatten(layer, outdim=3, name='nm_flatten')
            layerstoconcats.append(layer_bndnm)

        # Add the contexts
        layer_ctx_input = ll.InputLayer(shape=(None, None, ctxsize),
                                        input_var=condition_var,
                                        name='ctx_input')
        layer_ctx = layer_context(layer_ctx_input,
                                  ctx_nblayers=self._ctx_nblayers,
                                  ctx_nbfilters=self._ctx_nbfilters,
                                  ctx_winlen=self._ctx_winlen,
                                  hiddensize=self._hiddensize,
                                  nonlinearity=nonlinearity,
                                  bn_axes=None,
                                  bn_cnn_axes=None,
                                  critic=True,
                                  useLRN=useLRN)
        layerstoconcats.append(layer_ctx)

        # Concatenate the features analysis with the contexts...
        layer = ll.ConcatLayer(layerstoconcats,
                               axis=2,
                               name='ctx_features.concat')

        # ... and finalize with a common FC network
        for layi in xrange(postlayers_nb):
            layerstr = 'post.l' + str(1 + layi) + '_FC' + str(self._hiddensize)
            layer = ll.DenseLayer(layer,
                                  self._hiddensize,
                                  nonlinearity=nonlinearity,
                                  num_leading_axes=2,
                                  name=layerstr)

        # output layer (linear)
        layer = ll.DenseLayer(layer,
                              1,
                              nonlinearity=None,
                              num_leading_axes=2,
                              name='projection')  # No nonlin for this output
        return [layer, layer_critic, layer_ctx_input]
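A critic built this way would typically be trained with a Wasserstein objective. The following is a generic sketch rather than this repository's training code: critic_out, layer_critic and layer_ctx_input stand for the three layers returned above, and the feature/context tensors are placeholders.

import theano.tensor as T
import lasagne.layers as ll

real_batch = T.tensor3('real_features')        # (batch, time, featuressize) ground-truth vocoder features
fake_batch = T.tensor3('generated_features')   # (batch, time, featuressize) generator output
ctx_batch = T.tensor3('contexts')              # (batch, time, ctxsize) linguistic contexts

score_real = ll.get_output(critic_out,
                           inputs={layer_critic: real_batch, layer_ctx_input: ctx_batch})
score_fake = ll.get_output(critic_out,
                           inputs={layer_critic: fake_batch, layer_ctx_input: ctx_batch})

# WGAN critic objective: maximise E[critic(real)] - E[critic(fake)], i.e. minimise its negation
# (weight clipping or a gradient penalty is still needed on top of this)
critic_loss = score_fake.mean() - score_real.mean()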
Exemplo n.º 26
    def __init__(self,
                 insize,
                 vocoder,
                 hiddensize=256,
                 nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
                 ctx_nblayers=1,
                 ctx_nbfilters=2,
                 ctx_winlen=21,
                 nbcnnlayers=8,
                 nbfilters=16,
                 spec_freqlen=5,
                 noise_freqlen=5,
                 windur=0.025,
                 bn_axes=None,
                 noisesize=100):
        if bn_axes is None: bn_axes = [0, 1]
        model.Model.__init__(self, insize, vocoder, hiddensize)

        self._ctx_nblayers = ctx_nblayers
        self._ctx_nbfilters = ctx_nbfilters
        self._ctx_winlen = ctx_winlen

        self._nbcnnlayers = nbcnnlayers
        self._nbfilters = nbfilters
        self._spec_freqlen = spec_freqlen
        self._noise_freqlen = noise_freqlen
        self._windur = windur

        winlen = int(0.5 * self._windur / 0.005) * 2 + 1

        layer_ctx_input = ll.InputLayer(shape=(None, None, insize),
                                        input_var=self._input_values,
                                        name='ctx.input')

        layer_noise_input = UniformNoiseLayer(layer_ctx_input,
                                              noisesize,
                                              name='noise.input')
        layer_ctx_input = ll.ConcatLayer(
            (layer_ctx_input, layer_noise_input), axis=2,
            name='concat.input')  # TODO Put the noise later on

        self._layer_ctx = layer_context(layer_ctx_input,
                                        ctx_nblayers=self._ctx_nblayers,
                                        ctx_nbfilters=self._ctx_nbfilters,
                                        ctx_winlen=self._ctx_winlen,
                                        hiddensize=self._hiddensize,
                                        nonlinearity=nonlinearity,
                                        bn_axes=[0, 1],
                                        bn_cnn_axes=[0, 2, 3])

        layers_toconcat = []

        if vocoder.f0size() > 0:
            # F0 - BLSTM layer
            layer_f0 = self._layer_ctx
            grad_clipping = 50
            for layi in xrange(1):
                layerstr = 'f0_l' + str(1 + layi) + '_BLSTM{}'.format(
                    self._hiddensize)
                fwd = models_basic.layer_LSTM(layer_f0,
                                              self._hiddensize,
                                              nonlinearity,
                                              backwards=False,
                                              grad_clipping=grad_clipping,
                                              name=layerstr + '.fwd')
                bck = models_basic.layer_LSTM(layer_f0,
                                              self._hiddensize,
                                              nonlinearity,
                                              backwards=True,
                                              grad_clipping=grad_clipping,
                                              name=layerstr + '.bck')
                layer_f0 = ll.ConcatLayer((fwd, bck),
                                          axis=2,
                                          name=layerstr + '.concat')
                # TODO Replace by a CNN? It didn't work well, maybe because of the WGAN loss, but F0 is no longer trained with the WGAN loss
            layer_f0 = ll.DenseLayer(layer_f0,
                                     num_units=vocoder.f0size(),
                                     nonlinearity=None,
                                     num_leading_axes=2,
                                     name='f0_lout_projection')
            layers_toconcat.append(layer_f0)

        if vocoder.specsize() > 0:
            # Amplitude spectrum - 2D Gated Conv layers
            layer_spec_proj = ll.batch_norm(ll.DenseLayer(
                self._layer_ctx,
                vocoder.specsize(),
                nonlinearity=nonlinearity,
                num_leading_axes=2,
                name='spec_projection'),
                                            axes=bn_axes)
            # layer_spec_proj = ll.DenseLayer(self._layer_ctx, vocoder.specsize(), nonlinearity=None, num_leading_axes=2, name='spec_projection')
            layer_spec = ll.dimshuffle(layer_spec_proj, [0, 'x', 1, 2],
                                       name='spec_dimshuffle')
            for layi in xrange(nbcnnlayers):
                layerstr = 'spec_l' + str(1 + layi) + '_GC{}x{}x{}'.format(
                    self._nbfilters, winlen, self._spec_freqlen)
                layer_spec = ll.batch_norm(
                    layer_GatedConv2DLayer(layer_spec,
                                           self._nbfilters,
                                           [winlen, self._spec_freqlen],
                                           stride=1,
                                           pad='same',
                                           nonlinearity=nonlinearity,
                                           name=layerstr))
            layer_spec = ll.Conv2DLayer(layer_spec,
                                        1, [winlen, self._spec_freqlen],
                                        pad='same',
                                        nonlinearity=None,
                                        name='spec_lout_2DC')
            layer_spec = ll.dimshuffle(layer_spec, [0, 2, 3, 1],
                                       name='spec_dimshuffle')
            layer_spec = ll.flatten(layer_spec, outdim=3, name='spec_flatten')
            # layer_spec = ll.ElemwiseSumLayer([layer_spec, layer_spec_proj], name='skip')
            layers_toconcat.append(layer_spec)

        if vocoder.noisesize() > 0:
            layer_noise = self._layer_ctx
            for layi in xrange(np.max((1, int(np.ceil(nbcnnlayers / 2))))):
                layerstr = 'noise_l' + str(1 +
                                           layi) + '_FC{}'.format(hiddensize)
                layer_noise = ll.DenseLayer(layer_noise,
                                            num_units=hiddensize,
                                            nonlinearity=nonlinearity,
                                            num_leading_axes=2,
                                            name=layerstr)
            if isinstance(vocoder, vocoders.VocoderPML):
                layer_noise = ll.DenseLayer(
                    layer_noise,
                    num_units=vocoder.nm_size,
                    nonlinearity=lasagne.nonlinearities.sigmoid,
                    num_leading_axes=2,
                    name='lo_noise'
                )  # sig is best among nonlin_saturatedsigmoid nonlin_tanh_saturated nonlin_tanh_bysigmoid
            else:
                layer_noise = ll.DenseLayer(layer_noise,
                                            num_units=vocoder.nm_size,
                                            nonlinearity=None,
                                            num_leading_axes=2,
                                            name='lo_noise')
            layers_toconcat.append(layer_noise)

        if vocoder.vuvsize() > 0:
            # VUV - BLSTM layer
            layer_vuv = self._layer_ctx
            grad_clipping = 50
            for layi in xrange(1):
                layerstr = 'vuv_l' + str(1 + layi) + '_BLSTM{}'.format(
                    self._hiddensize)
                fwd = models_basic.layer_LSTM(layer_vuv,
                                              self._hiddensize,
                                              nonlinearity,
                                              backwards=False,
                                              grad_clipping=grad_clipping,
                                              name=layerstr + '.fwd')
                bck = models_basic.layer_LSTM(layer_vuv,
                                              self._hiddensize,
                                              nonlinearity,
                                              backwards=True,
                                              grad_clipping=grad_clipping,
                                              name=layerstr + '.bck')
                layer_vuv = ll.ConcatLayer((fwd, bck),
                                           axis=2,
                                           name=layerstr + '.concat')
            layer_vuv = ll.DenseLayer(layer_vuv,
                                      num_units=vocoder.vuvsize(),
                                      nonlinearity=None,
                                      num_leading_axes=2,
                                      name='vuv_lout_projection')
            layers_toconcat.append(layer_vuv)

        layer = ll.ConcatLayer(layers_toconcat, axis=2, name='lout.concat')

        self.init_finish(
            layer
        )  # Has to be called at the end of the __init__ to print out the architecture, get the trainable params, etc.
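Both this generator and the critic above use a layer_GatedConv2DLayer helper whose implementation is not shown. A gated 2D convolution is commonly the elementwise product of a 'value' convolution and a sigmoid 'gate' convolution; the sketch below shows that idea in plain Lasagne and is not necessarily this repository's exact implementation.

import theano.tensor as T
import lasagne
import lasagne.layers as ll
from lasagne.nonlinearities import sigmoid

def gated_conv2d_sketch(incoming, num_filters, filter_size, pad='same',
                        nonlinearity=lasagne.nonlinearities.very_leaky_rectify, name=''):
    # value path: an ordinary convolution with the chosen nonlinearity
    values = ll.Conv2DLayer(incoming, num_filters, filter_size, pad=pad,
                            nonlinearity=nonlinearity, name=name + '.values')
    # gate path: a sigmoid convolution deciding how much of each value passes through
    gates = ll.Conv2DLayer(incoming, num_filters, filter_size, pad=pad,
                           nonlinearity=sigmoid, name=name + '.gates')
    # elementwise product of the two paths
    return ll.ElemwiseMergeLayer([values, gates], T.mul, name=name)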
Exemplo n.º 27
    def create_dadgm_model(self, X, Y, n_dim, n_out, n_chan=1, n_class=10):
        n_cat = 20  # number of categorical distributions
        n_lat = n_class * n_cat  # latent stochastic variables
        n_aux = 10  # number of auxiliary variables
        n_hid = 500  # size of hidden layer in encoder/decoder
        n_in = n_out = n_dim * n_dim * n_chan
        tau = self.tau
        hid_nl = T.nnet.relu
        relu_shift = lambda av: T.nnet.relu(av + 10) - 10

        # create the encoder network
        # - create q(a|x)
        qa_net_in = InputLayer(shape=(None, n_in), input_var=X)
        qa_net = DenseLayer(
            qa_net_in,
            num_units=n_hid,
            W=GlorotNormal('relu'),
            b=Normal(1e-3),
            nonlinearity=hid_nl,
        )
        qa_net_mu = DenseLayer(
            qa_net,
            num_units=n_aux,
            W=GlorotNormal(),
            b=Normal(1e-3),
            nonlinearity=None,
        )
        qa_net_logsigma = DenseLayer(
            qa_net,
            num_units=n_aux,
            W=GlorotNormal(),
            b=Normal(1e-3),
            nonlinearity=relu_shift,
        )
        qa_net_sample = GaussianSampleLayer(qa_net_mu, qa_net_logsigma)
        # - create q(z|a, x)
        qz_net_in = lasagne.layers.InputLayer((None, n_aux))
        qz_net_a = DenseLayer(
            qz_net_in,
            num_units=n_hid,
            nonlinearity=hid_nl,
        )
        qz_net_b = DenseLayer(
            qa_net_in,
            num_units=n_hid,
            nonlinearity=hid_nl,
        )
        qz_net = ElemwiseSumLayer([qz_net_a, qz_net_b])
        qz_net = DenseLayer(qz_net, num_units=n_hid, nonlinearity=hid_nl)
        qz_net_mu = DenseLayer(
            qz_net,
            num_units=n_lat,
            nonlinearity=None,
        )
        qz_net_mu = reshape(qz_net_mu, (-1, n_class))
        qz_net_sample = GumbelSoftmaxSampleLayer(qz_net_mu, tau)
        qz_net_sample = reshape(qz_net_sample, (-1, n_cat, n_class))
        # create the decoder network
        # - create p(x|z)
        px_net_in = lasagne.layers.InputLayer((None, n_cat, n_class))
        # --- rest is created from RBM ---
        # - create p(a|z)
        pa_net = DenseLayer(
            flatten(px_net_in),
            num_units=n_hid,
            W=GlorotNormal('relu'),
            b=Normal(1e-3),
            nonlinearity=hid_nl,
        )
        pa_net_mu = DenseLayer(
            pa_net,
            num_units=n_aux,
            W=GlorotNormal(),
            b=Normal(1e-3),
            nonlinearity=None,
        )
        pa_net_logsigma = DenseLayer(
            pa_net,
            num_units=n_aux,
            W=GlorotNormal(),
            b=Normal(1e-3),
            nonlinearity=relu_shift,
        )
        # save network params
        self.n_cat = n_cat
        self.input_layers = (qa_net_in, qz_net_in, px_net_in)

        return pa_net_mu, pa_net_logsigma, qz_net_mu, \
            qa_net_mu, qa_net_logsigma, qz_net_sample, qa_net_sample,
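GumbelSoftmaxSampleLayer above is a custom layer. The standard Gumbel-Softmax reparameterisation it presumably wraps can be written directly in Theano as below; this is the generic formulation, not necessarily the exact layer used in this model.

import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

_rng = MRG_RandomStreams(seed=42)

def gumbel_softmax_sample(logits, tau, eps=1e-20):
    """Differentiable, approximately one-hot sample from a categorical distribution.

    logits: (batch, n_class) unnormalised log-probabilities (e.g. the reshaped qz_net_mu output)
    tau:    softmax temperature; lower values give harder (more one-hot) samples
    """
    u = _rng.uniform(size=logits.shape, low=0.0, high=1.0)
    gumbel_noise = -T.log(-T.log(u + eps) + eps)     # Gumbel(0, 1) noise
    return T.nnet.softmax((logits + gumbel_noise) / tau)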
Exemplo n.º 28
    def __init__(self,
                 gate_controllers,
                 channels,
                 gate_nonlinearities=nonlinearities.sigmoid,
                 bias_init=init.Constant(),
                 weight_init=init.Normal(),
                 **kwargs):
        """
        An overly generic interface for a one-step gate, stacked gates or a gate applier.
        If several channels are given, they are stacked for quicker execution.

        gate_controllers - a single layer or a list/tuple of layers that the gate
            depends on (for most RNNs, that's the input and the previous memory state)

        channels - a single layer or integer, or a list/tuple of layers/integers
            if a layer, it defines a layer to be multiplied by the gate output
            if an integer, it defines the number of units of a gate -- and these units are returned

        gate_nonlinearities - a single function or a list of functions (channel-wise)
            defining the nonlinearities for the gates on the corresponding channels

        bias_init - an initializer or a list (channel-wise) of initializers for the bias (b) parameters
            - (None, lasagne.init, theano variable or numpy array)
            - None means no bias
        weight_init - an initializer OR a list of initializers (channel-wise)
            - OR a list of lists of initializers (channel, controller)
            - (lasagne.init, theano variable or numpy array)
        """

        self.channels = check_list(channels)
        self.gate_controllers = check_list(gate_controllers)

        # check channel types
        for chl in self.channels:
            assert is_layer(chl) or (type(chl) == int)

        # separate layers from non-layers
        self.channel_layers = list(filter(is_layer, self.channels))
        self.channel_ints = [v for v in self.channels if not is_layer(v)]

        # flatten layers to 2 dimensions
        for i in range(len(self.channel_layers)):
            layer = self.channel_layers[i]
            if type(layer) == int:
                continue

            lname = layer.name or ""
            if len(layer.output_shape) != 2:
                warn(
                    "One of the channels (name='%s') has an input dimension of %s and will be flattened."
                    % (lname, layer.output_shape))
                self.channel_layers[i] = flatten(layer, outdim=2, name=lname)
                assert len(self.channel_layers[i].output_shape) == 2

        # flatten layers to 2 dimensions
        for i in range(len(self.gate_controllers)):
            layer = self.gate_controllers[i]
            lname = layer.name or ""
            if len(layer.output_shape) != 2:
                warn(
                    "One of the gate controllers (name='%s') has an input dimension of %s and will be flattened."
                    % (lname, layer.output_shape))
                self.gate_controllers[i] = flatten(layer, outdim=2, name=lname)
                assert len(self.gate_controllers[i].output_shape) == 2

        # initialize merge layer
        incomings = self.channel_layers + self.gate_controllers

        # default name
        kwargs["name"] = kwargs.get("name",
                                    "YetAnother" + self.__class__.__name__)

        output_names = [
            "%s.channel.%i" % (kwargs["name"], i)
            for i in range(len(self.channels))
        ]

        # determine whether or not user defined a fixed batch size
        batch_sizes = [
            chl.output_shape[0] for chl in filter(is_layer, self.channels)
        ]
        batch_size = reduce(lambda a, b: a or b, batch_sizes, None)

        output_shapes = [
            chl.output_shape if is_layer(chl) else (batch_size, chl)
            for chl in self.channels
        ]
        output_shapes = OrderedDict(zip(output_names, output_shapes))

        output_dtypes = [get_layer_dtype(chl) for chl in self.channels]
        output_dtypes = OrderedDict(zip(output_names, output_dtypes))

        super(GateLayer, self).__init__(incomings,
                                        output_shapes=output_shapes,
                                        output_dtypes=output_dtypes,
                                        **kwargs)

        # nonlinearities
        self.gate_nonlinearities = check_list(gate_nonlinearities)
        self.gate_nonlinearities = [(nl if (nl is not None) else (lambda v: v))
                                    for nl in self.gate_nonlinearities]
        # must be either one common nonlinearity or one per channel
        assert len(self.gate_nonlinearities) in (1, len(self.channels))

        if len(self.gate_nonlinearities) == 1:
            self.gate_nonlinearities *= len(self.channels)

        # cast bias init to a list
        bias_init = check_list(bias_init)
        assert len(bias_init) in (1, len(self.channels))
        if len(bias_init) == 1:
            bias_init *= len(self.channels)

        # cast weight init to a list of lists [channel][controller]
        weight_init = check_list(weight_init)
        assert len(weight_init) in (1, len(self.channels))
        if len(weight_init) == 1:
            weight_init *= len(self.channels)

        for i in range(len(self.channels)):
            weight_init[i] = check_list(weight_init[i])
            assert len(weight_init[i]) in (1, len(self.gate_controllers))
            if len(weight_init[i]) == 1:
                weight_init[i] *= len(self.gate_controllers)

        self.gate_b = []  # a list of biases for channels
        self.gate_W = [list() for _ in self.gate_controllers
                       ]  # a list of lists of weights [controller][channel]

        for chl_i, (channel, b_init, channel_w_inits) in enumerate(
                zip(self.channels, bias_init, weight_init)):

            if is_layer(channel):
                channel_name = channel.name or "chl" + str(chl_i)
                channel_n_units = channel.output_shape[1]
            else:
                channel_name = "chl" + str(chl_i)
                channel_n_units = channel

            # add bias
            if b_init is not None:
                self.gate_b.append(
                    self.add_param(spec=b_init,
                                   shape=(channel_n_units, ),
                                   name="b_%s" % (channel_name)))
            else:
                self.gate_b.append(T.zeros((channel_n_units, )))

            # add weights
            for ctrl_i, (controller, w_init) in enumerate(
                    zip(self.gate_controllers, channel_w_inits)):
                ctrl_name = controller.name or "ctrl" + str(ctrl_i)
                # add the weight for this (controller, channel) pair
                self.gate_W[ctrl_i].append(
                    self.add_param(spec=w_init,
                                   shape=(controller.output_shape[1],
                                          channel_n_units),
                                   name="W_%s_%s" % (ctrl_name, channel_name)))

        # a list where i-th element contains weights[i-th_gate_controller] for all outputs stacked
        self.gate_W_stacked = [
            T.concatenate(weights, axis=1) for weights in self.gate_W
        ]
        # a list of biases for the respective outputs stacked
        self.gate_b_stacked = T.concatenate(self.gate_b)
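Based on the constructor signature and docstring above, a GateLayer could be wired up as in the following sketch; the shapes and names are purely illustrative, and the handling of the layer's multiple outputs is not shown.

from lasagne.layers import InputLayer
from lasagne import nonlinearities

# two controllers (e.g. the current observation and the previous memory state) ...
observation = InputLayer((None, 100), name='observation')
prev_memory = InputLayer((None, 64), name='previous memory')

# ... gating one existing 64-unit channel plus one free-standing 32-unit gate
gates = GateLayer(gate_controllers=[observation, prev_memory],
                  channels=[prev_memory, 32],
                  gate_nonlinearities=nonlinearities.sigmoid,
                  name='example gates')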
Exemplo n.º 29
def build_critic(input_var=None, cond_var=None, n_conds=0, arch=0,
                 with_BatchNorm=True, loss_type='wgan'):
    from lasagne.layers import (
        InputLayer, Conv2DLayer, DenseLayer, MaxPool2DLayer, concat,
        dropout, flatten, batch_norm)
    from lasagne.nonlinearities import rectify, LeakyRectify
    from lasagne.init import GlorotUniform  # Normal
    lrelu = LeakyRectify(0.2)
    layer = InputLayer(
        shape=(None, 1, 128, 128), input_var=input_var, name='d_in_data')
    # init = Normal(0.02, 0.0)
    init = GlorotUniform()

    if cond_var is not None:
        # class: from data or from generator input
        layer_cond = InputLayer(
            shape=(None, n_conds), input_var=cond_var, name='d_in_condition')
        layer_cond = BatchNorm(DenseLayer(
            layer_cond, 1024, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
    if arch == 'dcgan':
        # DCGAN inspired
        layer = BatchNorm(Conv2DLayer(
            layer, 32, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 512, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
    elif arch == 'cont-enc':
        # convolution layers
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 512, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
    elif arch == 'mnist':
        # Jan Schluechter's MNIST discriminator
        # convolution layers
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None,
            nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None,
            nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None,
            nonlinearity=lrelu), with_BatchNorm)
        # layer = BatchNorm(Conv2DLayer(
        #     layer, 128, 5, stride=2, pad='same', W=init, b=None,
        #      nonlinearity=lrelu), with_BatchNorm)
        # fully-connected layer
        # layer = BatchNorm(DenseLayer(
        #     layer, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
    elif arch == 'lsgan':
        layer = batch_norm(Conv2DLayer(
            layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
        layer = batch_norm(Conv2DLayer(
            layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
        layer = batch_norm(Conv2DLayer(
            layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
    elif arch == 'crepe':
        # CREPE
        # form words from sequence of characters
        layer = BatchNorm(Conv2DLayer(
            layer, 1024, (128, 7), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = MaxPool2DLayer(layer, (1, 3))
        # temporal convolution, 7-gram
        layer = BatchNorm(Conv2DLayer(
            layer, 512, (1, 7), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = MaxPool2DLayer(layer, (1, 3))
        # temporal convolution, 3-gram
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = flatten(layer)
        # fully-connected layers
        layer = dropout(DenseLayer(
            layer, 1024, W=init, b=None, nonlinearity=rectify))
        layer = dropout(DenseLayer(
            layer, 1024, W=init, b=None, nonlinearity=rectify))
    else:
        raise Exception("Model architecture {} is not supported".format(arch))

    if cond_var is not None:
        layer = DenseLayer(layer, 1024, nonlinearity=lrelu, b=None)
        layer = concat([layer, layer_cond])

    # output layer (linear and without bias)
    layer = DenseLayer(layer, 1, b=None, nonlinearity=None)
    print("Critic output:", layer.output_shape)
    return layer
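A usage sketch for the builder above (it assumes the module-level BatchNorm helper the function relies on is defined); the input and condition variables are placeholders.

import theano.tensor as T
import lasagne

images = T.tensor4('images')     # (batch, 1, 128, 128)
labels = T.matrix('labels')      # (batch, n_conds) one-hot conditions

critic = build_critic(input_var=images, cond_var=labels, n_conds=10, arch='dcgan')
score = lasagne.layers.get_output(critic)                          # (batch, 1) unbounded critic score
params = lasagne.layers.get_all_params(critic, trainable=True)     # weights to update with the critic loss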
Exemplo n.º 30
    def __init__(self,
                 num_units,
                 observation_input,
                 prev_state_input,
                 resetgate=Gate(W_cell=None),
                 updategate=Gate(W_cell=None),
                 hidden_update=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),

                 grad_clipping=5.,
                 **kwargs):

        assert len(prev_state_input.output_shape) == 2

        if len(observation_input.output_shape) != 2:
            observation_input = flatten(observation_input, outdim=2)

        assert len(observation_input.output_shape) == 2

        # default name
        if "name" not in kwargs:
            kwargs["name"] = "YetAnother" + self.__class__.__name__

        self.num_units = num_units

        super(GRUMemoryLayer, self).__init__([prev_state_input, observation_input], **kwargs)
        self.grad_clipping = grad_clipping

        # Retrieve the dimensionality of the incoming layer
        last_state_shape, observation_shape = self.input_shapes

        # Input dimensionality is the output dimensionality of the input layer
        last_num_units = np.prod(last_state_shape[1:])
        inp_num_inputs = np.prod(observation_shape[1:])

        # hidden shapes must match
        assert last_num_units == self.num_units

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (self.add_param(gate.W_in, (inp_num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (num_units,),
                                   name="b_{}".format(gate_name),
                                   regularizable=False),
                    gate.nonlinearity)

        # Add in all parameters from gates
        (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
         self.nonlinearity_updategate) = add_gate_params(updategate,
                                                         'updategate')
        (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
         self.nonlinearity_resetgate) = add_gate_params(resetgate, 'resetgate')

        (self.W_in_to_hidden_update, self.W_hid_to_hidden_update,
         self.b_hidden_update, self.nonlinearity_hid) = add_gate_params(
            hidden_update, 'hidden_update')

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        self.W_in_stacked = T.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        self.W_hid_stacked = T.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        self.b_stacked = T.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=0)
Exemplo n.º 31
    def __init__(self,
                 num_units,
                 observation_input,
                 prev_state_input,
                 resetgate=Gate(W_cell=None),
                 updategate=Gate(W_cell=None),
                 hidden_update=Gate(W_cell=None,
                                    nonlinearity=nonlinearities.tanh),
                 grad_clipping=5.,
                 **kwargs):

        assert len(prev_state_input.output_shape) == 2

        if len(observation_input.output_shape) != 2:
            observation_input = flatten(observation_input, outdim=2)

        assert len(observation_input.output_shape) == 2

        # default name
        if "name" not in kwargs:
            kwargs["name"] = "YetAnother" + self.__class__.__name__

        self.num_units = num_units

        super(GRUMemoryLayer,
              self).__init__([prev_state_input, observation_input], **kwargs)
        self.grad_clipping = grad_clipping

        # Retrieve the dimensionality of the incoming layer
        last_state_shape, observation_shape = self.input_shapes

        # Input dimensionality is the output dimensionality of the input layer
        last_num_units = np.prod(last_state_shape[1:])
        inp_num_inputs = np.prod(observation_shape[1:])

        # hidden shapes must match
        assert last_num_units == self.num_units

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (self.add_param(gate.W_in, (inp_num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (num_units, ),
                                   name="b_{}".format(gate_name),
                                   regularizable=False), gate.nonlinearity)

        # Add in all parameters from gates
        (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
         self.nonlinearity_updategate) = add_gate_params(
             updategate, 'updategate')
        (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
         self.nonlinearity_resetgate) = add_gate_params(
             resetgate, 'resetgate')

        (self.W_in_to_hidden_update, self.W_hid_to_hidden_update,
         self.b_hidden_update,
         self.nonlinearity_hid) = add_gate_params(hidden_update,
                                                  'hidden_update')

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        self.W_in_stacked = T.concatenate([
            self.W_in_to_resetgate, self.W_in_to_updategate,
            self.W_in_to_hidden_update
        ],
                                          axis=1)

        # Same for hidden weight matrices
        self.W_hid_stacked = T.concatenate([
            self.W_hid_to_resetgate, self.W_hid_to_updategate,
            self.W_hid_to_hidden_update
        ],
                                           axis=1)

        # Stack gate biases into a (3*num_units) vector
        self.b_stacked = T.concatenate(
            [self.b_resetgate, self.b_updategate, self.b_hidden_update],
            axis=0)
Exemplo n.º 32
    def __init__(self,
                 num_units,
                 observation_input,
                 prev_state_input,
                 resetgate=Gate(W_cell=None),
                 updategate=Gate(W_cell=None),
                 hidden_update=Gate(W_cell=None,
                                    nonlinearity=nonlinearities.tanh),
                 bias_init=init.Constant(),
                 weight_init=init.Normal(),
                 grad_clipping=5.,
                 **kwargs):
        """
        A Gated Recurrent Unit implementation of a memory layer.

        Unlike lasagne.layers.GRULayer, this layer does not produce the whole time series at once,
        but yields its next state given the last state and the observation, one tick at a time.
        This is done to simplify usage within external loops along with other MDP components.

        parameters:
            - num_units: number of units in the hidden state.
                - If you are using prev_state_input, put anything here.
            - observation_input - a lasagne layer that provides
              float[batch_id, input_id]: the input observation at this tick
              -- as an output.
            - prev_state_input [optional] - a lasagne layer that generates the previous batch
              of hidden states (in case you wish several layers to handle the same sequence)
            - concatenate_input: if true, appends observation_input of the current tick to this tick's own activation

        The resulting layer
        - generates the first (a-priori) agent state
        - determines the new agent state given the previous agent state and an observation / previous input

        """
        assert len(prev_state_input.output_shape) == 2

        if len(observation_input.output_shape) != 2:
            observation_input = flatten(observation_input, outdim=2)

        assert len(observation_input.output_shape) == 2

        # default name
        if "name" not in kwargs:
            kwargs["name"] = "YetAnother" + self.__class__.__name__

        self.num_units = num_units

        super(GRUMemoryLayer,
              self).__init__([prev_state_input, observation_input], **kwargs)
        self.grad_clipping = grad_clipping

        # Retrieve the dimensionality of the incoming layer
        last_state_shape, observation_shape = self.input_shapes

        # Input dimensionality is the output dimensionality of the input layer
        last_num_units = np.prod(last_state_shape[1:])
        inp_num_inputs = np.prod(observation_shape[1:])

        # hidden shapes must match
        assert last_num_units == self.num_units

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (self.add_param(gate.W_in, (inp_num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (num_units, ),
                                   name="b_{}".format(gate_name),
                                   regularizable=False), gate.nonlinearity)

        # Add in all parameters from gates
        (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
         self.nonlinearity_updategate) = add_gate_params(
             updategate, 'updategate')
        (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
         self.nonlinearity_resetgate) = add_gate_params(
             resetgate, 'resetgate')

        (self.W_in_to_hidden_update, self.W_hid_to_hidden_update,
         self.b_hidden_update,
         self.nonlinearity_hid) = add_gate_params(hidden_update,
                                                  'hidden_update')

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        self.W_in_stacked = T.concatenate([
            self.W_in_to_resetgate, self.W_in_to_updategate,
            self.W_in_to_hidden_update
        ],
                                          axis=1)

        # Same for hidden weight matrices
        self.W_hid_stacked = T.concatenate([
            self.W_hid_to_resetgate, self.W_hid_to_updategate,
            self.W_hid_to_hidden_update
        ],
                                           axis=1)

        # Stack gate biases into a (3*num_units) vector
        self.b_stacked = T.concatenate(
            [self.b_resetgate, self.b_updategate, self.b_hidden_update],
            axis=0)
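
        # The stacked matrices above are typically consumed in the per-tick state
        # update, e.g. in a get_output_for([prev_state, observation]) that mirrors
        # lasagne.layers.GRULayer's step. A minimal sketch, assuming the column
        # blocks are ordered resetgate, updategate, hidden_update as concatenated
        # above (names below are illustrative, not part of this class):
        #
        #     hid_input = T.dot(prev_state, self.W_hid_stacked)
        #     in_input = T.dot(observation, self.W_in_stacked) + self.b_stacked
        #     slice_w = lambda x, n: x[:, n * self.num_units:(n + 1) * self.num_units]
        #     r = self.nonlinearity_resetgate(slice_w(in_input, 0) + slice_w(hid_input, 0))
        #     u = self.nonlinearity_updategate(slice_w(in_input, 1) + slice_w(hid_input, 1))
        #     c = self.nonlinearity_hid(slice_w(in_input, 2) + r * slice_w(hid_input, 2))
        #     new_state = (1 - u) * prev_state + u * c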
Exemplo n.º 33
def test_memory(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """
    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states

    memory_dict = OrderedDict([])
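    # maps each new memory state layer to the InputLayer that feeds in its previous
    # value, so the agent can carry that state from one tick to the next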

    ### Window
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    memory_dict[window] = prev_window

    ### Stack
    # stack inputs and controls
    stack_w, stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="stack inputs")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="stack controls")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    ### RNN preset

    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    ### GRU preset
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    ### GRUMemoryLayer
    prev_gru1 = InputLayer((None, 15), name="previous GRUMemoryLayer state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    # LSTM with peepholes
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")

    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")

    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM1 [with peepholes]")

    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    # LSTM without peepholes
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")

    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")

    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    ## concat everything

    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]"""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories
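
    # e.g. (hypothetical usage):
    #     actions, memories = step(observation_batch)                 # start from zero memories
    #     actions, memories = step(next_observation_batch, memories)  # carry state to the next tick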

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and ads them into the pool
        throwing the old ones away entirely for simplicity"""

        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # ### Training via experience replay
    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    # rewards are left unscaled here; for SpaceInvaders this works well enough
    scaled_reward_seq = env.rewards

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )
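    # elwise_mse_loss presumably holds the per-tick squared temporal-difference error
    # (Q(s,a) - (r + gamma * max_a' Q(s',a')))^2, with finished ticks masked via is_alive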

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Exemplo n.º 34
    def __init__(self,
                 num_units,
                 observation_input,
                 prev_state_input,
                 resetgate=Gate(W_cell=None),
                 updategate=Gate(W_cell=None),
                 hidden_update=Gate(W_cell=None,
                                    nonlinearity=nonlinearities.tanh),
                 grad_clipping=5.,
                 **kwargs):
        """
        a Gated Recurrent Unit implementation of a memory layer.

        Unlike lasagne.layers.GRULayer, this layer does not produce the whole time series at once,
        but yields its next state given the last state and observation, one tick at a time.
        This is done to simplify usage within external loops along with other MDP components.
    
        parameters: 
            - num_units: amount of units in the hidden state. 
                - If you are using prev_state_input, this must match its number of hidden units (the assert below enforces this).
            - observation_input - a lasagne layer that provides
            float[batch_id, input_id]: input observation at this tick
            -- as an output.
            - prev_state_input [optional] - a lasagne layer that generates the previous batch
            of hidden states (in case you wish several layers to handle the same sequence)
            - concatenate_input: if true, appends observation_input of current tick to own activation at this tick

        Creates an instance that
        - generates the first (a priori) agent state
        - determines the new agent state given the previous agent state and an observation / previous input

        """
        assert len(prev_state_input.output_shape) == 2

        if len(observation_input.output_shape) != 2:
            observation_input = flatten(observation_input, outdim=2)

        assert len(observation_input.output_shape) == 2

        # default name
        if "name" not in kwargs:
            kwargs["name"] = "YetAnother" + self.__class__.__name__

        self.num_units = num_units

        super(GRUMemoryLayer, self).__init__([prev_state_input, observation_input], **kwargs)
        self.grad_clipping = grad_clipping

        # Retrieve the dimensionality of the incoming layer
        last_state_shape, observation_shape = self.input_shapes
        
        # Input dimensionality is the output dimensionality of the input layer
        last_num_units = np.prod(last_state_shape[1:])
        inp_num_inputs = np.prod(observation_shape[1:])

        # hidden shapes must match
        assert last_num_units == self.num_units

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (self.add_param(gate.W_in, (inp_num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (num_units,),
                                   name="b_{}".format(gate_name),
                                   regularizable=False),
                    gate.nonlinearity)
        
        # Add in all parameters from gates
        (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
         self.nonlinearity_updategate) = add_gate_params(updategate,
                                                         'updategate')
        (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
         self.nonlinearity_resetgate) = add_gate_params(resetgate, 'resetgate')

        (self.W_in_to_hidden_update, self.W_hid_to_hidden_update,
         self.b_hidden_update, self.nonlinearity_hid) = add_gate_params(
             hidden_update, 'hidden_update')
        
        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        self.W_in_stacked = T.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        self.W_hid_stacked = T.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        self.b_stacked = T.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=0)
Exemplo n.º 35
def create_nn():

	'''
	Returns the theano functions - train, test - and the network output layer ('X-KerasNet').

	Uses the default values of adam - learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08.

	Input to the NN is an image batch of shape (batch_size,3,32,32), fed as three separate
	(batch_size,1,32,32) channel tensors, plus a one-hot target matrix of shape (batch_size,nb_classes).
	'''

	a_l_in = InputLayer((batch_size,1,32,32))
	a_l_in_bn = BatchNormLayer(a_l_in)
	b_l_in = InputLayer((batch_size,1,32,32))
	b_l_in_bn = BatchNormLayer(b_l_in)
	c_l_in = InputLayer((batch_size,1,32,32))
	c_l_in_bn = BatchNormLayer(c_l_in)

	
	a_conv1 = Conv2DLayer(a_l_in_bn,pad='same',num_filters=32,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x32x32
	b_conv1 = Conv2DLayer(b_l_in_bn,pad='same',num_filters=16,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx16x32x32
	c_conv1 = Conv2DLayer(c_l_in_bn,pad='same',num_filters=16,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx16x32x32
	
	a_conv1_1 = Conv2DLayer(a_conv1,pad='same',num_filters=32,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x32x32
	b_conv1_1 = Conv2DLayer(b_conv1,pad='same',num_filters=16,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx16x32x32
	c_conv1_1 = Conv2DLayer(c_conv1,pad='same',num_filters=16,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx16x32x32
	
	a_mp1 = MaxPool2DLayer(a_conv1_1,pool_size=(2,2)) #Bx32x16x16
	b_mp1 = MaxPool2DLayer(b_conv1_1,pool_size=(2,2)) #Bx16x16x16
	c_mp1 = MaxPool2DLayer(c_conv1_1,pool_size=(2,2)) #Bx16x16x16
	
	a_do1 = dropout(a_mp1,p=0.25) #Bx32x16x16
	b_do1 = dropout(b_mp1,p=0.25) #Bx16x16x16
	c_do1 = dropout(c_mp1,p=0.25) #Bx16x16x16

	#Exchange of feature maps

	a_to_bc = Conv2DLayer(a_do1,pad='same',num_filters=32,filter_size=(1,1),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x16x16
	b_to_a = Conv2DLayer(b_do1,pad='same',num_filters=16,filter_size=(1,1),nonlinearity=lasagne.nonlinearities.rectify)  #Bx16x16x16
	c_to_a = Conv2DLayer(c_do1,pad='same',num_filters=16,filter_size=(1,1),nonlinearity=lasagne.nonlinearities.rectify)  #Bx16x16x16

	#Merging

	a_merge1 = lasagne.layers.ConcatLayer([a_do1,b_to_a,c_to_a]) #Bx64x16x16
	b_merge1 = lasagne.layers.ConcatLayer([b_do1,a_to_bc])       #Bx48x16x16
	c_merge1 = lasagne.layers.ConcatLayer([c_do1,a_to_bc])		 #Bx48x16x16


	a_conv2 = Conv2DLayer(a_merge1,pad='same',num_filters=64,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx64x16x16
	b_conv2 = Conv2DLayer(b_merge1,pad='same',num_filters=32,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x16x16
	c_conv2 = Conv2DLayer(c_merge1,pad='same',num_filters=32,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x16x16
	
	a_conv2_1 = Conv2DLayer(a_conv2,pad='same',num_filters=64,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx64x16x16
	b_conv2_1 = Conv2DLayer(b_conv2,pad='same',num_filters=32,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x16x16
	c_conv2_1 = Conv2DLayer(c_conv2,pad='same',num_filters=32,filter_size=(3,3),nonlinearity=lasagne.nonlinearities.rectify) #Bx32x16x16
	
	a_mp2 = MaxPool2DLayer(a_conv2_1,pool_size=(2,2)) #Bx64x8x8
	b_mp2 = MaxPool2DLayer(b_conv2_1,pool_size=(2,2)) #Bx32x8x8
	c_mp2 = MaxPool2DLayer(c_conv2_1,pool_size=(2,2)) #Bx32x8x8
	
	a_do2 = dropout(a_mp2,p=0.25) #Bx64x8x8
	b_do2 = dropout(b_mp2,p=0.25) #Bx32x8x8
	c_do2 = dropout(c_mp2,p=0.25) #Bx32x8x8

	#Final Merge

	merge2 = lasagne.layers.ConcatLayer([a_do2,b_do2,c_do2]) #Bx128x8x8

	flat = flatten(merge2,2) #Bx8192
	fc = DenseLayer(flat,num_units=512,nonlinearity=lasagne.nonlinearities.rectify) #Bx512
	fc_do = dropout(fc, p=0.5) 
	network = DenseLayer(fc_do, num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) #Bxnb_classes

	net_output = lasagne.layers.get_output(network)
	true_output = T.matrix()

	all_params = lasagne.layers.get_all_params(network,trainable=True)
	loss = T.mean(lasagne.objectives.categorical_crossentropy(net_output,true_output))
	updates = lasagne.updates.adam(loss,all_params)

	train = theano.function(inputs=[a_l_in.input_var,b_l_in.input_var,c_l_in.input_var,true_output], outputs=[net_output,loss], updates=updates)

	# test-time: use the deterministic (dropout-free) output and all three image inputs
	net_output_det = lasagne.layers.get_output(network, deterministic=True)
	test = theano.function(inputs=[a_l_in.input_var,b_l_in.input_var,c_l_in.input_var], outputs=[net_output_det])

	return train,test,network
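

# Usage sketch, assuming the module-level globals batch_size and nb_classes used above;
# here they get hypothetical values and a single training step is run on random data,
# just to show the calling convention of create_nn().
import numpy as np
import theano

batch_size, nb_classes = 32, 10  # hypothetical values for this sketch

train, test, network = create_nn()

floatX = theano.config.floatX
a_x = np.random.rand(batch_size, 1, 32, 32).astype(floatX)
b_x = np.random.rand(batch_size, 1, 32, 32).astype(floatX)
c_x = np.random.rand(batch_size, 1, 32, 32).astype(floatX)
y = np.eye(nb_classes, dtype=floatX)[np.random.randint(0, nb_classes, batch_size)]  # one-hot targets

preds, loss_value = train(a_x, b_x, c_x, y)  # one adam update step
test_preds, = test(a_x, b_x, c_x)            # deterministic forward pass
print('cross-entropy on a random batch: %.4f' % float(loss_value))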