Example #1
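All of the examples below are __init__ methods excerpted from larger Theano/Lasagne model classes. They share the same preamble, reconstructed here for convenience (helpers such as lasagne_lstm_nooutput, lasagne_embedding_layer_2, gateLayer, DotSumLayer, softMaxLayer2, MulLayer and utils are project-local and not shown):

import cPickle

import numpy as np
import theano
import theano.tensor as T
from theano import config
import lasagne
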
    def __init__(self, params, data):

        self.get_pos_map(data)
        self.cap = params.cap
        self.lowercase = params.lowercase
        self.featuretype = params.featuretype

        chardim = params.chardim #dimension of character network layer
        worddim = params.worddim #dimension of character embedding and word LSTM layer

        if params.nntype != "charagram":
            self.chars = self.get_character_dict(data)
            Ce = lasagne.init.Uniform(range=0.5/len(self.chars))
            Ce_np = Ce.sample((len(self.chars),params.worddim))
            Ce = theano.shared(np.asarray(Ce_np, dtype=config.floatX))

        char = T.imatrix(); charmask = T.matrix()
        word = T.imatrix(); wordmask = T.matrix()

        idxs = T.ivector()
        Y = T.matrix()

        l_in_char = lasagne.layers.InputLayer((None, None))
        if params.nntype == "charlstm":
            l_mask_char = lasagne.layers.InputLayer(shape=(None, None))
            l_emb_char = lasagne.layers.EmbeddingLayer(l_in_char, input_size=Ce.get_value().shape[0],
                                              output_size=Ce.get_value().shape[1], W=Ce)
            # build the character LSTM; use the no-output-gate variant directly
            # rather than constructing an unused layer first
            if params.outgate:
                l_lstm_char = lasagne.layers.LSTMLayer(l_emb_char, chardim, peepholes=True, learn_init=False,
                                                       mask_input=l_mask_char)
            else:
                l_lstm_char = lasagne_lstm_nooutput(l_emb_char, chardim, peepholes=True, learn_init=False,
                                                    mask_input=l_mask_char)
            l_We = lasagne.layers.SliceLayer(l_lstm_char, -1, 1)
            We = lasagne.layers.get_output(l_We, {l_in_char: char, l_mask_char: charmask})
        elif params.nntype == "charagram":
            char = T.matrix()
            self.featuremap = self.get_feature_map(data, params.featuretype, params.cutoff, params.lowercase)
            print "Number of features: ", len(self.featuremap)

            l_in_char = lasagne.layers.InputLayer((None, len(self.featuremap)+1))
            if self.cap:
                l_in_char = lasagne.layers.InputLayer((None, len(self.featuremap)+2))
            if params.numlayers == 1:
                l_We = lasagne.layers.DenseLayer(l_in_char, chardim, nonlinearity=params.act)
            elif params.numlayers == 2:
                l_1 = lasagne.layers.DenseLayer(l_in_char, chardim, nonlinearity=params.act)
                l_We = lasagne.layers.DenseLayer(l_1, chardim, nonlinearity=params.act)
            else:
                raise ValueError('Only 1-2 layers are supported currently.')
            We = lasagne.layers.get_output(l_We, {l_in_char:char})
        elif params.nntype == "charcnn":
            l_emb_char = lasagne.layers.EmbeddingLayer(l_in_char, input_size=Ce.get_value().shape[0],
                                              output_size=Ce.get_value().shape[1], W=Ce)
            emb = lasagne.layers.DimshuffleLayer(l_emb_char, (0, 2, 1))
            conv_params = None
            if params.conv_type == 1:
                conv_params = [(175,2),(175,3),(175,4)]
            else:
                conv_params = [(25,1),(50,2),(75,3),(100,4),(125,5),(150,6)]
            layers = []
            for num_filters, filter_size in conv_params:
                conv = lasagne.layers.Conv1DLayer(emb, num_filters, filter_size, nonlinearity=params.act)
                pl = lasagne.layers.GlobalPoolLayer(conv,theano.tensor.max)
                pl = lasagne.layers.FlattenLayer(pl)
                layers.append(pl)
            concat = lasagne.layers.ConcatLayer(layers)
            l_We = lasagne.layers.DenseLayer(concat, num_units=chardim, nonlinearity=params.act)
            We = lasagne.layers.get_output(l_We, {l_in_char: char})
        else:
            l_We = None
            We = None

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))
        l_emb_word = lasagne_embedding_layer_2(l_in_word, chardim, We)

        l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word, worddim, peepholes=True, learn_init=False,
                                              mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word, worddim, peepholes=True, learn_init=False,
                                              mask_input=l_mask_word, backwards = True)

        l_reshapef = lasagne.layers.ReshapeLayer(l_lstm_wordf,(-1,worddim))
        l_reshapeb = lasagne.layers.ReshapeLayer(l_lstm_wordb,(-1,worddim))
        concat2 = lasagne.layers.ConcatLayer([l_reshapef, l_reshapeb])
        l_emb = lasagne.layers.DenseLayer(concat2, num_units=worddim, nonlinearity=lasagne.nonlinearities.tanh)
        l_out = lasagne.layers.DenseLayer(l_emb, num_units=len(self.tags), nonlinearity=lasagne.nonlinearities.softmax)
        embg = lasagne.layers.get_output(l_out, {l_in_word: word, l_mask_word: wordmask})

        embg = embg[idxs]
        prediction = T.argmax(embg, axis=1)

        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_We, trainable=True)
        reg = 0.5*params.LC*sum(lasagne.regularization.l2(x) for x in self.all_params)

        cost = T.nnet.categorical_crossentropy(embg,Y)
        cost = T.mean(cost) + reg

        self.feedforward_function = None
        self.scoring_function = None
        self.cost_function = None
        self.train_function = None

        if params.nntype == "charlstm":
            self.feedforward_function = theano.function([char, charmask, word, wordmask, idxs], embg)
            self.scoring_function = theano.function([char, charmask, word, wordmask, idxs], prediction)
            self.cost_function = theano.function([char, charmask, word, wordmask, idxs, Y], cost)
            grads = theano.gradient.grad(cost, self.all_params)
            updates = lasagne.updates.momentum(grads, self.all_params, 0.2, momentum=0.95) #same as Ling et al.
            self.train_function = theano.function([char, charmask, word, wordmask, idxs, Y], cost, updates=updates)
        elif params.nntype == "charcnn" or params.nntype == "charagram":
            self.feedforward_function = theano.function([char, word, wordmask, idxs], embg)
            self.scoring_function = theano.function([char, word, wordmask, idxs], prediction)
            self.cost_function = theano.function([char, word, wordmask, idxs, Y], cost)
            grads = theano.gradient.grad(cost, self.all_params)
            updates = lasagne.updates.momentum(grads, self.all_params, 0.2, momentum=0.95) #same as Ling et al.
            self.train_function = theano.function([char, word, wordmask, idxs, Y], cost, updates=updates)
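
A minimal sketch of how the compiled functions above might be driven. The padding helper and the variable names (tagger, char_seqs, word_seqs, idxs, Y) are illustrative assumptions, not part of the original code:

import numpy as np

def pad_batch(seqs, dtype='int32'):
    # Pad variable-length index sequences into a (batch, maxlen) matrix
    # plus a matching float mask (1 over real tokens, 0 over padding).
    maxlen = max(len(s) for s in seqs)
    batch = np.zeros((len(seqs), maxlen), dtype=dtype)
    mask = np.zeros((len(seqs), maxlen), dtype='float32')
    for i, s in enumerate(seqs):
        batch[i, :len(s)] = s
        mask[i, :len(s)] = 1.
    return batch, mask

# char, charmask = pad_batch(char_seqs)   # character indices per word type
# word, wordmask = pad_batch(word_seqs)   # word indices per sentence
# idxs selects the unpadded positions of the flattened output and Y holds
# their one-hot tags:
# loss = tagger.train_function(char, charmask, word, wordmask, idxs, Y)
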
Example #2
    def __init__(self, We_initial, We_pos_initial, params):

        if params.maxval:
            self.nout = params.maxval - params.minval + 1

        p = None
        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(file(params.regfile, 'rb'))
            print p
            #contains [<TensorType(float64, matrix)>,
            # W_in_to_ingate, W_hid_to_ingate, b_ingate, W_in_to_forgetgate,
            # W_hid_to_forgetgate, b_forgetgate, W_in_to_cell, W_hid_to_cell,
            # b_cell, W_in_to_outgate, W_hid_to_outgate, b_outgate, W_cell_to_ingate,
            # W_cell_to_forgetgate, W_cell_to_outgate]

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We_pos = theano.shared(np.asarray(We_pos_initial, dtype=config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            updatewords = False

        #symbolic params
        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        g1posbatchindices = T.imatrix()
        g2posbatchindices = T.imatrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_pos = lasagne.layers.InputLayer((None, None, 1))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_pos_emb = lasagne.layers.EmbeddingLayer(
            l_pos,
            input_size=We_pos.get_value().shape[0],
            output_size=We_pos.get_value().shape[1],
            W=We_pos)

        #attention
        llGate = gateLayer([l_in, l_emb], name='llGate')  #25*50*300
        #attention-vector
        llDot = DotSumLayer([llGate, l_pos_emb], name='llDot')  #25*50
        llSoftMax = softMaxLayer2([l_in, llDot], name='llSoftMax')  #25*30 mask
        #llSoftMax_out = lasagne.layers.get_output(llSoftMax, {l_in:g1batchindices, l_pos:g1posbatchindices})
        #self.look = theano.function([g1batchindices,g1posbatchindices], llSoftMax_out)
        llAttend = MulLayer([llSoftMax, llGate], name='llAttend')
        #--------------------------
        l_lstm = None
        if params.useoutgate:
            l_lstm = lasagne.layers.LSTMLayer(llAttend,
                                              params.layersize,
                                              peepholes=params.usepeep,
                                              learn_init=False,
                                              mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(llAttend,
                                           params.layersize,
                                           peepholes=params.usepeep,
                                           learn_init=False,
                                           mask_input=l_mask)

        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_pos: g1posbatchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_pos: g2posbatchindices,
            l_mask: g2mask
        })

        g1_dot_g2 = embg1 * embg2
        g1_abs_g2 = abs(embg1 - embg2)

        lin_dot = lasagne.layers.InputLayer((None, params.layersize))
        lin_abs = lasagne.layers.InputLayer((None, params.layersize))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(
            l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                              self.nout,
                                              nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {
            lin_dot: g1_dot_g2,
            lin_abs: g1_abs_g2
        })
        Y = T.log(X)

        cost = scores * (T.log(scores) - Y)
        cost = cost.sum(axis=1) / (float(self.nout))

        prediction = 0.
        i = params.minval
        while i <= params.maxval:
            prediction = prediction + i * X[:, i - 1]
            i += 1

        self.network_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function(
            [g1batchindices, g1posbatchindices, g1mask], embg1)
        self.scoring_function = theano.function([
            g1batchindices, g1posbatchindices, g1mask, g2batchindices,
            g2posbatchindices, g2mask
        ], prediction)
        self.cost_function = theano.function([
            scores, g1batchindices, g1posbatchindices, g1mask, g2batchindices,
            g2posbatchindices, g2mask
        ], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([
            scores, g1batchindices, g1posbatchindices, g1mask, g2batchindices,
            g2posbatchindices, g2mask
        ],
                                              cost,
                                              updates=updates)
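
The cost above is the KL divergence between the gold distribution scores and the model distribution X, averaged over the self.nout score bins, and prediction is the expected score under X. For this to line up, each real-valued gold score has to be encoded as a distribution over the integer bins minval..maxval. A common construction (used e.g. by Tai et al., 2015, and a plausible guess at what the accompanying training code does) spreads the mass over the two bins bracketing the score:

import numpy as np

def score_to_distribution(y, minval, maxval):
    # Encode a gold score y in [minval, maxval] as a sparse probability
    # vector whose expectation recovers y exactly.
    nout = maxval - minval + 1
    p = np.zeros(nout, dtype='float32')
    lower = int(np.floor(y))
    if lower == maxval:              # y sits exactly on the top bin
        p[nout - 1] = 1.
    else:
        p[lower - minval + 1] = y - lower
        p[lower - minval] = 1. - (y - lower)
    return p

# score_to_distribution(3.4, 1, 5) -> [0., 0., 0.6, 0.4, 0.]
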
Example #3
    def __init__(self, params):

        self.chars = utils.get_character_dict(params.character_file)
        Ce = lasagne.init.Uniform(range=0.5 / len(self.chars))
        Ce = Ce.sample((len(self.chars), params.chardim))
        Ce = theano.shared(np.asarray(Ce, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        p2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=Ce.get_value().shape[0],
            output_size=Ce.get_value().shape[1],
            W=Ce)
        l_lstm = None
        if params.outgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                              params.worddim,
                                              peepholes=params.peepholes,
                                              learn_init=False,
                                              mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb,
                                           params.worddim,
                                           peepholes=params.peepholes,
                                           learn_init=False,
                                           mask_input=l_mask)
        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        embp1 = lasagne.layers.get_output(l_out, {
            l_in: p1batchindices,
            l_mask: p1mask
        })
        embp2 = lasagne.layers.get_output(l_out, {
            l_in: p2batchindices,
            l_mask: p2mask
        })

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1)) + 1E-6
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(
            T.sum(embg1**2, axis=1)) + 1E-6
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1)) + 1E-6
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2

        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in self.all_params)
        cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ], cost)
        prediction = g1g2
        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)

        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ],
                                              cost,
                                              updates=updates)
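
The two cost terms above form a max-margin objective under cosine similarity: the true pair (g1, g2) should outscore the pairs built from the negative examples p1 and p2 by at least params.margin. The same hinge in plain NumPy, useful as a sanity check (the margin value is illustrative):

import numpy as np

def cosine(a, b, eps=1e-6):
    # Row-wise cosine similarity, with the same epsilon the model adds
    # to the denominator.
    return (a * b).sum(axis=1) / (
        np.sqrt((a ** 2).sum(axis=1)) * np.sqrt((b ** 2).sum(axis=1)) + eps)

def hinge_cost(embg1, embg2, embp1, embp2, margin=0.4):
    g1g2 = cosine(embg1, embg2)
    cost1 = np.maximum(0., margin - g1g2 + cosine(embp1, embg1))
    cost2 = np.maximum(0., margin - g1g2 + cosine(embp2, embg2))
    return (cost1 + cost2).mean()
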
Example #4
    def __init__(self, params):

        self.chars = utils.get_character_dict(params.character_file)
        Ce = lasagne.init.Uniform(range=0.5/len(self.chars))
        Ce = Ce.sample((len(self.chars),params.chardim))
        Ce = theano.shared(np.asarray(Ce, dtype=config.floatX))

        g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
        p1batchindices = T.imatrix(); p2batchindices = T.imatrix()
        g1mask = T.matrix(); g2mask = T.matrix()
        p1mask = T.matrix(); p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=Ce.get_value().shape[0],
                                              output_size=Ce.get_value().shape[1], W=Ce)
        l_lstm = None
        if params.outgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb, params.worddim, peepholes=params.peepholes, learn_init=False,
                                              mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb, params.worddim, peepholes=params.peepholes, learn_init=False,
                                       mask_input=l_mask)
        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})
        embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask})
        embp1 = lasagne.layers.get_output(l_out, {l_in: p1batchindices, l_mask: p1mask})
        embp2 = lasagne.layers.get_output(l_out, {l_in: p2batchindices, l_mask: p2mask})

        g1g2 = (embg1*embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2,axis=1)) * T.sqrt(T.sum(embg2**2,axis=1)) + 1E-6
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1*embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2,axis=1)) * T.sqrt(T.sum(embg1**2,axis=1)) + 1E-6
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2*embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2,axis=1)) * T.sqrt(T.sum(embg2**2,axis=1)) + 1E-6
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1*(costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2*(costp2g2 > 0)

        cost = costp1g1 + costp2g2


        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in self.all_params)
        cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
        self.cost_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                                              g1mask, g2mask, p1mask, p2mask], cost)
        prediction = g1g2
        self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                                 g1mask, g2mask], prediction)

        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                                                   g1mask, g2mask, p1mask, p2mask], cost, updates=updates)
Example #5
    def __init__(self, We_initial, params):

        p = None

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(file(params.regfile, 'rb'))
            print p
            #contains [<TensorType(float64, matrix)>,
            # W_in_to_ingate, W_hid_to_ingate, b_ingate, W_in_to_forgetgate,
            # W_hid_to_forgetgate, b_forgetgate, W_in_to_cell, W_hid_to_cell,
            # b_cell, W_in_to_outgate, W_hid_to_outgate, b_outgate, W_cell_to_ingate,
            # W_cell_to_forgetgate, W_cell_to_outgate]

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = False

        g1batchindices = T.imatrix()
        g1mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_lstm = None
        if params.useoutgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb, params.layersize, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb, params.layersize, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)

        if params.traintype == "reg" or params.traintype == "rep":
            if params.useoutgate:
                W_in_to_ingate = np.asarray(p[1].get_value(), dtype = config.floatX)
                W_hid_to_ingate = np.asarray(p[2].get_value(), dtype = config.floatX)
                b_ingate = np.asarray(p[3].get_value(), dtype = config.floatX)
                W_in_to_forgetgate = np.asarray(p[4].get_value(), dtype = config.floatX)
                W_hid_to_forgetgate = np.asarray(p[5].get_value(), dtype = config.floatX)
                b_forgetgate = np.asarray(p[6].get_value(), dtype = config.floatX)
                W_in_to_cell = np.asarray(p[7].get_value(), dtype = config.floatX)
                W_hid_to_cell = np.asarray(p[8].get_value(), dtype = config.floatX)
                b_cell = np.asarray(p[9].get_value(), dtype = config.floatX)
                W_in_to_outgate = np.asarray(p[10].get_value(), dtype = config.floatX)
                W_hid_to_outgate = np.asarray(p[11].get_value(), dtype = config.floatX)
                b_outgate = np.asarray(p[12].get_value(), dtype = config.floatX)
                W_cell_to_ingate = np.asarray(p[13].get_value(), dtype = config.floatX)
                W_cell_to_forgetgate = np.asarray(p[14].get_value(), dtype = config.floatX)
                W_cell_to_outgate = np.asarray(p[15].get_value(), dtype = config.floatX)

                ingate = lasagne.layers.Gate(W_in=W_in_to_ingate, W_hid=W_hid_to_ingate, W_cell=W_cell_to_ingate, b=b_ingate)
                forgetgate = lasagne.layers.Gate(W_in=W_in_to_forgetgate, W_hid=W_hid_to_forgetgate, W_cell=W_cell_to_forgetgate, b=b_forgetgate)
                outgate = lasagne.layers.Gate(W_in=W_in_to_outgate, W_hid=W_hid_to_outgate, W_cell=W_cell_to_outgate, b=b_outgate)
                cell = lasagne.layers.Gate(W_in=W_in_to_cell, W_hid=W_hid_to_cell, W_cell=None, b=b_cell, nonlinearity=lasagne.nonlinearities.tanh)
                l_lstm = lasagne.layers.LSTMLayer(l_emb, params.layersize, ingate = ingate, forgetgate = forgetgate,
                                  outgate = outgate, cell = cell, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)
            else:
                W_in_to_ingate = np.asarray(p[1].get_value(), dtype = config.floatX)
                W_hid_to_ingate = np.asarray(p[2].get_value(), dtype = config.floatX)
                b_ingate = np.asarray(p[3].get_value(), dtype = config.floatX)
                W_in_to_forgetgate = np.asarray(p[4].get_value(), dtype = config.floatX)
                W_hid_to_forgetgate = np.asarray(p[5].get_value(), dtype = config.floatX)
                b_forgetgate = np.asarray(p[6].get_value(), dtype = config.floatX)
                W_in_to_cell = np.asarray(p[7].get_value(), dtype = config.floatX)
                W_hid_to_cell = np.asarray(p[8].get_value(), dtype = config.floatX)
                b_cell = np.asarray(p[9].get_value(), dtype = config.floatX)
                W_cell_to_ingate = np.asarray(p[10].get_value(), dtype = config.floatX)
                W_cell_to_forgetgate = np.asarray(p[11].get_value(), dtype = config.floatX)

                ingate = lasagne.layers.Gate(W_in=W_in_to_ingate, W_hid=W_hid_to_ingate, W_cell=W_cell_to_ingate, b=b_ingate)
                forgetgate = lasagne.layers.Gate(W_in=W_in_to_forgetgate, W_hid=W_hid_to_forgetgate, W_cell=W_cell_to_forgetgate, b=b_forgetgate)
                cell = lasagne.layers.Gate(W_in=W_in_to_cell, W_hid=W_hid_to_cell, W_cell=None, b=b_cell, nonlinearity=lasagne.nonlinearities.tanh)
                l_lstm = lasagne_lstm_nooutput(l_emb, params.layersize, ingate = ingate, forgetgate = forgetgate,
                                  cell = cell, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)

        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg = lasagne.layers.get_output(l_out, {l_in:g1batchindices, l_mask:g1mask})

        # embg comes from the LSTM sliced at the last timestep, so it has
        # params.layersize units (not the embedding width)
        l_in2 = lasagne.layers.InputLayer((None, params.layersize))
        l_sigmoid = lasagne.layers.DenseLayer(l_in2, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 2, nonlinearity=T.nnet.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in2:embg})
        cost = T.nnet.categorical_crossentropy(X,scores)
        prediction = T.argmax(X, axis=1)

        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg)
        self.scoring_function = theano.function([g1batchindices,
                             g1mask],prediction)
        self.cost_function = theano.function([scores, g1batchindices,
                             g1mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices,
                             g1mask], cost, updates=updates)
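
getRegTerm and getTrainableParams are defined elsewhere in the class. Judging from the traintype flags and from Example #6 below, a plausible reconstruction of the regularizer is an L2 penalty on the network weights plus, under traintype == "reg", a tether pulling the embeddings back toward their pickled initial values; this sketch is an assumption, not the original method:

    def getRegTerm(self, params, We, initial_We, l_out, l_softmax, p):
        # Hypothetical reconstruction: L2 on the non-embedding parameters,
        # plus an L2 tether of We to its initial (pickled) value when
        # regularizing rather than freezing the embeddings.
        reg = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in self.network_params)
        if params.traintype == "reg":
            reg += 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
        return reg
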
Example #6
    def __init__(self, We_initial, params):

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        p2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_lstm = None
        if params.outgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                              params.layersize,
                                              peepholes=params.peephole,
                                              learn_init=False,
                                              mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb,
                                           params.layersize,
                                           peepholes=params.peephole,
                                           learn_init=False,
                                           mask_input=l_mask)
        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })
        embp1 = lasagne.layers.get_output(l_out, {
            l_in: p1batchindices,
            l_mask: p1mask
        })
        embp2 = lasagne.layers.get_output(l_out, {
            l_in: p2batchindices,
            l_mask: p2mask
        })

        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1**2, axis=1)) * T.sqrt(
            T.sum(embg1**2, axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2**2, axis=1)) * T.sqrt(
            T.sum(embg2**2, axis=1))
        p2g2 = p2g2 / p2g2norm

        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
        network_params = lasagne.layers.get_all_params(l_lstm, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_lstm, trainable=True)

        l2 = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in network_params)
        if params.updatewords:
            word_reg = 0.5 * params.LW * lasagne.regularization.l2(We -
                                                                   initial_We)
            cost = T.mean(cost) + l2 + word_reg
        else:
            cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)
        self.cost_function = theano.function([
            g1batchindices, g2batchindices, p1batchindices, p2batchindices,
            g1mask, g2mask, p1mask, p2mask
        ], cost)

        prediction = g1g2

        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)

        self.train_function = None
        if params.updatewords:
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, params.clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([
                g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                g1mask, g2mask, p1mask, p2mask
            ],
                                                  cost,
                                                  updates=updates)
        else:
            self.all_params = network_params
            grads = theano.gradient.grad(cost, self.all_params)
            if params.clip:
                grads = [
                    lasagne.updates.norm_constraint(grad, params.clip,
                                                    range(grad.ndim))
                    for grad in grads
                ]
            updates = params.learner(grads, self.all_params, params.eta)
            self.train_function = theano.function([
                g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                g1mask, g2mask, p1mask, p2mask
            ],
                                                  cost,
                                                  updates=updates)
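
In both branches above, passing range(grad.ndim) as the norm_axes argument makes lasagne.updates.norm_constraint rescale each gradient tensor over all of its axes, i.e. whenever its total L2 norm exceeds params.clip. The effect, in NumPy terms, is roughly:

import numpy as np

def clip_by_norm(grad, max_norm, eps=1e-7):
    # Rescale the whole tensor when its global L2 norm exceeds max_norm,
    # mirroring norm_constraint(grad, params.clip, range(grad.ndim)).
    norm = np.sqrt((grad ** 2).sum())
    if norm > max_norm:
        grad = grad * (max_norm / (norm + eps))
    return grad
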
Example #7
    def __init__(self, params, data):

        self.get_pos_map(data)
        self.cap = params.cap
        self.lowercase = params.lowercase
        self.featuretype = params.featuretype

        chardim = params.chardim  #dimension of character network layer
        worddim = params.worddim  #dimension of character embedding and word LSTM layer

        if params.nntype != "charagram":
            self.chars = self.get_character_dict(data)
            Ce = lasagne.init.Uniform(range=0.5 / len(self.chars))
            Ce_np = Ce.sample((len(self.chars), params.worddim))
            Ce = theano.shared(np.asarray(Ce_np, dtype=config.floatX))

        char = T.imatrix()
        charmask = T.matrix()
        word = T.imatrix()
        wordmask = T.matrix()

        idxs = T.ivector()
        Y = T.matrix()

        l_in_char = lasagne.layers.InputLayer((None, None))
        if params.nntype == "charlstm":
            l_mask_char = lasagne.layers.InputLayer(shape=(None, None))
            l_emb_char = lasagne.layers.EmbeddingLayer(
                l_in_char,
                input_size=Ce.get_value().shape[0],
                output_size=Ce.get_value().shape[1],
                W=Ce)
            # build the character LSTM; use the no-output-gate variant directly
            # rather than constructing an unused layer first
            if params.outgate:
                l_lstm_char = lasagne.layers.LSTMLayer(l_emb_char,
                                                       chardim,
                                                       peepholes=True,
                                                       learn_init=False,
                                                       mask_input=l_mask_char)
            else:
                l_lstm_char = lasagne_lstm_nooutput(l_emb_char,
                                                    chardim,
                                                    peepholes=True,
                                                    learn_init=False,
                                                    mask_input=l_mask_char)
            l_We = lasagne.layers.SliceLayer(l_lstm_char, -1, 1)
            We = lasagne.layers.get_output(l_We, {
                l_in_char: char,
                l_mask_char: charmask
            })
        elif params.nntype == "charagram":
            char = T.matrix()
            self.featuremap = self.get_feature_map(data, params.featuretype,
                                                   params.cutoff,
                                                   params.lowercase)
            print "Number of features: ", len(self.featuremap)

            l_in_char = lasagne.layers.InputLayer(
                (None, len(self.featuremap) + 1))
            if self.cap:
                l_in_char = lasagne.layers.InputLayer(
                    (None, len(self.featuremap) + 2))
            if params.numlayers == 1:
                l_We = lasagne.layers.DenseLayer(l_in_char,
                                                 chardim,
                                                 nonlinearity=params.act)
            elif params.numlayers == 2:
                l_1 = lasagne.layers.DenseLayer(l_in_char,
                                                chardim,
                                                nonlinearity=params.act)
                l_We = lasagne.layers.DenseLayer(l_1,
                                                 chardim,
                                                 nonlinearity=params.act)
            else:
                raise ValueError('Only 1-2 layers are supported currently.')
            We = lasagne.layers.get_output(l_We, {l_in_char: char})
        elif params.nntype == "charcnn":
            l_emb_char = lasagne.layers.EmbeddingLayer(
                l_in_char,
                input_size=Ce.get_value().shape[0],
                output_size=Ce.get_value().shape[1],
                W=Ce)
            emb = lasagne.layers.DimshuffleLayer(l_emb_char, (0, 2, 1))
            conv_params = None
            if params.conv_type == 1:
                conv_params = [(175, 2), (175, 3), (175, 4)]
            else:
                conv_params = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5),
                               (150, 6)]
            layers = []
            for num_filters, filter_size in conv_params:
                conv = lasagne.layers.Conv1DLayer(emb,
                                                  num_filters,
                                                  filter_size,
                                                  nonlinearity=params.act)
                pl = lasagne.layers.GlobalPoolLayer(conv, theano.tensor.max)
                pl = lasagne.layers.FlattenLayer(pl)
                layers.append(pl)
            concat = lasagne.layers.ConcatLayer(layers)
            l_We = lasagne.layers.DenseLayer(concat,
                                             num_units=chardim,
                                             nonlinearity=params.act)
            We = lasagne.layers.get_output(l_We, {l_in_char: char})
        else:
            l_We = None
            We = None

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))
        l_emb_word = lasagne_embedding_layer_2(l_in_word, chardim, We)

        l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word,
                                                worddim,
                                                peepholes=True,
                                                learn_init=False,
                                                mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word,
                                                worddim,
                                                peepholes=True,
                                                learn_init=False,
                                                mask_input=l_mask_word,
                                                backwards=True)

        l_reshapef = lasagne.layers.ReshapeLayer(l_lstm_wordf, (-1, worddim))
        l_reshapeb = lasagne.layers.ReshapeLayer(l_lstm_wordb, (-1, worddim))
        concat2 = lasagne.layers.ConcatLayer([l_reshapef, l_reshapeb])
        l_emb = lasagne.layers.DenseLayer(
            concat2,
            num_units=worddim,
            nonlinearity=lasagne.nonlinearities.tanh)
        l_out = lasagne.layers.DenseLayer(
            l_emb,
            num_units=len(self.tags),
            nonlinearity=lasagne.nonlinearities.softmax)
        embg = lasagne.layers.get_output(l_out, {
            l_in_word: word,
            l_mask_word: wordmask
        })

        embg = embg[idxs]
        prediction = T.argmax(embg, axis=1)

        self.all_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_We, trainable=True)
        reg = 0.5 * params.LC * sum(
            lasagne.regularization.l2(x) for x in self.all_params)

        cost = T.nnet.categorical_crossentropy(embg, Y)
        cost = T.mean(cost) + reg

        self.feedforward_function = None
        self.scoring_function = None
        self.cost_function = None
        self.train_function = None

        if params.nntype == "charlstm":
            self.feedforward_function = theano.function(
                [char, charmask, word, wordmask, idxs], embg)
            self.scoring_function = theano.function(
                [char, charmask, word, wordmask, idxs], prediction)
            self.cost_function = theano.function(
                [char, charmask, word, wordmask, idxs, Y], cost)
            grads = theano.gradient.grad(cost, self.all_params)
            updates = lasagne.updates.momentum(
                grads, self.all_params, 0.2,
                momentum=0.95)  #same as Ling et al.
            self.train_function = theano.function(
                [char, charmask, word, wordmask, idxs, Y],
                cost,
                updates=updates)
        elif params.nntype == "charcnn" or params.nntype == "charagram":
            self.feedforward_function = theano.function(
                [char, word, wordmask, idxs], embg)
            self.scoring_function = theano.function(
                [char, word, wordmask, idxs], prediction)
            self.cost_function = theano.function(
                [char, word, wordmask, idxs, Y], cost)
            grads = theano.gradient.grad(cost, self.all_params)
            updates = lasagne.updates.momentum(
                grads, self.all_params, 0.2,
                momentum=0.95)  #same as Ling et al.
            self.train_function = theano.function(
                [char, word, wordmask, idxs, Y], cost, updates=updates)
Example #8
    def __init__(self, We_initial, params):

        if params.maxval:
            self.nout = params.maxval - params.minval + 1

        p = None
        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(file(params.regfile, 'rb'))
            print p
            #contains [<TensorType(float64, matrix)>,
            # W_in_to_ingate, W_hid_to_ingate, b_ingate, W_in_to_forgetgate,
            # W_hid_to_forgetgate, b_forgetgate, W_in_to_cell, W_hid_to_cell,
            # b_cell, W_in_to_outgate, W_hid_to_outgate, b_outgate, W_cell_to_ingate,
            # W_cell_to_forgetgate, W_cell_to_outgate]

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype = config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype = config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(np.asarray(p[0].get_value(), dtype = config.floatX))
            updatewords = False

        #symbolic params
        g1batchindices = T.imatrix(); g2batchindices = T.imatrix()
        g1mask = T.matrix(); g2mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0], output_size=We.get_value().shape[1], W=We)
        l_lstm = None
        if params.useoutgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb, params.layersize, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb, params.layersize, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)

        if params.traintype == "reg" or params.traintype == "rep":
            if params.useoutgate:
                W_in_to_ingate = np.asarray(p[1].get_value(), dtype = config.floatX)
                W_hid_to_ingate = np.asarray(p[2].get_value(), dtype = config.floatX)
                b_ingate = np.asarray(p[3].get_value(), dtype = config.floatX)
                W_in_to_forgetgate = np.asarray(p[4].get_value(), dtype = config.floatX)
                W_hid_to_forgetgate = np.asarray(p[5].get_value(), dtype = config.floatX)
                b_forgetgate = np.asarray(p[6].get_value(), dtype = config.floatX)
                W_in_to_cell = np.asarray(p[7].get_value(), dtype = config.floatX)
                W_hid_to_cell = np.asarray(p[8].get_value(), dtype = config.floatX)
                b_cell = np.asarray(p[9].get_value(), dtype = config.floatX)
                W_in_to_outgate = np.asarray(p[10].get_value(), dtype = config.floatX)
                W_hid_to_outgate = np.asarray(p[11].get_value(), dtype = config.floatX)
                b_outgate = np.asarray(p[12].get_value(), dtype = config.floatX)
                W_cell_to_ingate = np.asarray(p[13].get_value(), dtype = config.floatX)
                W_cell_to_forgetgate = np.asarray(p[14].get_value(), dtype = config.floatX)
                W_cell_to_outgate = np.asarray(p[15].get_value(), dtype = config.floatX)

                ingate = lasagne.layers.Gate(W_in=W_in_to_ingate, W_hid=W_hid_to_ingate, W_cell=W_cell_to_ingate, b=b_ingate)
                forgetgate = lasagne.layers.Gate(W_in=W_in_to_forgetgate, W_hid=W_hid_to_forgetgate, W_cell=W_cell_to_forgetgate, b=b_forgetgate)
                outgate = lasagne.layers.Gate(W_in=W_in_to_outgate, W_hid=W_hid_to_outgate, W_cell=W_cell_to_outgate, b=b_outgate)
                cell = lasagne.layers.Gate(W_in=W_in_to_cell, W_hid=W_hid_to_cell, W_cell=None, b=b_cell, nonlinearity=lasagne.nonlinearities.tanh)
                l_lstm = lasagne.layers.LSTMLayer(l_emb, params.layersize, ingate = ingate, forgetgate = forgetgate,
                                  outgate = outgate, cell = cell, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)
            else:
                W_in_to_ingate = np.asarray(p[1].get_value(), dtype = config.floatX)
                W_hid_to_ingate = np.asarray(p[2].get_value(), dtype = config.floatX)
                b_ingate = np.asarray(p[3].get_value(), dtype = config.floatX)
                W_in_to_forgetgate = np.asarray(p[4].get_value(), dtype = config.floatX)
                W_hid_to_forgetgate = np.asarray(p[5].get_value(), dtype = config.floatX)
                b_forgetgate = np.asarray(p[6].get_value(), dtype = config.floatX)
                W_in_to_cell = np.asarray(p[7].get_value(), dtype = config.floatX)
                W_hid_to_cell = np.asarray(p[8].get_value(), dtype = config.floatX)
                b_cell = np.asarray(p[9].get_value(), dtype = config.floatX)
                W_cell_to_ingate = np.asarray(p[10].get_value(), dtype = config.floatX)
                W_cell_to_forgetgate = np.asarray(p[11].get_value(), dtype = config.floatX)

                ingate = lasagne.layers.Gate(W_in=W_in_to_ingate, W_hid=W_hid_to_ingate, W_cell=W_cell_to_ingate, b=b_ingate)
                forgetgate = lasagne.layers.Gate(W_in=W_in_to_forgetgate, W_hid=W_hid_to_forgetgate, W_cell=W_cell_to_forgetgate, b=b_forgetgate)
                cell = lasagne.layers.Gate(W_in=W_in_to_cell, W_hid=W_hid_to_cell, W_cell=None, b=b_cell, nonlinearity=lasagne.nonlinearities.tanh)
                l_lstm = lasagne_lstm_nooutput(l_emb, params.layersize, ingate = ingate, forgetgate = forgetgate,
                                  cell = cell, peepholes=params.usepeep, learn_init=False, mask_input = l_mask)


        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg1 = lasagne.layers.get_output(l_out, {l_in:g1batchindices, l_mask:g1mask})
        embg2 = lasagne.layers.get_output(l_out, {l_in:g2batchindices, l_mask:g2mask})

        g1_dot_g2 = embg1*embg2
        g1_abs_g2 = abs(embg1-embg2)

        lin_dot = lasagne.layers.InputLayer((None, params.layersize))
        lin_abs = lasagne.layers.InputLayer((None, params.layersize))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        if params.task == "sim":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid, self.nout, nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {lin_dot:g1_dot_g2, lin_abs:g1_abs_g2})
            Y = T.log(X)

            cost = scores*(T.log(scores) - Y)
            cost = cost.sum(axis=1)/(float(self.nout))

            prediction = 0.
            i = params.minval
            while i<= params.maxval:
                prediction = prediction + i*X[:,i-1]
                i += 1
        elif params.task == "ent":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid, 3, nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {lin_dot:g1_dot_g2, lin_abs:g1_abs_g2})

            cost = theano.tensor.nnet.categorical_crossentropy(X,scores)

            prediction = T.argmax(X, axis=1)
        else:
            raise ValueError('Params.task not set correctly.')

        self.network_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_out, trainable=True) + lasagne.layers.get_all_params(l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices,g1mask], embg1)
        self.scoring_function = theano.function([g1batchindices, g2batchindices,
                             g1mask, g2mask],prediction)
        self.cost_function = theano.function([scores, g1batchindices, g2batchindices,
                             g1mask, g2mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices, g2batchindices,
                             g1mask, g2mask], cost, updates=updates)
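
In the "ent" branch, categorical_crossentropy consumes scores as a matrix, so the three-way gold labels must arrive one-hot encoded. A minimal encoding helper (the label order over the three classes is an assumption):

import numpy as np

def labels_to_onehot(labels, nclasses=3):
    # One-hot encode integer class labels into the float32 matrix the
    # cost function expects; the class ordering is assumed, not given.
    onehot = np.zeros((len(labels), nclasses), dtype='float32')
    onehot[np.arange(len(labels)), labels] = 1.
    return onehot
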
Example #9
    def __init__(self, We_initial, params):

        p = None

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(file(params.regfile, 'rb'))
            print p
            #contains [<TensorType(float64, matrix)>,
            # W_in_to_ingate, W_hid_to_ingate, b_ingate, W_in_to_forgetgate,
            # W_hid_to_forgetgate, b_forgetgate, W_in_to_cell, W_hid_to_cell,
            # b_cell, W_in_to_outgate, W_hid_to_outgate, b_outgate, W_cell_to_ingate,
            # W_cell_to_forgetgate, W_cell_to_outgate]

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            updatewords = True

        if params.traintype == "rep":
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            updatewords = False

        g1batchindices = T.imatrix()
        g1mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_lstm = None
        if params.useoutgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                              params.layersize,
                                              peepholes=params.usepeep,
                                              learn_init=False,
                                              mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb,
                                           params.layersize,
                                           peepholes=params.usepeep,
                                           learn_init=False,
                                           mask_input=l_mask)

        if params.traintype == "reg" or params.traintype == "rep":
            if params.useoutgate:
                W_in_to_ingate = np.asarray(p[1].get_value(),
                                            dtype=config.floatX)
                W_hid_to_ingate = np.asarray(p[2].get_value(),
                                             dtype=config.floatX)
                b_ingate = np.asarray(p[3].get_value(), dtype=config.floatX)
                W_in_to_forgetgate = np.asarray(p[4].get_value(),
                                                dtype=config.floatX)
                W_hid_to_forgetgate = np.asarray(p[5].get_value(),
                                                 dtype=config.floatX)
                b_forgetgate = np.asarray(p[6].get_value(),
                                          dtype=config.floatX)
                W_in_to_cell = np.asarray(p[7].get_value(),
                                          dtype=config.floatX)
                W_hid_to_cell = np.asarray(p[8].get_value(),
                                           dtype=config.floatX)
                b_cell = np.asarray(p[9].get_value(), dtype=config.floatX)
                W_in_to_outgate = np.asarray(p[10].get_value(),
                                             dtype=config.floatX)
                W_hid_to_outgate = np.asarray(p[11].get_value(),
                                              dtype=config.floatX)
                b_outgate = np.asarray(p[12].get_value(), dtype=config.floatX)
                W_cell_to_ingate = np.asarray(p[13].get_value(),
                                              dtype=config.floatX)
                W_cell_to_forgetgate = np.asarray(p[14].get_value(),
                                                  dtype=config.floatX)
                W_cell_to_outgate = np.asarray(p[15].get_value(),
                                               dtype=config.floatX)

                ingate = lasagne.layers.Gate(W_in=W_in_to_ingate,
                                             W_hid=W_hid_to_ingate,
                                             W_cell=W_cell_to_ingate,
                                             b=b_ingate)
                forgetgate = lasagne.layers.Gate(W_in=W_in_to_forgetgate,
                                                 W_hid=W_hid_to_forgetgate,
                                                 W_cell=W_cell_to_forgetgate,
                                                 b=b_forgetgate)
                outgate = lasagne.layers.Gate(W_in=W_in_to_outgate,
                                              W_hid=W_hid_to_outgate,
                                              W_cell=W_cell_to_outgate,
                                              b=b_outgate)
                cell = lasagne.layers.Gate(
                    W_in=W_in_to_cell,
                    W_hid=W_hid_to_cell,
                    W_cell=None,
                    b=b_cell,
                    nonlinearity=lasagne.nonlinearities.tanh)
                l_lstm = lasagne.layers.LSTMLayer(l_emb,
                                                  params.layersize,
                                                  ingate=ingate,
                                                  forgetgate=forgetgate,
                                                  outgate=outgate,
                                                  cell=cell,
                                                  peepholes=params.usepeep,
                                                  learn_init=False,
                                                  mask_input=l_mask)
            else:
                (W_in_to_ingate, W_hid_to_ingate, b_ingate,
                 W_in_to_forgetgate, W_hid_to_forgetgate, b_forgetgate,
                 W_in_to_cell, W_hid_to_cell, b_cell,
                 W_cell_to_ingate,
                 W_cell_to_forgetgate) = [pretrained(i) for i in range(1, 12)]

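                # same gates as above, minus the output gate, for the
                # lasagne_lstm_nooutput variant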
                ingate = lasagne.layers.Gate(W_in=W_in_to_ingate,
                                             W_hid=W_hid_to_ingate,
                                             W_cell=W_cell_to_ingate,
                                             b=b_ingate)
                forgetgate = lasagne.layers.Gate(W_in=W_in_to_forgetgate,
                                                 W_hid=W_hid_to_forgetgate,
                                                 W_cell=W_cell_to_forgetgate,
                                                 b=b_forgetgate)
                cell = lasagne.layers.Gate(
                    W_in=W_in_to_cell,
                    W_hid=W_hid_to_cell,
                    W_cell=None,
                    b=b_cell,
                    nonlinearity=lasagne.nonlinearities.tanh)
                l_lstm = lasagne_lstm_nooutput(l_emb,
                                               params.layersize,
                                               ingate=ingate,
                                               forgetgate=forgetgate,
                                               cell=cell,
                                               peepholes=params.usepeep,
                                               learn_init=False,
                                               mask_input=l_mask)

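        # the sentence embedding is the last hidden state along the time axis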
        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })

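        # classification head: a sigmoid hidden layer of params.memsize units
        # feeding a 2-way softmax over the sentence embedding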
        # the embedding fed in here has params.layersize units (the LSTM width),
        # not the word dimension, so declare the input shape accordingly
        l_in2 = lasagne.layers.InputLayer((None, params.layersize))
        l_sigmoid = lasagne.layers.DenseLayer(
            l_in2, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)

        l_softmax = lasagne.layers.DenseLayer(
            l_sigmoid, 2, nonlinearity=lasagne.nonlinearities.softmax)
        X = lasagne.layers.get_output(l_softmax, {l_in2: embg})
        cost = T.nnet.categorical_crossentropy(X, scores)
        prediction = T.argmax(X, axis=1)

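        # collect the trainable parameters once; network_params drops the
        # first entry (the word-embedding matrix We), all_params keeps it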
        trainable = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)
        self.network_params = trainable[1:]
        self.all_params = trainable

        reg = self.getRegTerm(params, We, initial_We, l_out, l_softmax, p)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

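        # compiled Theano functions: sentence embeddings, hard class
        # predictions, and the regularized cost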
        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg)
        self.scoring_function = theano.function([g1batchindices, g1mask],
                                                prediction)
        self.cost_function = theano.function([scores, g1batchindices, g1mask],
                                             cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function([scores, g1batchindices, g1mask],
                                              cost,
                                              updates=updates)
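
A minimal usage sketch for the compiled functions above; `model`, the shapes,
and the random inputs are illustrative assumptions, not part of the original
(the `scores` argument must match the label encoding declared earlier in the
class, which falls outside this excerpt):

import numpy as np
# hypothetical batch: 8 sentences of length 12 over a 5000-word vocabulary
idx = np.random.randint(0, 5000, size=(8, 12)).astype('int32')  # g1batchindices
mask = np.ones((8, 12), dtype='float32')                        # g1mask
emb = model.feedforward_function(idx, mask)   # (8, params.layersize) embeddings
pred = model.scoring_function(idx, mask)      # argmax class per sentence
# model.train_function(labels, idx, mask) would take one optimizer step
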
Example no. 10
0
    def __init__(self, We_initial, params):

        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        p1batchindices = T.imatrix()
        p2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        p1mask = T.matrix()
        p2mask = T.matrix()

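        # g1/g2 index the gold paraphrase pair; p1/p2 index the negative
        # examples paired against them; masks handle variable-length batches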
        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=We.get_value().shape[0],
                                              output_size=We.get_value().shape[1], W=We)
        if params.outgate:
            l_lstm = lasagne.layers.LSTMLayer(l_emb, params.layersize, peepholes=params.peephole, learn_init=False,
                                              mask_input=l_mask)
        else:
            l_lstm = lasagne_lstm_nooutput(l_emb, params.layersize, peepholes=params.peephole, learn_init=False,
                                           mask_input=l_mask)
        l_out = lasagne.layers.SliceLayer(l_lstm, -1, 1)

        embg1 = lasagne.layers.get_output(l_out, {l_in: g1batchindices, l_mask: g1mask})
        embg2 = lasagne.layers.get_output(l_out, {l_in: g2batchindices, l_mask: g2mask})
        embp1 = lasagne.layers.get_output(l_out, {l_in: p1batchindices, l_mask: p1mask})
        embp2 = lasagne.layers.get_output(l_out, {l_in: p2batchindices, l_mask: p2mask})

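        # cosine similarities: the gold pair, and each gold sentence against
        # its negative example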
        g1g2 = (embg1 * embg2).sum(axis=1)
        g1g2norm = T.sqrt(T.sum(embg1 ** 2, axis=1)) * T.sqrt(T.sum(embg2 ** 2, axis=1))
        g1g2 = g1g2 / g1g2norm

        p1g1 = (embp1 * embg1).sum(axis=1)
        p1g1norm = T.sqrt(T.sum(embp1 ** 2, axis=1)) * T.sqrt(T.sum(embg1 ** 2, axis=1))
        p1g1 = p1g1 / p1g1norm

        p2g2 = (embp2 * embg2).sum(axis=1)
        p2g2norm = T.sqrt(T.sum(embp2 ** 2, axis=1)) * T.sqrt(T.sum(embg2 ** 2, axis=1))
        p2g2 = p2g2 / p2g2norm

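        # hinge losses: x * (x > 0) is max(x, 0), so each term is active only
        # when a negative pairing comes within `margin` of the gold pair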
        costp1g1 = params.margin - g1g2 + p1g1
        costp1g1 = costp1g1 * (costp1g1 > 0)

        costp2g2 = params.margin - g1g2 + p2g2
        costp2g2 = costp2g2 * (costp2g2 > 0)

        cost = costp1g1 + costp2g2
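        # as above, pop(0) removes the embedding matrix We from the
        # L2-regularized parameter set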
        network_params = lasagne.layers.get_all_params(l_lstm, trainable=True)
        network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(l_lstm, trainable=True)

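        # L2-regularize the LSTM weights toward zero and, when word vectors
        # are updated, pull We back toward its initial value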
        l2 = 0.5 * params.LC * sum(lasagne.regularization.l2(x) for x in network_params)
        if params.updatewords:
            word_reg = 0.5 * params.LW * lasagne.regularization.l2(We - initial_We)
            cost = T.mean(cost) + l2 + word_reg
        else:
            cost = T.mean(cost) + l2

        self.feedforward_function = theano.function([g1batchindices, g1mask], embg1)
        self.cost_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                                              g1mask, g2mask, p1mask, p2mask], cost)

        prediction = g1g2

        self.scoring_function = theano.function([g1batchindices, g2batchindices,
                                                 g1mask, g2mask], prediction)

        if not params.updatewords:
            # word embeddings stay fixed: optimize only the LSTM weights
            self.all_params = network_params
        grads = theano.gradient.grad(cost, self.all_params)
        if params.clip:
            grads = [lasagne.updates.norm_constraint(grad, params.clip, range(grad.ndim)) for grad in grads]
        updates = params.learner(grads, self.all_params, params.eta)
        self.train_function = theano.function([g1batchindices, g2batchindices, p1batchindices, p2batchindices,
                                               g1mask, g2mask, p1mask, p2mask], cost, updates=updates)
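
For reference, a self-contained NumPy sketch of the margin objective above;
the margin value and the toy data are illustrative assumptions (the model
reads the margin from params.margin):

import numpy as np

def cosine(a, b):
    # row-wise cosine similarity between two batches of embeddings
    return (a * b).sum(axis=1) / (np.sqrt((a ** 2).sum(axis=1)) *
                                  np.sqrt((b ** 2).sum(axis=1)))

def margin_cost(embg1, embg2, embp1, embp2, margin=0.4):
    # mirrors costp1g1/costp2g2: the gold pair's similarity must exceed each
    # negative pairing's similarity by at least `margin` (hinged at zero)
    g1g2 = cosine(embg1, embg2)
    costp1g1 = np.maximum(0.0, margin - g1g2 + cosine(embp1, embg1))
    costp2g2 = np.maximum(0.0, margin - g1g2 + cosine(embp2, embg2))
    return np.mean(costp1g1 + costp2g2)

rng = np.random.RandomState(0)
g1, g2, p1, p2 = [rng.randn(8, 300).astype('float32') for _ in range(4)]
print margin_cost(g1, g2, p1, p2)  # Python 2 print, matching the code above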