Example #1
0
    def __init__(self,
                 n_words=1000,
                 n_embedding=100,
                 lr=0.01,
                 margin=0.1,
                 momentum=0.9,
                 word_to_id=None):
        """Build the combined memory-network / LSTM Theano computation graph.

        Compiles three Theano functions: ``train_function`` (joint memory +
        LSTM cost with parameter updates), ``predict_function_r`` (LSTM word
        distribution over a word sequence) and ``predict_function_o``
        (candidate-statement score).

        Parameters
        ----------
        n_words : int
            Vocabulary size; also determines the feature dimension ``n_D``.
        n_embedding : int
            Embedding size shared by the memory scorer and the LSTM.
        lr : float
            Default learning rate passed to the compiled train function.
        margin : float
            Ranking-loss margin (consumed by ``calc_cost`` — confirm there).
        momentum : float
            Momentum coefficient (consumed by ``get_updates`` — confirm there).
        word_to_id : dict or None
            Word -> integer-id mapping; treated as empty when None.
        """
        self.n_embedding = n_embedding
        self.n_lstm_embed = n_embedding
        self.word_embed = n_embedding
        self.lr = lr
        self.momentum = momentum
        self.margin = margin
        self.n_words = n_words
        # Feature vector: three vocabulary-sized segments plus 3 extra slots.
        self.n_D = 3 * self.n_words + 3

        # Fix: the declared default word_to_id=None previously crashed on the
        # inverse-mapping construction below; also use items() instead of the
        # Python-2-only iteritems() so the code runs under Python 3 as well
        # (items() exists on both Python 2 and 3).
        if word_to_id is None:
            word_to_id = {}
        self.word_to_id = word_to_id
        self.id_to_word = dict((v, k) for k, v in word_to_id.items())

        # Question (x, m0, m1, phi_m1 are declared but unused in this graph;
        # kept as-is — on_unused_input='warn' below suggests this is known).
        x = T.vector('x')
        phi_x = T.vector('phi_x')

        # True statements
        phi_f1_1 = T.vector('phi_f1_1')
        phi_f2_1 = T.vector('phi_f2_1')

        # False statements
        phi_f1_2 = T.vector('phi_f1_2')
        phi_f2_2 = T.vector('phi_f2_2')

        # Supporting memories
        m0 = T.vector('m0')
        m1 = T.vector('m1')
        phi_m0 = T.vector('phi_m0')
        phi_m1 = T.vector('phi_m1')

        # True word (target distribution for the LSTM output)
        r = T.vector('r')

        # Word sequence
        words = T.ivector('words')

        # Scoring function
        self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

        # Word embeddings (input and output lookup tables)
        self.L = glorot_uniform((self.n_words, self.word_embed))
        self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

        # LSTM gate parameters: input (i), forget (f), cell (c), output (o).
        self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_i = shared_zeros((self.n_lstm_embed))

        self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_f = shared_zeros((self.n_lstm_embed))

        self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_c = shared_zeros((self.n_lstm_embed))

        self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_o = shared_zeros((self.n_lstm_embed))

        # Memory-selection cost (ranking true vs. false statements).
        mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1,
                                  phi_f2_2, phi_m0)

        lstm_output = self.lstm_cost(words)
        self.predict_function_r = theano.function(inputs=[words],
                                                  outputs=lstm_output,
                                                  allow_input_downcast=True)

        # Cross-entropy between the LSTM output and the true word vector r.
        lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

        # Joint objective: memory selection + response generation.
        cost = mem_cost + lstm_cost

        params = [
            self.U_O, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f,
            self.b_f, self.W_c, self.U_c, self.b_c, self.W_o, self.U_o,
            self.b_o, self.L, self.Lprime
        ]

        grads = T.grad(cost, params)

        # Parameter updates
        updates = self.get_updates(params, grads, method='adagrad')

        l_rate = T.scalar('l_rate')

        # Theano functions
        self.train_function = theano.function(
            inputs=[
                phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0, r,
                words,
                theano.Param(l_rate, default=self.lr)
            ],
            outputs=cost,
            updates=updates,
            on_unused_input='warn',
            allow_input_downcast=True,
        )
        #mode='FAST_COMPILE')
        #mode='DebugMode')
        #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

        # Candidate statement for prediction
        phi_f = T.vector('phi_f')

        score_o = self.calc_score_o(phi_x, phi_f)
        self.predict_function_o = theano.function(inputs=[phi_x, phi_f],
                                                  outputs=score_o)
Example #2
0
def orthogonal_init(shape, dim_ordering='tf', name=None):
    """Delegate to ``orthogonal`` for an orthogonal weight initialization.

    Keeps the (shape, dim_ordering, name) signature expected by callers
    while forwarding every option to the underlying initializer.
    """
    options = {'dim_ordering': dim_ordering, 'name': name}
    return orthogonal(shape, **options)
Example #3
0
    def __init__(self, n_words=1000, n_embedding=100, lr=0.01, margin=0.1, momentum=0.9, word_to_id=None):
        """Build the combined memory-network / LSTM Theano computation graph.

        Compiles ``train_function`` (joint cost with updates),
        ``predict_function_r`` (LSTM word distribution) and
        ``predict_function_o`` (candidate-statement score).

        Parameters
        ----------
        n_words : int
            Vocabulary size; also determines the feature dimension ``n_D``.
        n_embedding : int
            Embedding size shared by the memory scorer and the LSTM.
        lr : float
            Default learning rate for the compiled train function.
        margin : float
            Ranking-loss margin (consumed by ``calc_cost`` — confirm there).
        momentum : float
            Momentum coefficient (consumed by ``get_updates`` — confirm there).
        word_to_id : dict or None
            Word -> integer-id mapping; treated as empty when None.
        """
        self.n_embedding = n_embedding
        self.n_lstm_embed = n_embedding
        self.word_embed = n_embedding
        self.lr = lr
        self.momentum = momentum
        self.margin = margin
        self.n_words = n_words
        # Feature vector: three vocabulary-sized segments plus 3 extra slots.
        self.n_D = 3 * self.n_words + 3

        # Fix: the declared default word_to_id=None previously crashed on the
        # inverse-mapping construction below; also use items() instead of the
        # Python-2-only iteritems() so the code runs under Python 3 as well.
        if word_to_id is None:
            word_to_id = {}
        self.word_to_id = word_to_id
        self.id_to_word = dict((v, k) for k, v in word_to_id.items())

        # Question (x, m0, m1, phi_m1 are declared but unused in this graph;
        # kept as-is — on_unused_input='warn' below suggests this is known).
        x = T.vector('x')
        phi_x = T.vector('phi_x')

        # True statements
        phi_f1_1 = T.vector('phi_f1_1')
        phi_f2_1 = T.vector('phi_f2_1')

        # False statements
        phi_f1_2 = T.vector('phi_f1_2')
        phi_f2_2 = T.vector('phi_f2_2')

        # Supporting memories
        m0 = T.vector('m0')
        m1 = T.vector('m1')
        phi_m0 = T.vector('phi_m0')
        phi_m1 = T.vector('phi_m1')

        # True word (target distribution for the LSTM output)
        r = T.vector('r')

        # Word sequence
        words = T.ivector('words')

        # Scoring function
        self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

        # Word embeddings (input and output lookup tables)
        self.L = glorot_uniform((self.n_words, self.word_embed))
        self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

        # LSTM gate parameters: input (i), forget (f), cell (c), output (o).
        self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_i = shared_zeros((self.n_lstm_embed))

        self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_f = shared_zeros((self.n_lstm_embed))

        self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_c = shared_zeros((self.n_lstm_embed))

        self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_o = shared_zeros((self.n_lstm_embed))

        # Memory-selection cost (ranking true vs. false statements).
        mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0)

        lstm_output = self.lstm_cost(words)
        self.predict_function_r = theano.function(inputs = [words], outputs = lstm_output, allow_input_downcast=True)

        # Cross-entropy between the LSTM output and the true word vector r.
        lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

        # Joint objective: memory selection + response generation.
        cost = mem_cost + lstm_cost

        params = [
            self.U_O,
            self.W_i, self.U_i, self.b_i,
            self.W_f, self.U_f, self.b_f,
            self.W_c, self.U_c, self.b_c,
            self.W_o, self.U_o, self.b_o,
            self.L, self.Lprime
        ]

        grads = T.grad(cost, params)

        # Parameter updates
        updates = self.get_updates(params, grads, method='adagrad')

        l_rate = T.scalar('l_rate')

        # Theano functions
        self.train_function = theano.function(
            inputs = [phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2,
                      phi_m0, r, words,
                      theano.Param(l_rate, default=self.lr)],
            outputs = cost,
            updates = updates,
            on_unused_input='warn',
            allow_input_downcast=True,
            )
            #mode='FAST_COMPILE')
            #mode='DebugMode')
            #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

        # Candidate statement for prediction
        phi_f = T.vector('phi_f')

        score_o = self.calc_score_o(phi_x, phi_f)
        self.predict_function_o = theano.function(inputs = [phi_x, phi_f], outputs = score_o)