예제 #1
0
    def _linear(self, x, h, bias_default=0.0):
        """Return the affine combination ``x.W + h.U + b``.

        The variables are requested through the ``weight`` and ``bias``
        helpers under the names "W", "U" and "b". When ``self.batch_norm`` is
        truthy, each matmul result goes through ``batch_norm`` inside its own
        variable scope ("Linear1" for the ``x`` term, "Linear2" for the ``h``
        term) before the three terms are summed.

        Args:
            x: Tensor whose static second dimension gives the row count of W.
            h: Tensor multiplied by the [num_units, num_units] matrix U.
            bias_default: Value forwarded to ``bias`` for "b".

        Returns:
            The summed tensor.
        """
        in_dim = x.get_shape().as_list()[1]
        out_dim = self._num_units

        input_weights = weight("W", [in_dim, out_dim])
        state_weights = weight("U", [out_dim, out_dim])
        offset = bias("b", out_dim, bias_default)

        # Plain path first: no normalisation, just the two projections and the bias.
        if not self.batch_norm:
            return tf.matmul(x, input_weights) + tf.matmul(h, state_weights) + offset

        # Normalised path: each projection is normalised under its own scope.
        with tf.variable_scope("Linear1"):
            x_term = batch_norm(tf.matmul(x, input_weights), is_training=self.is_training)
        with tf.variable_scope("Linear2"):
            h_term = batch_norm(tf.matmul(h, state_weights), is_training=self.is_training)
        return x_term + h_term + offset
예제 #2
0
    def _linear(self, x, h, bias_default=0.0):
        """Compute ``x.W + h.U + b``, the linear part fed to the gates.

        "W", "U" and "b" are obtained from the ``weight`` / ``bias`` helpers.
        With the batch-norm flag set, the two matmul results are each passed
        through ``batch_norm`` (scopes "Linear1" and "Linear2") before being
        added together with the bias.

        Args:
            x: Tensor whose static second dimension is used as W's row count.
            h: Tensor multiplied by the square matrix U.
            bias_default: Value forwarded to ``bias`` for "b".

        Returns:
            The summed tensor.
        """
        input_size = x.get_shape().as_list()[1]
        num_units = self._num_units

        w = weight('W', [input_size, num_units])
        u = weight('U', [num_units, num_units])
        b = bias('b', num_units, bias_default)

        # NOTE(review): the flag is read from `self.batch_normx` -- confirm
        # this is the intended attribute name.
        if self.batch_normx:
            with tf.variable_scope('Linear1'):
                x_part = batch_norm(tf.matmul(x, w), is_training=self.is_training)
            with tf.variable_scope('Linear2'):
                h_part = batch_norm(tf.matmul(h, u), is_training=self.is_training)
        else:
            x_part = tf.matmul(x, w)
            h_part = tf.matmul(h, u)

        return x_part + h_part + b
예제 #3
0
    def build(self):
        """Construct the model graph and publish its tensors on ``self``.

        Stages, in order: the placeholders; an input module and a question
        module that share variables under the 'input' scope; mask-based
        extraction of fact vectors, zero-padded to ``params.max_fact_count``
        rows per example; the episodic memory loop (``params.memory_step``
        passes); optional batch norm plus dropout; the feed-forward answer
        layer; the loss (mean cross-entropy plus ``params.weight_decay`` times
        the sum of the 'l2' collection); the accuracy ops; and the Adadelta
        training op.

        Sets the placeholders ``self.x``, ``self.q``, ``self.y``,
        ``self.mask``, ``self.is_training`` and the outputs
        ``self.total_loss``, ``self.num_corrects``, ``self.accuracy``,
        ``self.opt_op``.
        """
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.glove_size, params.hidden_size, self.words.vocab_size

        # Placeholders. The local is called `inputs` so the builtin `input`
        # is not shadowed.
        inputs = tf.placeholder(tf.float32, shape=[N, L, V], name='x')  # [num_batch, sentence_len, glove_dim]
        question = tf.placeholder(tf.float32, shape=[N, Q, V], name='q')  # [num_batch, question_len, glove_dim]
        answer = tf.placeholder(tf.int64, shape=[N], name='y')  # [num_batch] - one word answer
        input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
        is_training = tf.placeholder(tf.bool)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)

        # Input module
        with tf.variable_scope('input') as scope:
            input_list = self.make_decoder_batch_input(inputs)
            input_states, _ = seq2seq.rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru)

            # Question module: reuse the variables created by the input module.
            scope.reuse_variables()

            ques_list = self.make_decoder_batch_input(question)
            questions, _ = seq2seq.rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru)
            question_vec = questions[-1]  # last decoder output serves as the question vector

        # Masking: to extract fact vectors at end of sentence. (details in paper)
        input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
        facts = []
        for n in range(N):
            filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
            # Zero rows that bring every example up to exactly F facts.
            padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
            facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

        packed_facts = tf.pack(facts)  # [N, F, D]; packed so the axes can be transposed
        facts = tf.unpack(tf.transpose(packed_facts, [1, 0, 2]), num=F)  # F x [N, D]

        # Episodic Memory
        with tf.variable_scope('episodic') as scope:
            episode = EpisodeModule(d, question_vec, facts)

            memory = tf.identity(question_vec)
            for _ in range(params.memory_step):
                memory = gru(episode.new(memory), memory)[0]
                # Later passes share the variables created by the first one.
                scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A])
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # tf.equal needs both operands to share a dtype, and the answer
            # placeholder is int64, so predictions are cast to the label dtype
            # rather than to a hard-coded int32.
            predicts = tf.cast(tf.argmax(logits, 1), answer.dtype)
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = inputs
        self.q = question
        self.y = answer
        self.mask = input_mask
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
예제 #4
0
    def build(self):
        """Construct the model graph and publish its tensors on ``self``.

        Stages, in order: the placeholders; a sentence reader that embeds every
        word, multiplies by the positional encoding and the input mask, and
        sums over the word axis; dropout; an input-fusion layer that runs a
        GRU over the facts in both directions and adds the two outputs; a
        question encoder; the episodic memory loop (``params.memory_step``
        passes, GRU update when ``params.memory_update == 'gru'``, ReLU update
        otherwise); optional batch norm plus dropout; the feed-forward answer
        layer; the loss (mean cross-entropy plus ``params.weight_decay`` times
        the sum of the 'l2' collection); the accuracy ops; and the Adam
        training op.

        Sets ``self.att``, the placeholders ``self.x``, ``self.xm``,
        ``self.q``, ``self.y``, ``self.fc``, ``self.is_training`` and the
        outputs ``self.total_loss``, ``self.num_corrects``, ``self.accuracy``,
        ``self.opt_op``.
        """
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # Placeholders. The local is called `inputs` so the builtin `input`
        # is not shadowed.
        inputs = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        pos_enc = self.positional_encoding()
        # The range is sqrt(3). A float exponent keeps the value independent of
        # whether `/` performs integer or true division.
        embedding = weight('embedding', [A, V], init='uniform', range=3 ** 0.5)

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(inputs))  # L x [F, N]
            input_embed = []
            for word_ids in input_list:
                per_fact = tf.unpack(word_ids)
                embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in per_fact])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
            encoded = pos_enc * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)
                # The backward pass emits its outputs in reversed fact order.
                # Flip them back so that position i again refers to fact i
                # before they are combined with the forward outputs.
                backward_states = tf.reverse_sequence(backward_states, fact_counts, 1)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                # Every pass gets its own 'Layer<t>' variable scope.
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy: predictions are cast to int32 to match the answer placeholder.
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = inputs
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
예제 #5
0
    def build(self):
        """Assemble the whole graph and store its handles on ``self``.

        Creates the placeholders, encodes the input and the question with
        ``seq2seq.embedding_rnn_decoder`` (sharing variables inside the
        'input' scope), selects fact vectors with the boolean input mask and
        zero-pads them to ``params.max_fact_count`` rows, runs
        ``params.memory_step`` episodic-memory passes, applies optional batch
        norm and dropout, and finally builds the answer logits, the loss, the
        accuracy ops and the Adadelta training op.

        Sets the placeholders ``self.x``, ``self.q``, ``self.y``,
        ``self.mask``, ``self.is_training`` and the outputs
        ``self.total_loss``, ``self.num_corrects``, ``self.accuracy``,
        ``self.opt_op``.
        """
        params = self.params
        batch_size = params.batch_size
        sent_len = params.max_sent_size
        ques_len = params.max_ques_size
        max_facts = params.max_fact_count
        embed_size = params.embed_size
        hidden = params.hidden_size
        vocab_size = self.words.vocab_size

        # --- placeholders ---------------------------------------------------
        tokens = tf.placeholder('int32', shape=[batch_size, sent_len], name='x')
        question = tf.placeholder('int32', shape=[batch_size, ques_len], name='q')
        answer = tf.placeholder('int32', shape=[batch_size], name='y')  # one word answer per example
        input_mask = tf.placeholder(tf.bool, shape=[batch_size, sent_len], name='x_mask')
        is_training = tf.placeholder(tf.bool)

        # --- shared recurrent cell ------------------------------------------
        cell = rnn_cell.GRUCell(hidden)

        # --- input and question modules -------------------------------------
        with tf.variable_scope('input') as input_scope:
            input_steps = tf.unpack(tf.transpose(tokens))
            input_init = cell.zero_state(batch_size, tf.float32)
            input_states, _ = seq2seq.embedding_rnn_decoder(
                input_steps, input_init, cell, vocab_size, embed_size)

            # The question encoder reuses the variables created just above.
            input_scope.reuse_variables()

            ques_steps = tf.unpack(tf.transpose(question))
            ques_init = cell.zero_state(batch_size, tf.float32)
            ques_outputs, _ = seq2seq.embedding_rnn_decoder(
                ques_steps, ques_init, cell, vocab_size, embed_size)
            question_vec = ques_outputs[-1]  # last output stands for the question

        # --- fact extraction (mask marks sentence ends; details in paper) ---
        states_by_example = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]

        def padded_facts(row):
            # Keep the masked rows of one example, then zero-pad to max_facts rows.
            kept = tf.boolean_mask(states_by_example[row, :, :], input_mask[row, :])  # [?, D]
            filler = tf.zeros(tf.pack([max_facts - tf.shape(kept)[0], hidden]))
            return tf.concat(0, [kept, filler])  # [F, D]

        per_example = [padded_facts(row) for row in range(batch_size)]
        stacked = tf.pack(per_example)  # [N, F, D]
        facts = tf.unpack(tf.transpose(stacked, [1, 0, 2]), num=max_facts)  # F x [N, D]

        # --- episodic memory --------------------------------------------------
        with tf.variable_scope('episodic') as episodic_scope:
            episode = EpisodeModule(hidden, question_vec, facts)

            memory = tf.identity(question_vec)
            for _ in range(params.memory_step):
                memory = cell(episode.new(memory), memory)[0]
                episodic_scope.reuse_variables()

        # --- regularisation ---------------------------------------------------
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        # --- answer: single feed-forward layer (one word answer) ------------
        with tf.name_scope('Answer'):
            answer_weights = weight('w_a', [hidden, vocab_size])
            logits = tf.matmul(memory, answer_weights)  # [N, A]

        # --- loss: cross-entropy plus weighted 'l2' collection ---------------
        with tf.name_scope('Loss'):
            xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            mean_xent = tf.reduce_mean(xent)
            total_loss = mean_xent + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        # --- accuracy ---------------------------------------------------------
        with tf.variable_scope('Accuracy'):
            predicted = tf.cast(tf.argmax(logits, 1), 'int32')
            is_correct = tf.equal(predicted, answer)
            num_corrects = tf.reduce_sum(tf.cast(is_correct, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

        # --- training op ------------------------------------------------------
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # Publish placeholders.
        self.x = tokens
        self.q = question
        self.y = answer
        self.mask = input_mask
        self.is_training = is_training

        # Publish output tensors / ops.
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
예제 #6
0
    def build(self):
        """Build the complete graph and attach its handles to ``self``.

        The graph consists of: placeholders; a sentence reader (embedding
        lookup, positional encoding, input mask, sum over the word axis);
        dropout; a bidirectional input-fusion layer whose forward and backward
        GRU outputs are added per fact; a question encoder; the episodic
        memory loop with ``params.memory_step`` passes (GRU update when
        ``params.memory_update == 'gru'``, otherwise a ReLU update); optional
        batch norm and dropout; the feed-forward answer layer; the loss (mean
        cross-entropy plus ``params.weight_decay`` times the sum of the 'l2'
        collection); accuracy ops; and the Adam training op.

        Sets ``self.att``, the placeholders ``self.x``, ``self.xm``,
        ``self.q``, ``self.y``, ``self.fc``, ``self.is_training`` and the
        outputs ``self.total_loss``, ``self.num_corrects``, ``self.accuracy``,
        ``self.opt_op``.
        """
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # Placeholders (`story` rather than `input`, which would shadow the builtin).
        story = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        position_weights = self.positional_encoding()
        # sqrt(3), spelled with a float exponent so that integer division can
        # never collapse the exponent to 0 (which would make the range 1).
        embedding = weight('embedding', [A, V], init='uniform', range=3 ** 0.5)

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(story))  # L x [F, N]
            input_embed = []
            for position_ids in input_list:
                fact_ids = tf.unpack(position_ids)
                embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in fact_ids])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
            encoded = position_weights * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                reversed_outputs, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)
                # Outputs come back in reversed fact order; restore the original
                # order so each position lines up with the forward output for
                # the same fact.
                backward_states = tf.reverse_sequence(reversed_outputs, fact_counts, 1)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                # One 'Layer<t>' variable scope per memory pass.
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy: the int32 cast matches the dtype of the answer placeholder.
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = story
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op