Example #1
    def testDifferingKeyHeadSizes(self, gate_style):
        """Checks if arbitrary key sizes are still supported."""
        mem_slots = 2
        head_size = 32
        num_heads = 2
        key_size = 128
        batch_size = 5

        input_shape = (batch_size, 3, 3)
        mem = relational_memory.RelationalMemory(mem_slots,
                                                 head_size,
                                                 num_heads,
                                                 gate_style=gate_style,
                                                 key_size=key_size)
        self.assertNotEqual(key_size, mem._head_size)
        inputs = tf.placeholder(tf.float32, input_shape)

        memory_0 = mem.initial_state(batch_size)
        _, memory_1 = mem(inputs, memory_0)

        with self.test_session() as session:
            tf.global_variables_initializer().run()
            results = session.run(
                {"memory_1": memory_1, "memory_0": memory_0},
                feed_dict={inputs: np.ones(input_shape)})

        self.assertTrue(
            np.any(np.not_equal(results["memory_0"], results["memory_1"])))
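The snippets on this page are method-level excerpts from test and model code and assume a shared set of imports. A minimal sketch of those imports, assuming Sonnet v1 (the module path is an assumption and may differ by version):

# Imports assumed by the snippets on this page (a sketch; the Sonnet module
# path is an assumption and may differ by version).
import numpy as np
import tensorflow as tf  # TF1-style graph API; use tf.compat.v1 under TF2
from sonnet.python.modules import relational_memory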
Example #2
    def testRecurrence(self, mem_slots, head_size, num_heads):
        """Checks if you can run the relational memory for 2 steps."""

        batch_size = 5
        num_blocks = 5

        input_shape = [batch_size, 3, 1]
        mem = relational_memory.RelationalMemory(mem_slots,
                                                 head_size,
                                                 num_heads,
                                                 num_blocks=num_blocks)
        inputs = tf.placeholder(tf.float32, input_shape)

        hidden_0 = mem.initial_state(batch_size)
        _, hidden_1 = mem(inputs, hidden_0)
        _, hidden_2 = mem(inputs, hidden_1)

        with self.test_session() as session:
            tf.global_variables_initializer().run()
            results = session.run(
                {"hidden_2": hidden_2, "hidden_1": hidden_1},
                feed_dict={inputs: np.zeros(input_shape)})
        self.assertAllEqual(results["hidden_1"].shape,
                            results["hidden_2"].shape)
Example #3
    def testInputErasureWorking(self, gate_style):
        """Checks if gating is working by ignoring the input."""
        mem_slots = 2
        head_size = 32
        num_heads = 2
        batch_size = 5
        input_shape = (batch_size, 3, 3)
        mem = relational_memory.RelationalMemory(mem_slots,
                                                 head_size,
                                                 num_heads,
                                                 forget_bias=float("+inf"),
                                                 input_bias=float("-inf"),
                                                 gate_style=gate_style)
        inputs = tf.placeholder(tf.float32, input_shape)

        memory_0 = mem.initial_state(batch_size)
        _, memory_1 = mem(inputs, memory_0)

        with self.test_session() as session:
            tf.global_variables_initializer().run()
            results = session.run(
                {"memory_1": memory_1, "memory_0": memory_0},
                feed_dict={inputs: np.ones(input_shape)})
        self.assertAllEqual(results["memory_0"], results["memory_1"])
Example #4
    def testMemoryUpdating(self):
        """Checks if memory is updating correctly."""
        mem_slots = 2
        head_size = 32
        num_heads = 4
        batch_size = 5
        input_shape = (batch_size, 3, 3)
        mem = relational_memory.RelationalMemory(mem_slots,
                                                 head_size,
                                                 num_heads,
                                                 gate_style=None)
        inputs = tf.placeholder(tf.float32, input_shape)

        memory_0 = mem.initial_state(batch_size)
        _, memory_1 = mem(inputs, memory_0)

        with self.test_session() as session:
            tf.global_variables_initializer().run()
            results = session.run(
                {"memory_1": memory_1, "memory_0": memory_0},
                feed_dict={inputs: np.zeros(input_shape)})

        self.assertTrue(
            np.any(np.not_equal(results["memory_0"], results["memory_1"])))
Example #5
    def testStateSizeOutputSize(self):
        """Checks for correct `state_size` and `output_size` return values."""
        mem_slots = 4
        head_size = 32
        mem = relational_memory.RelationalMemory(mem_slots, head_size)

        self.assertItemsEqual([mem._mem_slots, mem._mem_size],
                              mem.state_size.as_list())
        self.assertItemsEqual([mem._mem_slots * mem._mem_size],
                              mem.output_size.as_list())
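Together with the output shape asserted in Example #9 below, these checks imply that mem._mem_size equals head_size * num_heads (Example #5 leaves num_heads at its default). A sketch of the shape arithmetic, assuming that relationship:

# Shape arithmetic implied by the assertions above
# (assumes mem_size = head_size * num_heads).
mem_slots, head_size, num_heads = 4, 32, 2
mem_size = head_size * num_heads      # 64
state_size = [mem_slots, mem_size]    # the memory is a [4, 64] matrix per batch element
output_size = [mem_slots * mem_size]  # the output is that matrix flattened: [256]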
Example #6
    def testBadInputs(self):
        """Test that verifies errors are thrown for bad input arguments."""

        mem_slots = 4
        head_size = 32

        with self.assertRaisesRegexp(ValueError, "num_blocks must be >= 1"):
            relational_memory.RelationalMemory(mem_slots,
                                               head_size,
                                               num_blocks=0)

        with self.assertRaisesRegexp(ValueError,
                                     "attention_mlp_layers must be >= 1"):
            relational_memory.RelationalMemory(mem_slots,
                                               head_size,
                                               attention_mlp_layers=0)

        with self.assertRaisesRegexp(ValueError, "gate_style must be one of"):
            relational_memory.RelationalMemory(mem_slots,
                                               head_size,
                                               gate_style="bad_gate")
Example #7
    def __init__(self, vocab_size, embedding_size, batch_size, initialization, mem_slots, num_heads,
                 use_pos, attention_mlp_layers, head_size):
        """Encodes the (h, r, t) embeddings with a shared RelationalMemory and scores the triple."""
        # Placeholders for input, output
        self.input_x = tf.placeholder(tf.int32, [batch_size, 3], name="input_h")
        self.input_y = tf.placeholder(tf.float32, [batch_size, 1], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Embedding layer
        with tf.name_scope("embedding"):
            # Use a pretrained embedding table when one is provided, else Xavier init.
            # (len() works for both lists and numpy arrays; comparing an array to []
            # with != would yield an ambiguous elementwise result.)
            if len(initialization) > 0:
                self.input_feature = tf.get_variable(name="input_feature_1", initializer=initialization)
            else:
                self.input_feature = tf.get_variable(
                    name="input_feature_2", shape=[vocab_size, embedding_size],
                    initializer=tf.contrib.layers.xavier_initializer(seed=1234))

        # Embedding lookup
        self.emb = tf.nn.embedding_lookup(self.input_feature, self.input_x)

        if use_pos == 1:
            self.emb = add_positional_embedding(self.emb, 3, embedding_size)

        self.h_emb, self.r_emb, self.t_emb = tf.split(self.emb, num_or_size_splits=3, axis=1)

        # Squeeze only axis 1, so a batch size of 1 is not squeezed away as well.
        self.h_emb = tf.squeeze(self.h_emb, axis=1)
        self.r_emb = tf.squeeze(self.r_emb, axis=1)
        self.t_emb = tf.squeeze(self.t_emb, axis=1)

        gen_mem = relational_memory.RelationalMemory(mem_slots=mem_slots, head_size=head_size, num_heads=num_heads,
                                                      gate_style='memory', attention_mlp_layers=attention_mlp_layers)

        init_states = gen_mem.initial_state(batch_size=batch_size)

        mem_output1, memory_input_next_step = gen_mem(self.h_emb, init_states)
        mem_output2, memory_input_next_step = gen_mem(self.r_emb, memory_input_next_step)
        mem_output3, memory_input_next_step = gen_mem(self.t_emb, memory_input_next_step)

        self.final_output = tf.nn.dropout(mem_output1 * mem_output2 * mem_output3, self.dropout_keep_prob)

        # Final scores and predictions
        with tf.name_scope("output1"):
            W1 = tf.get_variable("W1", shape=[self.final_output.get_shape()[-1], 1],
                                 initializer=tf.contrib.layers.xavier_initializer(seed=1234))
            b1 = tf.Variable(tf.zeros([1]))

        self.scores = tf.nn.xw_plus_b(self.final_output, W1, b1, name="scores")
        self.predictions = tf.nn.sigmoid(self.scores)

        # Mean soft-margin (softplus) loss over the batch
        with tf.name_scope("loss"):
            losses = tf.nn.softplus(self.scores * self.input_y)
            self.loss = tf.reduce_mean(losses)

        self.saver = tf.compat.v1.train.Saver(tf.global_variables(), max_to_keep=500)
Example #8
    def testGateShapes(self, gate_style):
        """Checks the shapes of RelationalMemory gates."""
        mem_slots = 4
        head_size = 32
        num_heads = 4
        batch_size = 4
        input_shape = (batch_size, 3, 3)

        mem = relational_memory.RelationalMemory(mem_slots,
                                                 head_size,
                                                 num_heads,
                                                 gate_style=gate_style)

        inputs = tf.placeholder(tf.float32, input_shape)
        init_state = mem.initial_state(batch_size)
        mem(inputs, init_state)

        gate_size = mem._calculate_gate_size()
        expected_size = [batch_size, num_heads, gate_size]

        self.assertEqual(mem.input_gate.get_shape().as_list(), expected_size)
        self.assertEqual(mem.forget_gate.get_shape().as_list(), expected_size)
Example #9
    def testOutputStateShapes(self, treat_input_as_matrix):
        """Checks the shapes of RelationalMemory output and state."""
        mem_slots = 4
        head_size = 32
        num_heads = 2
        batch_size = 5

        input_shape = (batch_size, 3, 3)
        mem = relational_memory.RelationalMemory(mem_slots, head_size,
                                                 num_heads)
        inputs = tf.placeholder(tf.float32, input_shape)
        init_state = mem.initial_state(batch_size)
        out = mem(inputs,
                  init_state,
                  treat_input_as_matrix=treat_input_as_matrix)

        with self.test_session() as session:
            tf.global_variables_initializer().run()
            new_out, new_memory = session.run(
                out, feed_dict={inputs: np.zeros(input_shape)})
        self.assertAllEqual(init_state.get_shape().as_list(), new_memory.shape)
        self.assertAllEqual(new_out.shape,
                            [batch_size, mem_slots * head_size * num_heads])
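The tests above run inside a tf.test.TestCase; outside a harness the same flow takes only a few lines. A minimal standalone sketch under the same assumptions (TF1 graph mode, Sonnet v1 import path):

# Minimal standalone usage, mirroring the test flow above.
import numpy as np
import tensorflow as tf
from sonnet.python.modules import relational_memory

batch_size = 5
mem = relational_memory.RelationalMemory(mem_slots=4, head_size=32, num_heads=2)
inputs = tf.placeholder(tf.float32, [batch_size, 3, 3])

state = mem.initial_state(batch_size)
output, state = mem(inputs, state)  # one recurrent step; call again to unroll further

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    out = session.run(output, feed_dict={inputs: np.zeros([batch_size, 3, 3])})

print(out.shape)  # (5, 256): batch_size x (mem_slots * head_size * num_heads)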
Example #10
    def __init__(self, embedding_size, batch_size, initialization, mem_slots, num_heads,
                 use_pos, attention_mlp_layers, head_size, num_filters=128):
        """Relational-memory model with a CNN decoder over the three memory outputs."""
        # Placeholders for input, output
        self.input_x = tf.compat.v1.placeholder(tf.int32, [batch_size, 3], name="input_h")
        self.input_y = tf.compat.v1.placeholder(tf.float32, [batch_size, 1], name="input_y")
        self.dropout_keep_prob = tf.compat.v1.placeholder(tf.float32, name="dropout_keep_prob")

        # Embedding layer
        with tf.name_scope("embedding"):
            self.W_query = tf.compat.v1.get_variable(name="W_query", initializer=initialization[0], trainable=False)
            self.W_user = tf.compat.v1.get_variable(name="W_user", initializer=initialization[1])
            self.W_doc = tf.compat.v1.get_variable(name="W_doc", initializer=initialization[2], trainable=False)

        # Embedding lookup
        self.h_emb = tf.nn.embedding_lookup(self.W_query, self.input_x[:, 0])
        self.r_emb = tf.nn.embedding_lookup(self.W_user, self.input_x[:, 1])
        self.t_emb = tf.nn.embedding_lookup(self.W_doc, self.input_x[:, 2])

        if use_pos == 1:
            self.h_emb = add_positional_embedding(self.h_emb, 1, embedding_size, name="pos_h")
            self.r_emb = add_positional_embedding(self.r_emb, 1, embedding_size, name="pos_r")
            self.t_emb = add_positional_embedding(self.t_emb, 1, embedding_size, name="pos_t")

        gen_mem = relational_memory.RelationalMemory(mem_slots=mem_slots, head_size=head_size, num_heads=num_heads,
                                                     gate_style='memory', attention_mlp_layers=attention_mlp_layers)

        init_states = gen_mem.initial_state(batch_size=batch_size)

        mem_output1, memory_input_next_step = gen_mem(self.h_emb, init_states)
        mem_output2, memory_input_next_step = gen_mem(self.r_emb, memory_input_next_step)
        mem_output3, memory_input_next_step = gen_mem(self.t_emb, memory_input_next_step)

        mem_output1 = tf.compat.v1.reshape(mem_output1, [-1, 1, mem_output1.get_shape()[-1]])
        mem_output2 = tf.compat.v1.reshape(mem_output2, [-1, 1, mem_output2.get_shape()[-1]])
        mem_output3 = tf.compat.v1.reshape(mem_output3, [-1, 1, mem_output3.get_shape()[-1]])

        mem_output = tf.compat.v1.concat([mem_output1, mem_output2, mem_output3], axis=1)
        self.input_cnn = tf.expand_dims(mem_output, -1)

        # CNN decoder
        # Convolution + max-pooling over the three stacked memory outputs
        pooled_outputs = []
        with tf.name_scope("conv-maxpool"):
            W = tf.compat.v1.get_variable("W_conv", shape=[3, 1, 1, num_filters],
                                          initializer=tf.contrib.layers.xavier_initializer(seed=1234))
            b = tf.Variable(tf.zeros([num_filters]))
            conv = tf.nn.conv2d(self.input_cnn, W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
            # Apply nonlinearity
            self.h_pool = tf.compat.v1.nn.relu(tf.nn.bias_add(conv, b), name="relu")

            # Max-pooling over the outputs
            self.h_pool = tf.squeeze(tf.nn.max_pool(
                self.h_pool, ksize=[1, 1, self.input_cnn.get_shape()[-2], 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool"))

        # Add dropout
        with tf.name_scope("dropout"):
            self.final_output = tf.nn.dropout(self.h_pool, self.dropout_keep_prob)

        # Final scores and predictions
        with tf.name_scope("output"):
            W_output = tf.compat.v1.get_variable("W1", shape=[self.final_output.get_shape()[-1], 1],
                                                 initializer=tf.contrib.layers.xavier_initializer(seed=1234))
            b_output = tf.Variable(tf.zeros([1]))
        self.scores = tf.compat.v1.nn.xw_plus_b(self.final_output, W_output, b_output, name="scores")
        self.predictions = tf.compat.v1.nn.sigmoid(self.scores)

        # Mean soft-margin (softplus) loss over the batch
        with tf.name_scope("loss"):
            losses = tf.compat.v1.nn.softplus(self.scores * self.input_y)
            self.loss = tf.reduce_mean(losses)

        self.saver = tf.compat.v1.train.Saver(tf.global_variables(), max_to_keep=500)
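Neither model snippet includes a training step. A minimal sketch of one, assuming the usual TF1 pattern; model, x_batch, and y_batch are hypothetical placeholders, the optimizer choice is an assumption, and the labels live in {-1, +1} as the softplus loss above implies:

# Hypothetical training step for either model above;
# model, x_batch, and y_batch are placeholders for illustration.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4)
train_op = optimizer.minimize(model.loss)

with tf.compat.v1.Session() as session:
    session.run(tf.compat.v1.global_variables_initializer())
    _, loss = session.run(
        [train_op, model.loss],
        feed_dict={model.input_x: x_batch,        # int32 triples, [batch_size, 3]
                   model.input_y: y_batch,        # labels in {-1, +1}, [batch_size, 1]
                   model.dropout_keep_prob: 0.5})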