Example #1
    def test_construction(self):
        interface = DNC.interface(
            read_keys=None,
            read_strengths=None,
            write_key=np.random.uniform(0, 1, (3, 9, 1)).astype(np.float32),
            write_strength=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            erase_vector=tf.convert_to_tensor(
                np.zeros((3, 9)).astype(np.float32)),
            write_vector=tf.convert_to_tensor(
                np.random.uniform(0, 1, (3, 9)).astype(np.float32)),
            free_gates=np.random.uniform(0, 1, (3, 5)).astype(np.float32),
            allocation_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            write_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            read_modes=None,
        )

        memory = Memory(13, 9, 5)
        memory_state = memory.get_initial_state(batch_size=3)
        usage, write_weighting, memory_matrix, link_matrix, precedence = memory.write(
            memory_state, interface)

        self.assertEqual(usage.shape, (3, 13))
        self.assertEqual(write_weighting.shape, (3, 13))
        self.assertEqual(memory_matrix.shape, (3, 13, 9))
        self.assertEqual(link_matrix.shape, (3, 13, 13))
        self.assertEqual(precedence.shape, (3, 13))
Example #2
    def test_update_memory(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                write_weighting = random_softmax((2, 4), axis=1)
                write_vector = np.random.uniform(0, 1,
                                                 (2, 5)).astype(np.float32)
                erase_vector = np.random.uniform(0, 1,
                                                 (2, 5)).astype(np.float32)
                memory_matrix = np.random.uniform(-1, 1,
                                                  (2, 4, 5)).astype(np.float32)

                ww = write_weighting[:, :, np.newaxis]
                v = write_vector[:, np.newaxis, :]
                e = erase_vector[:, np.newaxis, :]
                predicted = memory_matrix * (1 - np.matmul(ww, e)) + \
                    np.matmul(ww, v)

                memory_matrix = tf.convert_to_tensor(memory_matrix)

                op = mem.update_memory(memory_matrix, write_weighting,
                                       write_vector, erase_vector)
                M = session.run(op)

                self.assertEqual(M.shape, (2, 4, 5))
                self.assertTrue(np.allclose(M, predicted))
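
The `predicted` array above is the memory write rule from the DNC paper
(Graves et al., 2016), which the test re-derives in NumPy: the write
weighting w^w_t erases with e_t and writes v_t,

    M_t = M_{t-1} \circ (E - w^w_t e_t^\top) + w^w_t v_t^\top

where E is a matrix of ones and \circ denotes element-wise multiplication.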
Example #3
    def test_write(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 1)
                M, u, p, L, ww, rw, r = session.run(mem.init_memory())
                key = np.random.uniform(0, 1, (1, 5, 1)).astype(np.float32)
                strength = np.random.uniform(0, 1, (1, 1)).astype(np.float32)
                free_gates = np.random.uniform(0, 1, (1, 2)).astype(np.float32)
                write_gate = np.random.uniform(0, 1, (1, 1)).astype(np.float32)
                allocation_gate = np.random.uniform(0, 1,
                                                    (1, 1)).astype(np.float32)
                write_vector = np.random.uniform(0, 1,
                                                 (1, 5)).astype(np.float32)
                erase_vector = np.zeros((1, 5)).astype(np.float32)

                u_op, ww_op, M_op, L_op, p_op = mem.write(
                    M, u, rw, ww, p, L, key, strength, free_gates,
                    allocation_gate, write_gate, write_vector, erase_vector)
                session.run(tf.global_variables_initializer())
                u, ww, M, L, p = session.run([u_op, ww_op, M_op, L_op, p_op])

                self.assertEqual(u.shape, (1, 4))
                self.assertEqual(ww.shape, (1, 4))
                self.assertEqual(M.shape, (1, 4, 5))
                self.assertEqual(L.shape, (1, 4, 4))
                self.assertEqual(p.shape, (1, 4))
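
Several of these snippets (Examples #2, #6, and #21, among others) call a
`random_softmax` helper that is never defined in the excerpts. A minimal
sketch, assuming it samples uniform noise and softmax-normalizes it along
the requested axis (the helper in the original test suite may differ):

    import numpy as np

    def random_softmax(shape, axis):
        # uniform noise, softmax-normalized so the result sums
        # to 1 along `axis`
        x = np.random.uniform(0, 1, shape).astype(np.float32)
        e = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return e / np.sum(e, axis=axis, keepdims=True)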
Example #4
    def test_construction(self):
        interface = DNC.interface(
            read_keys=None,
            read_strengths=None,
            write_key=np.random.uniform(0, 1, (3, 9, 1)).astype(np.float32),
            write_strength=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            erase_vector=tf.convert_to_tensor(
                np.zeros((3, 9)).astype(np.float32)),
            write_vector=tf.convert_to_tensor(
                np.random.uniform(0, 1, (3, 9)).astype(np.float32)),
            free_gates=np.random.uniform(0, 1, (3, 5)).astype(np.float32),
            allocation_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            write_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            read_modes=None,
        )

        memory = Memory(13, 9, 5)
        memory_state = memory.initial_state(3)
        write_op = memory.write(memory_state, interface)
        init_op = tf.global_variables_initializer()

        with self.test_session() as session:
            init_op.run()
            usage, write_weighting, memory_matrix, link_matrix, precedence = session.run(
                write_op)

        self.assertEqual(usage.shape, (3, 13))
        self.assertEqual(write_weighting.shape, (3, 13))
        self.assertEqual(memory_matrix.shape, (3, 13, 9))
        self.assertEqual(link_matrix.shape, (3, 13, 13))
        self.assertEqual(precedence.shape, (3, 13))
Example #5
    def test_lookup_weighting(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                initial_mem = np.random.uniform(0, 1,
                                                (2, 4, 5)).astype(np.float32)
                keys = np.random.uniform(0, 1, (2, 5, 2)).astype(np.float32)
                strengths = np.random.uniform(0, 1, (2, 2)).astype(np.float32)

                norm_mem = initial_mem / np.sqrt(
                    np.sum(initial_mem**2, axis=2, keepdims=True))
                norm_keys = keys / np.sqrt(
                    np.sum(keys**2, axis=1, keepdims=True))
                sim = np.matmul(norm_mem, norm_keys)
                sim = sim * strengths[:, np.newaxis, :]
                predicted_weights = np.exp(sim) / np.sum(
                    np.exp(sim), axis=1, keepdims=True)

                memory_matrix = tf.convert_to_tensor(initial_mem)
                op = mem.get_lookup_weighting(memory_matrix, keys, strengths)
                c = session.run(op)

                self.assertEqual(c.shape, (2, 4, 2))
                self.assertTrue(np.allclose(c, predicted_weights))
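
The NumPy reference computation above is the content-based addressing from
the DNC paper: cosine similarity between every memory row and every read
key, scaled by the key strength and softmax-normalized over memory
locations,

    C(M, k, \beta)[i] = softmax_i(\beta \cdot cos(M[i, \cdot], k))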
Example #6
    def test_update_usage_vector(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                free_gates = np.random.uniform(0, 1, (2, 2)).astype(np.float32)
                init_read_weightings = random_softmax((2, 4, 2), axis=1)
                init_write_weightings = random_softmax((2, 4), axis=1)
                init_usage = np.random.uniform(0, 1, (2, 4)).astype(np.float32)

                psi = np.prod(
                    1 - init_read_weightings * free_gates[:, np.newaxis, :],
                    axis=2)
                predicted_usage = (init_usage + init_write_weightings -
                                   init_usage * init_write_weightings) * psi

                read_weightings = tf.convert_to_tensor(init_read_weightings)
                write_weighting = tf.convert_to_tensor(init_write_weightings)
                usage_vector = tf.convert_to_tensor(init_usage)

                op = mem.update_usage_vector(usage_vector, read_weightings,
                                             write_weighting, free_gates)
                u = session.run(op)

                self.assertEqual(u.shape, (2, 4))
                self.assertTrue(np.allclose(u, predicted_usage))
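
The quantity being tested is the usage update from the paper: the memory
retention vector \psi_t multiplies out the locations freed by the read
heads, and usage then grows where the write weighting wrote,

    \psi_t = \prod_i (1 - f_t^i w_{t-1}^{r,i})
    u_t = (u_{t-1} + w_{t-1}^w - u_{t-1} \circ w_{t-1}^w) \circ \psi_t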
Example #7
    def test_update_link_matrix(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.compat.v1.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                _write_weighting = random_softmax((2, 4), axis=1)
                _precedence_vector = random_softmax((2, 4), axis=1)
                initial_link = np.random.uniform(0, 1,
                                                 (2, 4, 4)).astype(np.float32)
                np.fill_diagonal(initial_link[0, :], 0)
                np.fill_diagonal(initial_link[1, :], 0)

                # calculate the updated link iteratively as in paper
                # to check the correctness of the vectorized implementation
                predicted = np.zeros((2, 4, 4), dtype=np.float32)
                for i in range(4):
                    for j in range(4):
                        if i != j:
                            reset_factor = (1 - _write_weighting[:, i] -
                                            _write_weighting[:, j])
                            predicted[:, i, j] = (
                                reset_factor * initial_link[:, i, j] +
                                _write_weighting[:, i] *
                                _precedence_vector[:, j])

                link_matrix = tf.convert_to_tensor(value=initial_link)
                precedence_vector = tf.convert_to_tensor(
                    value=_precedence_vector)

                write_weighting = tf.constant(_write_weighting)

                op = mem.update_link_matrix(precedence_vector, link_matrix,
                                            write_weighting)
                L = session.run(op)

                self.assertTrue(np.allclose(L, predicted))
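
The reference loop implements the temporal link matrix update from the
paper, with the diagonal held at zero,

    L_t[i, j] = (1 - w_t^w[i] - w_t^w[j]) L_{t-1}[i, j] + w_t^w[i] p_{t-1}[j]
    L_t[i, i] = 0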
Example #8
    def test_get_allocation_weighting(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                mock_usage = np.random.uniform(0.01, 1,
                                               (2, 4)).astype(np.float32)
                sorted_usage = np.sort(mock_usage, axis=1)
                free_list = np.argsort(mock_usage, axis=1)

                predicted_weights = np.zeros((2, 4)).astype(np.float32)
                for i in range(2):
                    for j in range(4):
                        product_list = [
                            mock_usage[i, free_list[i, k]] for k in range(j)
                        ]
                        predicted_weights[i, free_list[i, j]] = (
                            1 - mock_usage[i, free_list[i, j]]
                        ) * np.prod(product_list)

                op = mem.get_allocation_weighting(sorted_usage, free_list)
                a = session.run(op)

                self.assertEqual(a.shape, (2, 4))
                self.assertTrue(np.allclose(a, predicted_weights))
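
The reference loop computes the paper's allocation weighting over the free
list \phi_t (memory locations sorted by ascending usage): a location gets
weight proportional to how unused it is, discounted by the usage of every
location ahead of it in the free list,

    a_t[\phi_t[j]] = (1 - u_t[\phi_t[j]]) \prod_{i=1}^{j-1} u_t[\phi_t[i]]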
Example #9
File: dnc.py Project: revz345/dnc
    def __init__(self,
                 controller_class,
                 input_size,
                 output_size,
                 max_sequence_length,
                 memory_words_num=256,
                 memory_word_size=64,
                 memory_read_heads=4,
                 batch_size=128):
        """
        constructs a complete DNC architecture as described in the DNC paper
        http://www.nature.com/nature/journal/vaop/ncurrent/full/nature20101.html

        Parameters:
        -----------
        controller_class: BaseController
            a concrete implementation of the BaseController class
        input_size: int
            the size of the input vector
        output_size: int
            the size of the output vector
        max_sequence_length: int
            the maximum length of an input sequence
        memory_words_num: int
            the number of words that can be stored in memory
        memory_word_size: int
            the size of an individual word in memory
        memory_read_heads: int
            the number of read heads in the memory
        batch_size: int
            the size of the data batch
        """

        self.input_size = input_size
        self.output_size = output_size
        self.max_sequence_length = max_sequence_length
        self.words_num = memory_words_num
        self.word_size = memory_word_size
        self.read_heads = memory_read_heads
        self.batch_size = batch_size

        self.memory = Memory(self.words_num, self.word_size, self.read_heads,
                             self.batch_size)
        self.controller = controller_class(self.input_size, self.output_size,
                                           self.read_heads, self.word_size,
                                           self.batch_size)

        # input data placeholders
        self.input_data = tf.placeholder(tf.float32, [None, None, input_size],
                                         name='input')
        self.target_output = tf.placeholder(tf.float32,
                                            [None, None, output_size],
                                            name='targets')
        #self.input_data = tf.placeholder(tf.float32, [batch_size, None, input_size], name='input')
        #self.target_output = tf.placeholder(tf.float32, [batch_size, None, output_size], name='targets')
        self.sequence_length = tf.placeholder(tf.int32, name='sequence_length')

        self.build_graph()
Example #10
    def test_init_memory(self):
        memory = Memory(words_num=13, word_size=7, read_heads_num=2)
        state = memory.get_initial_state(batch_size=9)

        self.assertEqual(state.memory_matrix.shape, (9, 13, 7))
        self.assertEqual(state.usage_vector.shape, (9, 13))
        self.assertEqual(state.link_matrix.shape, (9, 13, 13))
        self.assertEqual(state.precedence_vector.shape, (9, 13))
        self.assertEqual(state.write_weighting.shape, (9, 13))
        self.assertEqual(state.read_weightings.shape, (9, 13, 2))
Example #11
    def test_init_memory(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                M, u, p, L, ww, rw, r = session.run(mem.init_memory())

                self.assertEqual(M.shape, (2, 4, 5))
                self.assertEqual(u.shape, (2, 4))
                self.assertEqual(L.shape, (2, 4, 4))
                self.assertEqual(ww.shape, (2, 4))
                self.assertEqual(rw.shape, (2, 4, 2))
                self.assertEqual(r.shape, (2, 5, 2))
                self.assertEqual(p.shape, (2, 4))
Example #12
    def test_construction(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                session.run(tf.global_variables_initializer())

                self.assertEqual(mem.words_num, 4)
                self.assertEqual(mem.word_size, 5)
                self.assertEqual(mem.read_heads, 2)
                self.assertEqual(mem.batch_size, 2)

                self.assertEqual(mem.memory_matrix.get_shape().as_list(),
                                 [2, 4, 5])
                self.assertEqual(mem.usage_vector.get_shape().as_list(),
                                 [2, 4])
                self.assertEqual(mem.link_matrix.get_shape().as_list(),
                                 [2, 4, 4])
                self.assertEqual(mem.write_weighting.get_shape().as_list(),
                                 [2, 4])
                self.assertEqual(mem.read_weightings.get_shape().as_list(),
                                 [2, 4, 2])
                self.assertEqual(mem.read_vectors.get_shape().as_list(),
                                 [2, 5, 2])
Example #13
    def test_update_read_vectors(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 4)
                memory_matrix = np.random.uniform(-1, 1, (4, 4, 5)).astype(np.float32)
                read_weightings = random_softmax((4, 4, 2), axis=1)
                predicted = np.matmul(np.transpose(memory_matrix, [0, 2, 1]), read_weightings)

                op = mem.update_read_vectors(memory_matrix, read_weightings)
                session.run(tf.global_variables_initializer())
                r = session.run(op)
                #updated_read_vectors = session.run(mem.read_vectors.value())

                self.assertTrue(np.allclose(r, predicted))
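
The predicted value is the paper's read equation: each read vector is the
memory matrix weighted by the corresponding read weighting,

    r_t^i = M_t^\top w_t^{r,i}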
Example #14
    def test_update_precedence_vector(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                write_weighting = random_softmax((2, 4), axis=1)
                initial_precedence = random_softmax((2, 4), axis=1)
                predicted = (1 - write_weighting.sum(axis=1, keepdims=True)) * \
                    initial_precedence + write_weighting

                precedence_vector = tf.convert_to_tensor(initial_precedence)

                op = mem.update_precedence_vector(precedence_vector, write_weighting)
                p = session.run(op)

                self.assertEqual(p.shape, (2, 4))
                self.assertTrue(np.allclose(p, predicted))
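
The predicted value is the precedence vector update from the paper: the old
precedence decays by how much was written in total, then the new write
weighting is added,

    p_t = (1 - \sum_i w_t^w[i]) p_{t-1} + w_t^w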
Example #15
    def test_read(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:
                mem = Memory(4, 5, 2, 1)
                M, u, p, L, ww, rw, r = session.run(mem.init_memory())
                keys = np.random.uniform(0, 1, (1, 5, 2)).astype(np.float32)
                strengths = np.random.uniform(0, 1, (1, 2)).astype(np.float32)
                link_matrix = np.random.uniform(0, 1, (1, 4, 4)).astype(np.float32)
                read_modes = random_softmax((1, 3, 2), axis=1).astype(np.float32)
                memory_matrix = np.random.uniform(-1, 1, (1, 4, 5)).astype(np.float32)

                wr_op, r_op = mem.read(memory_matrix, rw, keys, strengths, link_matrix, read_modes)
                session.run(tf.global_variables_initializer())
                wr, r = session.run([wr_op, r_op])

                self.assertEqual(wr.shape, (1, 4, 2))
                self.assertEqual(r.shape, (1, 5, 2))
Example #16
    def test_updated_write_weighting(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                write_gate = np.random.uniform(0, 1, (2, 1)).astype(np.float32)
                allocation_gate = np.random.uniform(0, 1,
                                                    (2, 1)).astype(np.float32)
                lookup_weighting = random_softmax((2, 4, 1), axis=1)
                allocation_weighting = random_softmax((2, 4), axis=1)

                predicted_weights = write_gate * (
                    allocation_gate * allocation_weighting +
                    (1 - allocation_gate) * np.squeeze(lookup_weighting, axis=2))

                op = mem.update_write_weighting(lookup_weighting,
                                                allocation_weighting,
                                                write_gate, allocation_gate)
                w_w = session.run(op)

                self.assertEqual(w_w.shape, (2, 4))
                self.assertTrue(np.allclose(w_w, predicted_weights))
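
The predicted weights follow the paper's write weighting: the allocation
gate g_t^a interpolates between allocation-based and content-based
addressing, and the write gate g_t^w scales the result,

    w_t^w = g_t^w [g_t^a a_t + (1 - g_t^a) c_t^w]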
Example #17
    def test_get_directional_weightings(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                _link_matrix = np.random.uniform(0, 1, (2, 4, 4)).astype(np.float32)
                _read_weightings = random_softmax((2, 4, 2), axis=1)
                predicted_forward = np.matmul(_link_matrix, _read_weightings)
                predicted_backward = np.matmul(np.transpose(_link_matrix, [0, 2, 1]), _read_weightings)

                read_weightings = tf.convert_to_tensor(_read_weightings)

                fop, bop = mem.get_directional_weightings(read_weightings, _link_matrix)

                forward_weighting, backward_weighting = session.run([fop, bop])

                self.assertTrue(np.allclose(forward_weighting, predicted_forward))
                self.assertTrue(np.allclose(backward_weighting, predicted_backward))
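
The two weightings being tested are the paper's forward and backward
temporal addressing, obtained by multiplying the previous read weightings
with the link matrix and its transpose,

    f_t^i = L_t w_{t-1}^{r,i},    b_t^i = L_t^\top w_{t-1}^{r,i}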
Example #18
    @staticmethod
    def get_addressing_weights(m, i, link_matrix):
        lookup_w = ContentAddressing.weighting(m.memory_matrix, i.read_keys,
                                               i.read_strengths)
        fwd_w, bkwd_w = TemporalLinkAddressing.weightings(
            link_matrix, m.read_weightings)
        read_w, read_v = Memory.read(m.memory_matrix, m.read_weightings,
                                     link_matrix, i)
        if not tf.executing_eagerly():
            lookup_w = lookup_w.eval()
            fwd_w, bkwd_w = fwd_w.eval(), bkwd_w.eval()
            read_w, read_v = read_w.eval(), read_v.eval()
        return lookup_w, fwd_w, bkwd_w, read_w, read_v
Example #19
    def test_read_vectors_and_weightings(self):
        m = Memory.state(
            memory_matrix=np.random.uniform(-1, 1,
                                            (5, 11, 7)).astype(np.float32),
            usage_vector=None,
            link_matrix=None,
            precedence_vector=None,
            write_weighting=None,
            read_weightings=DNCMemoryTests.softmax_sample((5, 11, 3), axis=1),
        )
        # pull out read_modes due to https://github.com/tensorflow/tensorflow/issues/1409
        # hack to circumvent tf bug in not doing `convert_to_tensor` in einsum reductions correctly
        read_modes = DNCMemoryTests.softmax_sample((5, 3, 3), axis=1)
        i = DNC.interface(
            read_keys=np.random.uniform(0, 1, (5, 7, 3)).astype(np.float32),
            read_strengths=np.random.uniform(0, 1, (5, 3)).astype(np.float32),
            write_key=None,
            write_strength=None,
            erase_vector=None,
            write_vector=None,
            free_gates=None,
            allocation_gate=None,
            write_gate=None,
            read_modes=tf.convert_to_tensor(read_modes),
        )

        # read uses the link matrix that is produced after a write operation
        new_link_matrix = np.random.uniform(0, 1,
                                            (5, 11, 11)).astype(np.float32)
        # assume ContentAddressing and TemporalLinkAddressing are already correct
        lookup_weightings, forward_weighting, backward_weighting, \
            updated_read_weightings, updated_read_vectors = self.get_addressing_weights(
                m, i, new_link_matrix)
        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertEqual(updated_read_vectors.shape, (5, 7, 3))

        expected_read_weightings = np.zeros((5, 11, 3)).astype(np.float32)
        for read_head in range(3):
            backward_weight = (read_modes[:, 0, read_head, np.newaxis] *
                               backward_weighting[:, :, read_head])
            lookup_weight = (read_modes[:, 1, read_head, np.newaxis] *
                             lookup_weightings[:, :, read_head])
            forward_weight = (read_modes[:, 2, read_head, np.newaxis] *
                              forward_weighting[:, :, read_head])
            expected_read_weightings[:, :, read_head] = (
                backward_weight + lookup_weight + forward_weight)
        expected_read_vectors = np.matmul(
            np.transpose(m.memory_matrix, [0, 2, 1]), updated_read_weightings)

        self.assertAllClose(updated_read_weightings, expected_read_weightings)
        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertAllClose(updated_read_vectors, expected_read_vectors)
Example #20
    def test_construction(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                session.run(tf.initialize_all_variables())

                self.assertEqual(mem.words_num, 4)
                self.assertEqual(mem.word_size, 5)
                self.assertEqual(mem.read_heads, 2)
                self.assertEqual(mem.batch_size, 2)
Example #21
    def test_update_read_weightings(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                mem = Memory(4, 5, 2, 2)
                lookup_weightings = random_softmax((2, 4, 2), axis=1)
                forward_weighting = random_softmax((2, 4, 2), axis=1)
                backward_weighting = random_softmax((2, 4, 2), axis=1)
                read_mode = random_softmax((2, 3, 2), axis=1)
                predicted_weights = np.zeros((2, 4, 2)).astype(np.float32)

                # calculate the predicted weights using the iterative method
                # from the paper to check the correctness of the vectorized
                # implementation
                for i in range(2):
                    predicted_weights[:, :, i] = (
                        read_mode[:, 0, i, np.newaxis] * backward_weighting[:, :, i] +
                        read_mode[:, 1, i, np.newaxis] * lookup_weightings[:, :, i] +
                        read_mode[:, 2, i, np.newaxis] * forward_weighting[:, :, i])

                op = mem.update_read_weightings(lookup_weightings,
                                                forward_weighting,
                                                backward_weighting, read_mode)
                session.run(tf.global_variables_initializer())
                w_r = session.run(op)
                #updated_read_weightings = session.run(mem.read_weightings.value())

                self.assertTrue(np.allclose(w_r, predicted_weights))
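
For reference, the same prediction can be computed without the per-head
loop. A vectorized NumPy equivalent of the iterative loop above (a sketch
using the same arrays; each read mode is broadcast over the memory
locations):

    # read_mode[:, k, np.newaxis, :] has shape (2, 1, 2), which broadcasts
    # against the (2, 4, 2) weightings
    predicted_weights = (
        read_mode[:, 0, np.newaxis, :] * backward_weighting +
        read_mode[:, 1, np.newaxis, :] * lookup_weightings +
        read_mode[:, 2, np.newaxis, :] * forward_weighting)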
Example #22
    def test_read_vectors_and_weightings(self):
        m = Memory.state(
            memory_matrix=np.random.uniform(-1, 1,
                                            (5, 11, 7)).astype(np.float32),
            usage_vector=None,
            link_matrix=None,
            precedence_vector=None,
            write_weighting=None,
            read_weightings=DNCMemoryTests.softmax_sample((5, 11, 3), axis=1),
        )
        i = DNC.interface(
            read_keys=np.random.uniform(0, 1, (5, 7, 3)).astype(np.float32),
            read_strengths=np.random.uniform(0, 1, (5, 3)).astype(np.float32),
            write_key=None,
            write_strength=None,
            erase_vector=None,
            write_vector=None,
            free_gates=None,
            allocation_gate=None,
            write_gate=None,
            read_modes=tf.convert_to_tensor(
                DNCMemoryTests.softmax_sample((5, 3, 3), axis=1)),
        )
        # read uses the link matrix that is produced after a write operation
        new_link_matrix = np.random.uniform(0, 1,
                                            (5, 11, 11)).astype(np.float32)

        # assume ContentAddressing and TemporalLinkAddressing are already correct
        op_ca = ContentAddressing.weighting(m.memory_matrix, i.read_keys,
                                            i.read_strengths)
        op_f, op_b = TemporalLinkAddressing.weightings(new_link_matrix,
                                                       m.read_weightings)
        read_op = Memory.read(m.memory_matrix, m.read_weightings,
                              new_link_matrix, i)
        with self.test_session() as session:
            lookup_weightings = session.run(op_ca)
            forward_weighting, backward_weighting = session.run([op_f, op_b])
            updated_read_weightings, updated_read_vectors = session.run(
                read_op)
            # hack to circumvent tf bug in not doing `convert_to_tensor` in einsum reductions correctly
            read_modes_numpy = session.run(i.read_modes)

        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertEqual(updated_read_vectors.shape, (5, 7, 3))

        expected_read_weightings = np.zeros((5, 11, 3)).astype(np.float32)
        for read_head in range(3):
            backward_weight = (read_modes_numpy[:, 0, read_head, np.newaxis] *
                               backward_weighting[:, :, read_head])
            lookup_weight = (read_modes_numpy[:, 1, read_head, np.newaxis] *
                             lookup_weightings[:, :, read_head])
            forward_weight = (read_modes_numpy[:, 2, read_head, np.newaxis] *
                              forward_weighting[:, :, read_head])
            expected_read_weightings[:, :, read_head] = (
                backward_weight + lookup_weight + forward_weight)
        expected_read_vectors = np.matmul(
            np.transpose(m.memory_matrix, [0, 2, 1]), updated_read_weightings)

        self.assertAllClose(updated_read_weightings, expected_read_weightings)
        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertAllClose(updated_read_vectors, expected_read_vectors)
Example #23
parser.add_argument("--no-dnc", action='store_true')
parser.add_argument("--savedir", type=str, default="model")
parser.add_argument("--logdir", type=str, default="logs")
parser.add_argument("--learningrate", type=float, default=1e-4)
parser.add_argument("--no-mask", action='store_true')
args = parser.parse_args()

BATCH_SIZE = args.batch_size

task = eval(args.task)
if args.test_params:
    test_params = eval(args.test_params)
else:
    test_params = tuple(np.max(p) for p in task.default_params)

memory = Memory(args.msize, args.mwidth, init_state=args.minit)
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMWriteHead, shifts=[-1, 0, 1])

input = tf.placeholder(tf.float32, shape=(None, None, task.input_size))
if args.controller == 'lstm':
    controller = LSTMCell(args.controller_size)
elif args.controller == 'multilstm':
    controller = tf.nn.rnn_cell.MultiRNNCell(
        [LSTMCell(args.controller_size) for i in range(3)])
elif args.controller == 'ff':
    controller = dnc.ff.FFWrapper(
        dnc.ff.simple_feedforward(hidden=[args.controller_size] * 2))

if not args.no_dnc:
Example #24
File: dnc.py Project: revz345/dnc
class DNC:
    def __init__(self,
                 controller_class,
                 input_size,
                 output_size,
                 max_sequence_length,
                 memory_words_num=256,
                 memory_word_size=64,
                 memory_read_heads=4,
                 batch_size=128):
        """
        constructs a complete DNC architecture as described in the DNC paper
        http://www.nature.com/nature/journal/vaop/ncurrent/full/nature20101.html

        Parameters:
        -----------
        controller_class: BaseController
            a concrete implementation of the BaseController class
        input_size: int
            the size of the input vector
        output_size: int
            the size of the output vector
        max_sequence_length: int
            the maximum length of an input sequence
        memory_words_num: int
            the number of words that can be stored in memory
        memory_word_size: int
            the size of an individual word in memory
        memory_read_heads: int
            the number of read heads in the memory
        batch_size: int
            the size of the data batch
        """

        self.input_size = input_size
        self.output_size = output_size
        self.max_sequence_length = max_sequence_length
        self.words_num = memory_words_num
        self.word_size = memory_word_size
        self.read_heads = memory_read_heads
        self.batch_size = batch_size

        self.memory = Memory(self.words_num, self.word_size, self.read_heads,
                             self.batch_size)
        self.controller = controller_class(self.input_size, self.output_size,
                                           self.read_heads, self.word_size,
                                           self.batch_size)

        # input data placeholders
        self.input_data = tf.placeholder(tf.float32, [None, None, input_size],
                                         name='input')
        self.target_output = tf.placeholder(tf.float32,
                                            [None, None, output_size],
                                            name='targets')
        #self.input_data = tf.placeholder(tf.float32, [batch_size, None, input_size], name='input')
        #self.target_output = tf.placeholder(tf.float32, [batch_size, None, output_size], name='targets')
        self.sequence_length = tf.placeholder(tf.int32, name='sequence_length')

        self.build_graph()

    def _step_op(self, step, memory_state, controller_state=None):
        """
        performs a step operation on the input step data

        Parameters:
        ----------
        step: Tensor (batch_size, input_size)
        memory_state: Tuple
            a tuple of current memory parameters
        controller_state: Tuple
            the state of the controller if it's recurrent

        Returns: list of Tensors
            the updated memory state fields, the step's final output,
            the gate values, and the new controller state (if any)
        """

        last_read_vectors = memory_state[6]
        pre_output, interface, nn_state = None, None, None

        if self.controller.has_recurrent_nn:
            pre_output, interface, nn_state = self.controller.process_input(
                step, last_read_vectors, controller_state)
        else:
            pre_output, interface = self.controller.process_input(
                step, last_read_vectors)

        usage_vector, write_weighting, memory_matrix, link_matrix, precedence_vector = self.memory.write(
            memory_state[0], memory_state[1], memory_state[5], memory_state[4],
            memory_state[2], memory_state[3], interface['write_key'],
            interface['write_strength'], interface['free_gates'],
            interface['allocation_gate'], interface['write_gate'],
            interface['write_vector'], interface['erase_vector'])

        read_weightings, read_vectors = self.memory.read(
            memory_matrix,
            memory_state[5],
            interface['read_keys'],
            interface['read_strengths'],
            link_matrix,
            interface['read_modes'],
        )

        return [

            # report new memory state to be updated outside the condition branch
            memory_matrix,
            usage_vector,
            precedence_vector,
            link_matrix,
            write_weighting,
            read_weightings,
            read_vectors,
            self.controller.final_output(pre_output, read_vectors),
            interface['free_gates'],
            interface['allocation_gate'],
            interface['write_gate'],

            # report new state of RNN if exists
            nn_state[0] if nn_state is not None else tf.zeros(1),
            nn_state[1] if nn_state is not None else tf.zeros(1)
        ]

    def _loop_body(self, time, memory_state, outputs, free_gates,
                   allocation_gates, write_gates, read_weightings,
                   write_weightings, usage_vectors, controller_state):
        """
        the body of the DNC sequence processing loop

        Parameters:
        ----------
        time: Tensor
        memory_state: Tuple
        outputs: TensorArray
        free_gates: TensorArray
        allocation_gates: TensorArray
        write_gates: TensorArray
        read_weightings: TensorArray
        write_weightings: TensorArray
        usage_vectors: TensorArray
        controller_state: Tuple

        Returns: Tuple containing all updated arguments
        """

        step_input = self.unpacked_input_data.read(time)

        output_list = self._step_op(step_input, memory_state, controller_state)

        # update memory parameters

        new_memory_state = tuple(output_list[0:7])
        new_controller_state = LSTMStateTuple(output_list[11], output_list[12])

        outputs = outputs.write(time, output_list[7])

        # collecting memory view for the current step
        free_gates = free_gates.write(time, output_list[8])
        allocation_gates = allocation_gates.write(time, output_list[9])
        write_gates = write_gates.write(time, output_list[10])
        read_weightings = read_weightings.write(time, output_list[5])
        write_weightings = write_weightings.write(time, output_list[4])
        usage_vectors = usage_vectors.write(time, output_list[1])

        return (time + 1, new_memory_state, outputs, free_gates,
                allocation_gates, write_gates, read_weightings,
                write_weightings, usage_vectors, new_controller_state)

    def build_graph(self):
        """
        builds the computational graph that performs a step-by-step evaluation
        of the input data batches
        """

        self.unpacked_input_data = dnc.utility.unpack_into_tensorarray(
            self.input_data, 1, self.sequence_length)

        outputs = tf.TensorArray(tf.float32,
                                 self.sequence_length,
                                 name='outputs')
        free_gates = tf.TensorArray(tf.float32,
                                    self.sequence_length,
                                    name='free_gates')
        allocation_gates = tf.TensorArray(tf.float32,
                                          self.sequence_length,
                                          name='allocation_gates')
        write_gates = tf.TensorArray(tf.float32,
                                     self.sequence_length,
                                     name='write_gates')
        read_weightings = tf.TensorArray(tf.float32,
                                         self.sequence_length,
                                         name='read_weightings')
        write_weightings = tf.TensorArray(tf.float32,
                                          self.sequence_length,
                                          name='write_weightings')
        usage_vectors = tf.TensorArray(tf.float32,
                                       self.sequence_length,
                                       name='usage_vectors')

        controller_state = self.controller.get_state(
        ) if self.controller.has_recurrent_nn else (tf.zeros(1), tf.zeros(1))
        memory_state = self.memory.init_memory()
        if not isinstance(controller_state, LSTMStateTuple):
            controller_state = LSTMStateTuple(controller_state[0],
                                              controller_state[1])
        final_results = None

        with tf.variable_scope("sequence_loop") as scope:
            time = tf.constant(0, dtype=tf.int32, name='time')
            final_results = tf.while_loop(
                cond=lambda time, *_: time < self.sequence_length,
                body=self._loop_body,
                loop_vars=(time, memory_state, outputs, free_gates,
                           allocation_gates, write_gates, read_weightings,
                           write_weightings, usage_vectors, controller_state),
                parallel_iterations=32,
                swap_memory=False)

        dependencies = []
        if self.controller.has_recurrent_nn:
            dependencies.append(self.controller.update_state(final_results[9]))

        with tf.control_dependencies(dependencies):
            self.packed_output = dnc.utility.pack_into_tensor(final_results[2],
                                                              axis=1)
            self.packed_memory_view = {
                'free_gates':
                dnc.utility.pack_into_tensor(final_results[3], axis=1),
                'allocation_gates':
                dnc.utility.pack_into_tensor(final_results[4], axis=1),
                'write_gates':
                dnc.utility.pack_into_tensor(final_results[5], axis=1),
                'read_weightings':
                dnc.utility.pack_into_tensor(final_results[6], axis=1),
                'write_weightings':
                dnc.utility.pack_into_tensor(final_results[7], axis=1),
                'usage_vectors':
                dnc.utility.pack_into_tensor(final_results[8], axis=1)
            }

    def get_outputs(self):
        """
        returns the graph nodes for the output and memory view

        Returns: Tuple
            outputs: Tensor (batch_size, time_steps, output_size)
            memory_view: dict
        """
        return self.packed_output, self.packed_memory_view

    def save(self, session, ckpts_dir, name):
        """
        saves the current values of the model's parameters to a checkpoint

        Parameters:
        ----------
        session: tf.Session
            the tensorflow session to save
        ckpts_dir: string
            the path to the checkpoints directories
        name: string
            the name of the checkpoint subdirectory
        """
        checkpoint_dir = os.path.join(ckpts_dir, name)

        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        tf.train.Saver(tf.trainable_variables()).save(
            session, os.path.join(checkpoint_dir, 'model.ckpt'))

    def restore(self, session, ckpts_dir, name):
        """
        session: tf.Session
            the tensorflow session to restore into
        ckpts_dir: string
            the path to the checkpoints directories
        name: string
            the name of the checkpoint subdirectory
        """
        tf.train.Saver(tf.trainable_variables()).restore(
            session, os.path.join(ckpts_dir, name, 'model.ckpt'))
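
A hypothetical usage sketch for this class. `FeedforwardController` stands
in for any concrete `BaseController` implementation and is not defined in
these excerpts; the placeholders fed below are the ones created in
`__init__`:

    ncomputer = DNC(FeedforwardController, input_size=8, output_size=8,
                    max_sequence_length=10, memory_words_num=16,
                    memory_word_size=8, memory_read_heads=2, batch_size=4)
    output, memory_view = ncomputer.get_outputs()
    loss = tf.reduce_mean(tf.square(output - ncomputer.target_output))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(loss, feed_dict={
            ncomputer.input_data: np.zeros((4, 10, 8), dtype=np.float32),
            ncomputer.target_output: np.zeros((4, 10, 8), dtype=np.float32),
            ncomputer.sequence_length: 10,
        })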
Example #25
import tensorflow as tf
import numpy as np

from dnc import DNC, LSTMCell
from dnc.memory import Memory, NTMReadHead, NTMWriteHead

from tasks import CopyTask, RepeatCopyTask, AndTask, XorTask, MergeTask
from utils import *

INPUT_SIZE = 8
BATCH_SIZE = 32

memory = Memory(25, 6)
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMReadHead, shifts=[-1, 0, 1])
memory.add_head(NTMWriteHead, shifts=[-1, 0, 1])

input = tf.placeholder(tf.float32, shape=(None, None, INPUT_SIZE + 2))
#lstm = tf.nn.rnn_cell.MultiRNNCell([LSTMCell(256) for i in range(3)])
lstm = LSTMCell(100)

net = DNC(input, memory, INPUT_SIZE + 2, controller=lstm, log_memory=True)
targets = tf.placeholder(dtype=tf.float32, shape=[None, None, INPUT_SIZE + 2])
mask = tf.placeholder(dtype=tf.float32, shape=[None, None, INPUT_SIZE + 2])
output = net[0]
loss = tf.losses.sigmoid_cross_entropy(logits=output, weights=mask,
                                       multi_class_labels=targets)
cost = tf.reduce_sum(mask * ((1 - targets * (1 - tf.exp(-output))) *
                             tf.sigmoid(output))) / BATCH_SIZE

opt = tf.train.RMSPropOptimizer(1e-4, momentum=0.9)
train = minimize_and_clip(opt, loss)