Example #1
    def test_dnc_output_shape(self):
        batch_size = 3
        controller_config = {
            "hidden_size": 64,
        }
        memory_config = {
            'read_heads_num': 7,
            'word_size': 5,
            'words_num': 16,
        }
        output_size = 10

        for input_size in [10, 17, 49]:
            dnc = DNC(controller_config,
                      memory_config,
                      output_size,
                      classic_dnc_output=False)
            initial_state = dnc.initial_state(batch_size)
            input_shape = dnc._W * dnc._R + input_size
            test_input = np.random.uniform(
                -3, 3, (batch_size, input_shape)).astype(np.float32)
            example_output_op, _ = dnc(
                tf.convert_to_tensor(test_input),
                initial_state,
            )
            init = tf.global_variables_initializer()

            with self.test_session() as sess:
                init.run()
                example_output = sess.run(example_output_op)
            self.assertEqual(example_output.shape, (batch_size, output_size))
Example #2
    def test_eager_dnc_optimization(self):
        batch_size = 7
        input_size = 15
        memory_config = {
            'memory_size': 27,
            'word_size': 9,
            'num_read_heads': 10,
        }
        output_size = 36

        x = tf.keras.Input(shape=(
            None,
            input_size,
        ))
        dnc_cell = DNC(output_size, controller_units=30, **memory_config)
        dnc_initial_state = dnc_cell.get_initial_state(batch_size=batch_size)
        layer = tf.keras.layers.RNN(dnc_cell)
        y = layer(x, initial_state=dnc_initial_state)

        model = tf.keras.models.Model(x, y)
        model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
                      loss='mse',
                      run_eagerly=True)
        model.train_on_batch(np.zeros((batch_size, 5, input_size)),
                             np.zeros((batch_size, output_size)))
        self.assertEqual(model.output_shape[1], output_size)
Example #3
    def test_dnc_optimization(self):
        batch_size = 7
        time_steps = 15
        input_size = 30
        controller_config = {
            "hidden_size": 64,
        }
        memory_config = {
            'read_heads_num': 10,
            'word_size': 9,
            'words_num': 27,
        }
        output_size = 36

        dnc = DNC(controller_config,
                  memory_config,
                  output_size,
                  classic_dnc_output=False)
        dnc_initial_state = dnc.initial_state(batch_size)
        inputs = tf.random_normal([time_steps, batch_size, input_size])
        dnc_output_op, _ = rnn.dynamic_rnn(cell=dnc,
                                           inputs=inputs,
                                           initial_state=dnc_initial_state,
                                           time_major=True)

        targets = np.random.rand(time_steps, batch_size, output_size)
        loss = tf.reduce_mean(tf.square(dnc_output_op - targets))
        optimizer_op = tf.train.GradientDescentOptimizer(5).minimize(loss)
        init_op = tf.global_variables_initializer()

        with self.test_session():
            init_op.run()
            optimizer_op.run()
Example #4
    def test_final_output(self):
        output_size = 19
        batch_size = 6
        controller_config = {
            "hidden_size": 64,
        }
        memory_config = {'words_num': 20, 'word_size': 5, 'read_heads_num': 2}
        dnc = DNC(controller_config,
                  memory_config,
                  output_size,
                  classic_dnc_output=False)
        intermediate_output = np.random.uniform(
            -1, 1, (batch_size, output_size)).astype(np.float32)
        new_read_vectors = np.random.uniform(0, 1, (batch_size, 5, 2)).astype(
            np.float32)

        memory_result = dnc._memory_to_output_weights(
            tf.convert_to_tensor(
                np.reshape(new_read_vectors, (-1, dnc._W * dnc._R))))
        controller_result = dnc._controller_to_output_weights(
            tf.convert_to_tensor(intermediate_output))
        final_result = memory_result + controller_result

        init_op = tf.global_variables_initializer()
        with self.test_session() as sess:
            init_op.run()
            output = sess.run(final_result)
        self.assertEqual(output.shape, (6, 19))
Example #5
    def test_parse_interface_vector(self):
        output_size = 10
        batch_size = 2
        memory_config = {
            'memory_size': None,
            'word_size': 5,
            'num_read_heads': 2,
        }
        interface_vector_size = 38
        interface = np.random.uniform(-3, 3,
                                      (batch_size, interface_vector_size))
        interface = interface.astype(np.float32)

        def softmax_dim1(x):
            y = np.atleast_2d(x)
            y = y - np.expand_dims(np.max(y, axis=1), 1)
            y = np.exp(y)
            y_summed = np.expand_dims(np.sum(y, axis=1), 1)
            return y / y_summed

        expected_interface = {
            "read_keys":
            np.reshape(interface[:, :10], (-1, 5, 2)),
            "read_strengths":
            1 + np.log(np.exp(np.reshape(interface[:, 10:12], (
                -1,
                2,
            ))) + 1),
            "write_key":
            np.reshape(interface[:, 12:17], (-1, 5, 1)),
            "write_strength":
            1 + np.log(np.exp(np.reshape(interface[:, 17], (-1, 1))) + 1),
            "erase_vector":
            1.0 / (1 + np.exp(-1 * np.reshape(interface[:, 18:23], (-1, 5)))),
            "write_vector":
            np.reshape(interface[:, 23:28], (-1, 5)),
            "free_gates":
            1.0 / (1 + np.exp(-1 * np.reshape(interface[:, 28:30], (-1, 2)))),
            "allocation_gate":
            1.0 / (1 + np.exp(-1 * interface[:, 30, np.newaxis])),
            "write_gate":
            1.0 / (1 + np.exp(-1 * interface[:, 31, np.newaxis])),
            "read_modes":
            softmax_dim1(np.reshape(interface[:, 32:], (-1, 3, 2))),
        }

        dnc = DNC(output_size, controller_units=64, **memory_config)
        parsed_interface = dnc._parse_interface_vector(interface)._asdict()

        for item in expected_interface:
            with self.subTest(name=item):
                self.assertAllClose(
                    parsed_interface[item],
                    expected_interface[item],
                )
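
The hard-coded interface_vector_size of 38 above follows directly from the slice layout in expected_interface. A quick arithmetic sanity check of that width, assuming word size W = 5 and R = 2 read heads as configured in memory_config:

# Interface width used by the test above: R*W read keys, R read strengths,
# W write key, 1 write strength, W erase vector, W write vector, R free gates,
# 1 allocation gate, 1 write gate, and 3*R read modes.
W, R = 5, 2
interface_size = R * W + R + W + 1 + W + W + R + 1 + 1 + 3 * R
assert interface_size == 38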
Example #6
    def test_restore(self):

        current_dir = os.path.dirname(__file__)
        ckpts_dir = os.path.join(current_dir, 'checkpoints')

        model1_output, model1_memview = None, None
        sample_input = np.random.uniform(0, 1, (2, 5, 10)).astype(np.float32)
        sample_seq_len = 5

        graph1 = tf.Graph()
        with graph1.as_default():
            with tf.Session(graph=graph1) as session1:

                computer = DNC(DummyController,
                               10,
                               20,
                               10,
                               10,
                               64,
                               2,
                               batch_size=2)
                session1.run(tf.initialize_all_variables())

                saved_weights = session1.run([
                    computer.controller.nn_output_weights,
                    computer.controller.interface_weights,
                    computer.controller.mem_output_weights,
                    computer.controller.W, computer.controller.b
                ])

                computer.save(session1, ckpts_dir, 'test-restore')

        graph2 = tf.Graph()
        with graph2.as_default():
            with tf.Session(graph=graph2) as session2:

                computer = DNC(DummyController,
                               10,
                               20,
                               10,
                               10,
                               64,
                               2,
                               batch_size=2)
                session2.run(tf.initialize_all_variables())
                computer.restore(session2, ckpts_dir, 'test-restore')

                restored_weights = session2.run([
                    computer.controller.nn_output_weights,
                    computer.controller.interface_weights,
                    computer.controller.mem_output_weights,
                    computer.controller.W, computer.controller.b
                ])

                self.assertTrue(
                    all(
                        np.array_equal(restored_weights[i], saved_weights[i])
                        for i in range(5)))
Example #7
    def test_constructor(self):
        memory_config = {
            'memory_size': 4,
            'word_size': 5,
            'num_read_heads': 2,
        }
        dnc = DNC(10, controller_units=64, **memory_config)
        input_size = 17
        test_input = np.random.uniform(
            -3, 3, (2, dnc._W * dnc._R + input_size)).astype(np.float32)
        initial_state = dnc.get_initial_state(batch_size=2)
        _, _ = dnc(test_input, initial_state)
        self.assertEqual(dnc._interface_vector_size, 38)
        self.assertEqual(dnc.output_size, 10)
        self.assertEqual(dnc.get_config()["name"], "DNC")
Example #8
    def test_construction(self):
        interface = DNC.interface(
            read_keys=None,
            read_strengths=None,
            write_key=np.random.uniform(0, 1, (3, 9, 1)).astype(np.float32),
            write_strength=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            erase_vector=tf.convert_to_tensor(
                np.zeros((3, 9)).astype(np.float32)),
            write_vector=tf.convert_to_tensor(
                np.random.uniform(0, 1, (3, 9)).astype(np.float32)),
            free_gates=np.random.uniform(0, 1, (3, 5)).astype(np.float32),
            allocation_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            write_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            read_modes=None,
        )

        memory = Memory(13, 9, 5)
        memory_state = memory.get_initial_state(batch_size=3)
        usage, write_weighting, memory, link_matrix, precedence = memory.write(
            memory_state, interface)

        self.assertEqual(usage.shape, (3, 13))
        self.assertEqual(write_weighting.shape, (3, 13))
        self.assertEqual(memory.shape, (3, 13, 9))
        self.assertEqual(link_matrix.shape, (3, 13, 13))
        self.assertEqual(precedence.shape, (3, 13))
Example #9
    def test_construction(self):
        interface = DNC.interface(
            read_keys=None,
            read_strengths=None,
            write_key=np.random.uniform(0, 1, (3, 9, 1)).astype(np.float32),
            write_strength=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            erase_vector=tf.convert_to_tensor(
                np.zeros((3, 9)).astype(np.float32)),
            write_vector=tf.convert_to_tensor(
                np.random.uniform(0, 1, (3, 9)).astype(np.float32)),
            free_gates=np.random.uniform(0, 1, (3, 5)).astype(np.float32),
            allocation_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            write_gate=np.random.uniform(0, 1, (3, 1)).astype(np.float32),
            read_modes=None,
        )

        memory = Memory(13, 9, 5)
        memory_state = memory.initial_state(3)
        write_op = memory.write(memory_state, interface)
        init_op = tf.global_variables_initializer()

        with self.test_session() as session:
            init_op.run()
            usage, write_weighting, memory, link_matrix, precedence = session.run(
                write_op)

        self.assertEqual(usage.shape, (3, 13))
        self.assertEqual(write_weighting.shape, (3, 13))
        self.assertEqual(memory.shape, (3, 13, 9))
        self.assertEqual(link_matrix.shape, (3, 13, 13))
        self.assertEqual(precedence.shape, (3, 13))
Example #10
    def test_save(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.compat.v1.Session(graph=graph) as session:
                computer = DNC(DummyController,
                               10,
                               20,
                               10,
                               10,
                               64,
                               2,
                               batch_size=2)
                session.run(tf.compat.v1.global_variables_initializer())
                current_dir = os.path.dirname(__file__)
                ckpts_dir = os.path.join(current_dir, 'checkpoints')

                computer.save(session, ckpts_dir, 'test-save')

                # reaching this line means save() above completed without raising
                self.assertTrue(True)
Example #11
    def test_construction(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.compat.v1.Session(graph=graph) as session:
                computer = DNC(DummyController, 10, 20, 10, 10, 64, 1)
                rcomputer = DNC(DummyRecurrentController, 10, 20, 10, 10, 64,
                                1)

                self.assertEqual(computer.input_size, 10)
                self.assertEqual(computer.output_size, 20)
                self.assertEqual(computer.words_num, 10)
                self.assertEqual(computer.word_size, 64)
                self.assertEqual(computer.read_heads, 1)
                self.assertEqual(computer.batch_size, 1)
                self.assertTrue(isinstance(computer.memory, memory.Memory))
                self.assertTrue(
                    isinstance(computer.controller, DummyController))
                self.assertTrue(
                    isinstance(rcomputer.controller, DummyRecurrentController))
Example #12
    def test_dnc_output_shape(self):
        batch_size = 3
        memory_config = {
            'memory_size': 16,
            'word_size': 5,
            'num_read_heads': 7,
        }
        output_size = 10

        for input_size in [10, 17, 49]:
            dnc = DNC(output_size, controller_units=64, **memory_config)
            initial_state = dnc.get_initial_state(batch_size=batch_size)
            input_shape = dnc._W * dnc._R + input_size
            test_input = np.random.uniform(
                -3, 3, (batch_size, input_shape)).astype(np.float32)
            example_output, _ = dnc(
                tf.convert_to_tensor(test_input),
                initial_state,
            )
            self.assertEqual(example_output.shape, (batch_size, output_size))
Example #13
    def test_read_vectors_and_weightings(self):
        m = Memory.state(
            memory_matrix=np.random.uniform(-1, 1,
                                            (5, 11, 7)).astype(np.float32),
            usage_vector=None,
            link_matrix=None,
            precedence_vector=None,
            write_weighting=None,
            read_weightings=DNCMemoryTests.softmax_sample((5, 11, 3), axis=1),
        )
        # pull out read_modes due to https://github.com/tensorflow/tensorflow/issues/1409
        # hack to circumvent tf bug in not doing `convert_to_tensor` in einsum reductions correctly
        read_modes = DNCMemoryTests.softmax_sample((5, 3, 3), axis=1)
        i = DNC.interface(
            read_keys=np.random.uniform(0, 1, (5, 7, 3)).astype(np.float32),
            read_strengths=np.random.uniform(0, 1, (5, 3)).astype(np.float32),
            write_key=None,
            write_strength=None,
            erase_vector=None,
            write_vector=None,
            free_gates=None,
            allocation_gate=None,
            write_gate=None,
            read_modes=tf.convert_to_tensor(read_modes),
        )

        # read uses the link matrix that is produced after a write operation
        new_link_matrix = np.random.uniform(0, 1,
                                            (5, 11, 11)).astype(np.float32)
        # assume ContentAddressing and TemporalLinkAddressing are already correct
        lookup_weightings, forward_weighting, backward_weighting, \
            updated_read_weightings, updated_read_vectors = self.get_addressing_weights(
                m, i, new_link_matrix)
        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertEqual(updated_read_vectors.shape, (5, 7, 3))

        expected_read_weightings = np.zeros((5, 11, 3)).astype(np.float32)
        for read_head in range(3):
            backward_weight = (read_modes[:, 0, read_head, np.newaxis]
                               * backward_weighting[:, :, read_head])
            lookup_weight = (read_modes[:, 1, read_head, np.newaxis]
                             * lookup_weightings[:, :, read_head])
            forward_weight = (read_modes[:, 2, read_head, np.newaxis]
                              * forward_weighting[:, :, read_head])
            expected_read_weightings[:, :, read_head] = (
                backward_weight + lookup_weight + forward_weight)
        expected_read_vectors = np.matmul(
            np.transpose(m.memory_matrix, [0, 2, 1]), updated_read_weightings)

        self.assertAllClose(updated_read_weightings, expected_read_weightings)
        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertAllClose(updated_read_vectors, expected_read_vectors)
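
The per-head loop above checks the DNC read-mode mixing: each read weighting is a combination of the backward, content-lookup, and forward weightings. As a cross-check only, the same arithmetic can be written without the loop (read_modes has shape (batch, 3, heads) and each weighting has shape (batch, cells, heads), so the mode slices broadcast per head):

# Vectorized equivalent of the expected_read_weightings loop above (a sketch).
expected_vectorized = (read_modes[:, 0:1, :] * backward_weighting
                       + read_modes[:, 1:2, :] * lookup_weightings
                       + read_modes[:, 2:3, :] * forward_weighting)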
Example #14
    def test_call(self):
        graph = tf.Graph()
        with graph.as_default():
            with tf.Session(graph=graph) as session:

                computer = DNC(DummyController,
                               10,
                               20,
                               10,
                               10,
                               64,
                               2,
                               batch_size=3)
                rcomputer = DNC(DummyRecurrentController,
                                10,
                                20,
                                10,
                                10,
                                64,
                                2,
                                batch_size=3)
                input_batches = np.random.uniform(0, 1, (3, 5, 10)).astype(
                    np.float32)

                session.run(tf.initialize_all_variables())
                out_view = session.run(computer.get_outputs(),
                                       feed_dict={
                                           computer.input_data: input_batches,
                                           computer.sequence_length: 5
                                       })
                out, view = out_view

                rout_rview, ro, rs = session.run(
                    [
                        rcomputer.get_outputs(),
                        rcomputer.controller.get_state()[0],
                        rcomputer.controller.get_state()[1]
                    ],
                    feed_dict={
                        rcomputer.input_data: input_batches,
                        rcomputer.sequence_length: 5
                    })
                rout, rview = rout_rview

                self.assertEqual(out.shape, (3, 5, 20))
                self.assertEqual(view['free_gates'].shape, (3, 5, 2))
                self.assertEqual(view['allocation_gates'].shape, (3, 5, 1))
                self.assertEqual(view['write_gates'].shape, (3, 5, 1))
                self.assertEqual(view['read_weightings'].shape, (3, 5, 10, 2))
                self.assertEqual(view['write_weightings'].shape, (3, 5, 10))

                self.assertEqual(rout.shape, (3, 5, 20))
                self.assertEqual(rview['free_gates'].shape, (3, 5, 2))
                self.assertEqual(rview['allocation_gates'].shape, (3, 5, 1))
                self.assertEqual(rview['write_gates'].shape, (3, 5, 1))
                self.assertEqual(rview['read_weightings'].shape, (3, 5, 10, 2))
                self.assertEqual(rview['write_weightings'].shape, (3, 5, 10))
Example #15
    def test_constructor(self):
        controller_config = {
            "hidden_size": 64,
        }
        memory_config = {
            'read_heads_num': 2,
            'word_size': 5,
            'words_num': None,
        }
        dnc = DNC(controller_config,
                  memory_config,
                  10,
                  classic_dnc_output=False)
        self.assertEqual(dnc._interface_vector_size, 38)
        self.assertEqual(dnc._controller_to_interface_weights.output_size, 38)
        self.assertEqual(dnc._controller.output_size, tf.TensorShape([64]))
        self.assertEqual(dnc._controller_to_output_weights.output_size, 10)
        self.assertEqual(dnc._memory_to_output_weights.output_size, 10)
Example #16
    graph = tf.Graph()

    with graph.as_default():
        with tf.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum)
            summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)

            ncomputer = DNC(
                FeedforwardController,
                input_size,
                output_size,
                2 * sequence_max_length + 1,
                words_count,
                word_size,
                read_heads,
                batch_size
            )

            # squash the DNC output between 0 and 1
            output, _ = ncomputer.get_outputs()
            squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6, 1. - 1e-6)

            loss = binary_cross_entropy(squashed_output, ncomputer.target_output)

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    #with tf.control_dependencies([tf.Print(tf.zeros(1), [var.name, tf.is_nan(grad)])]):
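
The copy-task snippet above calls a binary_cross_entropy helper that is not shown. A minimal sketch of such a loss, assuming plain elementwise binary cross-entropy between the squashed predictions and the 0/1 targets (the project's own helper may differ):

def binary_cross_entropy(predictions, targets):
    # predictions were already clipped to (1e-6, 1 - 1e-6) above, so the logs are safe
    return tf.reduce_mean(-(targets * tf.log(predictions) +
                            (1 - targets) * tf.log(1 - predictions)))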
Example #17
def main():
    """
    Train the DNC to take a word and list its instances of vowels in order of occurrence.
    :return: None.
    """
    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data = load(os.path.join(data_dir, 'train', 'train.pkl'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    dict_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])

    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                            momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])

            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))
            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")

            last_100_losses = []

            start = 0 if start_step == 0 else start_step + 1
            end = start_step + iterations + 1

            start_time_100 = time.time()
            avg_100_time = 0.
            avg_counter = 0

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data, 1)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['#'], dict_size)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % end == 0)

                    loss_value, _, summary = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })

                    last_100_losses.append(loss_value)
                    if summarize:
                        summarizer.add_summary(summary, i)
                        llprint("\n\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... "),
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:

                    llprint("\nSaving Checkpoint ... "),
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    sys.exit(0)
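
The training scripts in these examples report progress through an llprint helper. A minimal sketch of such a write-and-flush utility, shown here only as an assumption (the project ships its own version):

import sys

def llprint(message):
    # write without appending a newline and flush immediately so that
    # "\rIteration i/end"-style updates overwrite the current line
    sys.stdout.write(message)
    sys.stdout.flush()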
Example #18
    mem_size = int(configs[9])
    mem_slot = int(configs[10])
    sequence_length = int(configs[11])
    iterations = int(configs[12])
    non_uniform_priority = configs[13].lower() == "true"
    mixin = configs[14].lower() == "true"
    copy_mode = False

    # Generate the model
    rnn = DNC(input_size=sequence_num_of_bits + 3,
              hidden_size=nhid,
              rnn_type=rnn_type,
              num_layers=1,
              num_hidden_layers=2,
              dropout=0,
              nr_cells=mem_slot,
              cell_size=mem_size,
              read_heads=1,
              gpu_id=-1,
              debug=True,
              batch_first=True,
              independent_linears=False,
              copy_mode=copy_mode)

    rnn.load_state_dict(torch.load(current_model))

    # Execute the evaluation
    sigm = T.nn.Sigmoid()

    sequence_length -= 1

    for i in tqdm(range(0, args.iterations)):
Example #19
def main():
    """
    Tests the latest checkpoint of the DNC that was trained on the vowels task. In this task, the DNC is given an input
    that consists of a sequence of letters and is asked to return any vowels contained in that sequence in order of
    their appearance in the sequence. For simplicity's sake, y is not considered a vowel.
    :return: None.
    """
    ckpts_dir = './checkpoints/'
    lexicon_dictionary = load('./data/encoded/lexicon-dict.pkl')
    target_code = lexicon_dictionary["#"]
    test_files = []

    for entry_name in os.listdir('./data/encoded/test/'):
        entry_path = os.path.join('./data/encoded/test/', entry_name)
        if os.path.isfile(entry_path):
            test_files.append(entry_path)

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            ncomputer = DNC(
                RecurrentController,
                input_size=len(lexicon_dictionary),
                output_size=len(lexicon_dictionary),
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )

            ncomputer.restore(session, ckpts_dir, 'step-100001')

            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            tasks_results = {}
            tasks_numbers = []
            counter = 0
            for test_file in test_files:
                test_data = load(test_file)
                task_regexp = r'([0-9]{1,4})test.txt.pkl'
                task_filename = os.path.basename(test_file)
                task_match_obj = re.match(task_regexp, task_filename)
                task_number = task_match_obj.group(1)
                tasks_numbers.append(task_number)
                results = []

                for story in test_data:
                    a_story = np.array(story['inputs'])
                    # Bool vector indicating if the target code is the value at that index in a_story
                    target_mask_1 = (a_story == target_code)
                    target_mask = target_mask_1.copy()
                    # Sets the first target code appearance to False so that it will remain in answer
                    target_mask[np.where(target_mask_1 == True)[0][0]] = False

                    desired_answers = np.array(story['outputs'])
                    input_vec, seq_len = prepare_sample(
                        [story], len(lexicon_dictionary))
                    softmax_output = session.run(softmaxed,
                                                 feed_dict={
                                                     ncomputer.input_data:
                                                     input_vec,
                                                     ncomputer.sequence_length:
                                                     seq_len
                                                 })

                    softmax_output = np.squeeze(softmax_output, axis=0)
                    given_answers = np.argmax(softmax_output[target_mask],
                                              axis=1)

                    is_correct = True
                    if len(given_answers) != len(desired_answers):
                        is_correct = False
                    else:
                        for i in range(len(given_answers)):
                            if given_answers[i] != desired_answers[i]:
                                is_correct = False
                    if not is_correct:
                        print("\nGiven: ", given_answers)
                        print("Expected: ", desired_answers)
                        results.append(False)
                    else:
                        results.append(True)

                counter += 1
                llprint("\rTests Completed ... %d/%d" %
                        (counter, len(test_files)))

                error_rate = 1. - np.mean(results)
                tasks_results[task_number] = error_rate
            print("\n")
            print(
                "-------------------------------------------------------------------"
            )
            all_tasks_results = [v for _, v in tasks_results.items()]
            results_mean = "%.2f%%" % (np.mean(all_tasks_results) * 100)
            failed_count = "%d" % (np.sum(np.array(all_tasks_results) > 0.05))

            print("%-27s%-27s" % ("Percent Failed", results_mean))
            print("%-27s%-27s" % ("Total Failed", failed_count))
Example #20
def main():
    """
    Runs an interactive shell where the user can submit input with their chosen delimiter and see the output of the
    DNC's latest checkpoint.
    :return: None
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    ckpts_dir = os.path.join(dir_path, 'checkpoints')
    lexicon_dictionary = load(
        os.path.join(dir_path, 'data', 'encoded', 'lexicon-dict.pkl'))
    target_code = lexicon_dictionary["#"]

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            ncomputer = DNC(
                RecurrentController,
                input_size=len(lexicon_dictionary),
                output_size=len(lexicon_dictionary),
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )

            ncomputer.restore(session, ckpts_dir, 'step-100001')

            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            print(
                "This is an interactive shell script. Here a user may test a trained neural network by passing it "
                "custom inputs and seeing if they elicit the desired output. \n Please note that a user may only "
                "test inputs that consist of words in the neural network's lexicon. If the user would like to quit"
                " the program, they can type ':q!' when prompted for an input. \n If the user would like to see the"
                " network's lexicon, they can type ':dict' when prompted for an input. Otherwise, the user may "
                "simply type the sequence of inputs that they would like to use and then hit the enter key. \n "
                "They will then be asked to specify the delimiter that distinguishes one word from another word."
                " The input will then be split using that delimiter. \n If all resulting inputs are in the "
                "network's lexicon, the network will then be fed these inputs and its output will be printed for "
                "the user along with its expected output.")

            my_input = input("Input:")
            while my_input != ":q!":
                if my_input == ":dict":
                    print(
                        "The neural network has been trained to recognize the following words:"
                    )
                    print(lexicon_dictionary)
                    my_input = input("Input:")
                    continue
                delimiter = input("Delimiter:")
                story = my_input.split(delimiter)
                if not set(story).issubset(lexicon_dictionary):
                    print("You may only test words that are in the lexicon dictionary.")
                    my_input = input("Input:")
                    continue

                desired_answers = get_solution(story)
                encoded_story = []
                encoded_answers = []
                for an_input in story:
                    encoded_story.append(lexicon_dictionary[an_input])
                for an_output in desired_answers:
                    encoded_answers.append(lexicon_dictionary[an_output])
                input_vec, _, seq_len, _ = prepare_sample(
                    [encoded_story], encoded_answers, target_code,
                    len(lexicon_dictionary))
                softmax_output = session.run(softmaxed,
                                             feed_dict={
                                                 ncomputer.input_data:
                                                 input_vec,
                                                 ncomputer.sequence_length:
                                                 seq_len
                                             })

                softmax_output = np.squeeze(softmax_output, axis=0)
                given_answers = np.argmax(
                    softmax_output[:len(desired_answers)], axis=1)

                print("Output: ", [
                    list(lexicon_dictionary.keys())[list(
                        lexicon_dictionary.values()).index(an_answer)]
                    for an_answer in given_answers
                ])
                is_correct = True
                if len(given_answers) != len(encoded_answers):
                    is_correct = False
                else:
                    for i in range(len(given_answers)):
                        if given_answers[i] != encoded_answers[i]:
                            is_correct = False
                if is_correct:
                    print("Correct!")
                else:
                    print("Expected: ", desired_answers)

                my_input = input("Input:")
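
This example relies on a get_solution helper that is not shown. Based on the task description in Example #19 (return the vowels of the input in order of appearance, with 'y' treated as a consonant), a hypothetical sketch of what it computes:

def get_solution(story):
    # hypothetical: collect the vowels of the tokenized input in order of appearance
    vowels = {'a', 'e', 'i', 'o', 'u'}
    return [token for token in story if token in vowels]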
Example #21
                     1,
                     0,
                     path_len_mean=path_len_mean,
                     path_len_std=path_len_std)
ep = env.start_ep()
num_subgoals = 3
#her_sample = False
her_coeff = 1.
ab = False
rnn = DNC(input_size=bit_str_len * 2 + 1,
          hidden_size=len(env.ep.actions_list),
          rnn_type=args.rnn_type,
          num_layers=args.nlayer,
          num_hidden_layers=args.nhlayer,
          dropout=args.dropout,
          nr_cells=args.mem_slot,
          cell_size=args.mem_size,
          read_heads=args.read_heads,
          gpu_id=args.cuda,
          debug=args.visdom,
          batch_first=True,
          independent_linears=True)

if args.cuda != -1:
    rnn = rnn.cuda(args.cuda)

    print(rnn)

if args.optim == 'adam':
    optimizer = optim.Adam(rnn.parameters(),
                           lr=args.lr,
Example #22
import tensorflow as tf
from dnc.dnc import DNC
import numpy as np

np.random.seed(1)

g = tf.Graph()
with g.as_default():
    batch_size = 4
    output_size = 20
    input_size = 10

    dnc = DNC(output_size,
              controller_units=128,
              memory_size=256,
              word_size=64,
              num_read_heads=4)
    initial_state = dnc.get_initial_state(batch_size=batch_size)
    example_input = np.random.uniform(0, 1, (batch_size, input_size)).astype(
        np.float32)
    output_op, _ = dnc(
        tf.convert_to_tensor(example_input),
        initial_state,
    )
    init = tf.global_variables_initializer()
    with tf.Session(graph=g) as sess:
        init.run()
        example_output = sess.run(output_op)

    tf.summary.FileWriter("graphs", g).close()
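
The script above runs the cell for a single step. A sketch of stepping the same cell over several inputs by threading the returned state (assuming, as in the other cell examples, that each call returns an (output, new_state) pair; this would live inside the same "with g.as_default():" block, before the session is run):

# Hedged sketch: feed three consecutive inputs through the cell, carrying state.
state = dnc.get_initial_state(batch_size=batch_size)
for _ in range(3):
    step_input = tf.convert_to_tensor(
        np.random.uniform(0, 1, (batch_size, input_size)).astype(np.float32))
    step_output_op, state = dnc(step_input, state)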
Example #23
    cuda = args.cuda
    iterations = args.iterations
    summarize_freq = args.summarize_freq
    check_freq = args.check_freq
    visdom = args.visdom

    from_checkpoint = None

    if args.memory_type == 'dnc':
        rnn = DNC(input_size=args.input_size,
                  hidden_size=args.nhid,
                  rnn_type=args.rnn_type,
                  num_layers=args.nlayer,
                  num_hidden_layers=args.nhlayer,
                  dropout=args.dropout,
                  nr_cells=args.mem_slot,
                  cell_size=args.mem_size,
                  read_heads=args.read_heads,
                  gpu_id=args.cuda,
                  debug=args.visdom,
                  batch_first=True,
                  independent_linears=True)
    elif args.memory_type == 'sdnc':
        rnn = SDNC(input_size=args.input_size,
                   hidden_size=args.nhid,
                   rnn_type=args.rnn_type,
                   num_layers=args.nlayer,
                   num_hidden_layers=args.nhlayer,
                   dropout=args.dropout,
                   nr_cells=args.mem_slot,
                   cell_size=args.mem_size,
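
Examples #21, #23, #29 and #30 only construct the PyTorch DNC. A rough forward-pass sketch, assuming the calling convention used later in Example #27 (with debug enabled the call also returns a dict of memory views):

import torch

x = torch.randn(4, 10, args.input_size)      # (batch, time, features); batch_first=True
(chx, mhx, rv) = (None, None, None)           # controller state, memory state, read vectors
output, (chx, mhx, rv), views = rnn(x, (chx, mhx, rv), reset_experience=True)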
Example #24
File: train.py  Project: Kajiyu/dnc-py3
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum)
            summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

            ncomputer = DNC(
                RecurrentController,
                input_size,
                output_size,
                sequence_max_length,
                words_count,
                word_size,
                read_heads,
                batch_size
            )

            output, memory_views = ncomputer.get_outputs()

            loss_weights = tf.placeholder(tf.float32, [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=ncomputer.target_output)
            )

            summeries = []

            gradients = optimizer.compute_gradients(loss)
Example #25
test_files = []

for entryname in os.listdir('data/en/test/'):
    entry_path = os.path.join('data/en/test/', entryname)
    if os.path.isfile(entry_path):
        test_files.append(entry_path)

graph = tf.Graph()
with graph.as_default():
    with tf.Session(graph=graph) as session:
        
        ncomputer = DNC(
            RecurrentController,
            input_size=len(lexicon_dictionary),
            output_size=len(lexicon_dictionary),
            max_sequence_length=100,
            memory_words_num=256,
            memory_word_size=64,
            memory_read_heads=4,
        )
        
        ncomputer.restore(session, ckpts_dir, 'step-30001')
        
        outputs, _ = ncomputer.get_outputs()
        softmaxed = tf.nn.softmax(outputs)
        
        tasks_results = {}
        tasks_names = {}
        for test_file in test_files:
            test_data = load(test_file)
            task_regexp = r'qa([0-9]{1,2})_([a-z\-]*)_test.txt.pkl'
Example #26
    def test_read_vectors_and_weightings(self):
        m = Memory.state(
            memory_matrix=np.random.uniform(-1, 1,
                                            (5, 11, 7)).astype(np.float32),
            usage_vector=None,
            link_matrix=None,
            precedence_vector=None,
            write_weighting=None,
            read_weightings=DNCMemoryTests.softmax_sample((5, 11, 3), axis=1),
        )
        i = DNC.interface(
            read_keys=np.random.uniform(0, 1, (5, 7, 3)).astype(np.float32),
            read_strengths=np.random.uniform(0, 1, (5, 3)).astype(np.float32),
            write_key=None,
            write_strength=None,
            erase_vector=None,
            write_vector=None,
            free_gates=None,
            allocation_gate=None,
            write_gate=None,
            read_modes=tf.convert_to_tensor(
                DNCMemoryTests.softmax_sample((5, 3, 3), axis=1)),
        )
        # read uses the link matrix that is produced after a write operation
        new_link_matrix = np.random.uniform(0, 1,
                                            (5, 11, 11)).astype(np.float32)

        # assume ContentAddressing and TemporalLinkAddressing are already correct
        op_ca = ContentAddressing.weighting(m.memory_matrix, i.read_keys,
                                            i.read_strengths)
        op_f, op_b = TemporalLinkAddressing.weightings(new_link_matrix,
                                                       m.read_weightings)
        read_op = Memory.read(m.memory_matrix, m.read_weightings,
                              new_link_matrix, i)
        with self.test_session() as session:
            lookup_weightings = session.run(op_ca)
            forward_weighting, backward_weighting = session.run([op_f, op_b])
            updated_read_weightings, updated_read_vectors = session.run(
                read_op)
            # hack to circumvent tf bug in not doing `convert_to_tensor` in einsum reductions correctly
            read_modes_numpy = tf.Session().run(i.read_modes)

        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertEqual(updated_read_vectors.shape, (5, 7, 3))

        expected_read_weightings = np.zeros((5, 11, 3)).astype(np.float32)
        for read_head in range(3):
            backward_weight = (read_modes_numpy[:, 0, read_head, np.newaxis]
                               * backward_weighting[:, :, read_head])
            lookup_weight = (read_modes_numpy[:, 1, read_head, np.newaxis]
                             * lookup_weightings[:, :, read_head])
            forward_weight = (read_modes_numpy[:, 2, read_head, np.newaxis]
                              * forward_weighting[:, :, read_head])
            expected_read_weightings[:, :, read_head] = (
                backward_weight + lookup_weight + forward_weight)
        expected_read_vectors = np.matmul(
            np.transpose(m.memory_matrix, [0, 2, 1]), updated_read_weightings)

        self.assertAllClose(updated_read_weightings, expected_read_weightings)
        self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
        self.assertAllClose(updated_read_vectors, expected_read_vectors)
Example #27
def generate_result_images(prediction, target, image_dir, experiment_name,
                           epoch, args, model_path):

    x, y, priority = generate_data(1,
                                   args.sequence_max_length,
                                   args.input_size + 3,
                                   steps=args.steps,
                                   non_uniform=False)

    print(priority.detach().numpy())
    print(np.argsort(-priority.detach().numpy(), axis=1))

    rnn = DNC(input_size=args.input_size + 3,
              hidden_size=args.nhid,
              rnn_type=args.rnn_type,
              num_layers=args.nlayer,
              num_hidden_layers=args.nhlayer,
              dropout=args.dropout,
              nr_cells=args.mem_slot,
              cell_size=args.mem_size,
              read_heads=args.read_heads,
              gpu_id=args.cuda,
              debug=True,
              batch_first=True,
              independent_linears=args.independent_linears)
    rnn.load_state_dict(torch.load(model_path))

    (chx, mhx, rv) = (None, None, None)
    output, (chx, mhx, rv), v = rnn(x, (None, mhx, None),
                                    reset_experience=True,
                                    pass_through_memory=True)

    # This is needed if we want to use make_eval_plot
    sigm = T.nn.Sigmoid()
    prediction = sigm(
        output[:, -args.sequence_max_length:, :-3]).detach().numpy()[0]
    target = y[:, :, :-3].detach().numpy()[0]

    fig = plt.figure(figsize=(5, 5))
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(212)

    ax1.set_title("Result")
    ax2.set_title("Target")
    ax3.set_title("Input")

    x = x.detach().numpy()[0]
    prediction = np.swapaxes(prediction, 0, 1)
    target = np.swapaxes(target, 0, 1)
    x = np.swapaxes(x, 0, 1)

    prediction_bin = []
    for t in prediction:
        prediction_bin.append((t > 0.5))
    prediction = T.from_numpy(np.array(prediction_bin))

    sns.heatmap(prediction,
                ax=ax1,
                vmin=0,
                vmax=1,
                linewidths=.5,
                linecolor="black",
                cmap="Greys",
                cbar=True)
    sns.heatmap(target,
                ax=ax2,
                vmin=0,
                vmax=1,
                linewidths=.5,
                linecolor="black",
                cmap="Greys",
                cbar=True)
    sns.heatmap(x,
                ax=ax3,
                vmin=0,
                vmax=1,
                linewidths=.5,
                linecolor="black",
                cmap="Greys",
                cbar=True)

    plt.tight_layout()
    plt.savefig(image_dir + "/result_" + experiment_name +
                "_{}.png".format(epoch),
                dpi=250)

    #fig = plt.figure(figsize=(15,10))
    fig = plt.figure()
    ax1_2 = fig.add_subplot(321)
    ax2_2 = fig.add_subplot(325)
    ax3_2 = fig.add_subplot(322)
    ax4_2 = fig.add_subplot(324)
    ax5_2 = fig.add_subplot(326)
    ax6_2 = fig.add_subplot(323)
    ax1_2.set_title("Read Weights")
    ax2_2.set_title("Write Weights")
    ax3_2.set_title("Forward Mode")
    ax4_2.set_title("Content Mode")
    ax5_2.set_title("Backward Mode")
    ax6_2.set_title("Read Modes")
    ax6_2.set_yticklabels(["back", "forw", "cont"])

    sns.heatmap(v['read_weights'].T, ax=ax1_2, linewidths=.01)
    sns.heatmap(v['write_weights'].T, ax=ax2_2, linewidths=.01)
    sns.heatmap(v['forward_mode'].T, ax=ax3_2, linewidths=.01)
    sns.heatmap(v['content_mode'].T, ax=ax4_2, linewidths=.01)
    sns.heatmap(v['backward_mode'].T, ax=ax5_2, linewidths=.01)
    sns.heatmap(v['read_modes'].T, ax=ax6_2, linewidths=.01)

    plt.tight_layout()
    plt.savefig(image_dir + "/weights_" + experiment_name +
                "_{}.png".format(epoch),
                dpi=250)
Example #28
def main():
    """
    Train the DNC to answer questions from the DREAM dataset.
    :return: None.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints/')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data_files = os.listdir(os.path.join(data_dir, 'train'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    word_space_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000

    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])

    for opt in options:
        if opt[0] == '--checkpoint':
            from_checkpoint = opt[1]
            print("Checkpoint found")
        elif opt[0] == '--iterations':
            iterations = int(opt[1])
        elif opt[0] == '--start':
            start_step = int(opt[1])

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                            momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            for (grad, var) in gradients:
                if grad is not None:
                    summaries.append(
                        tf.compat.v1.summary.histogram(var.name + '/grad',
                                                       grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))

            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")
            elif os.path.exists(ckpts_dir):
                checkpoints = os.listdir(ckpts_dir)
                if len(checkpoints) != 0 and any("step-" in s
                                                 for s in checkpoints):
                    checkpoint_numbers = [
                        int(checkpoint[checkpoint.find("-") + 1:])
                        for checkpoint in checkpoints
                        if checkpoint[checkpoint.find("-") + 1:].isnumeric()
                    ]
                    checkpoint_numbers.sort()
                    ncomputer.restore(session, ckpts_dir,
                                      f"step-{checkpoint_numbers[-1]}")
                    start = checkpoint_numbers[-1]
                    end = 100000

            last_100_losses = []

            if 'start' not in locals():
                start = 0
                end = 100000
            if from_checkpoint is not None:
                start = int(from_checkpoint[from_checkpoint.find("-") + 1:])

            start_time_100 = time.time()
            end_time_100 = None
            avg_100_time = 0.
            avg_counter = 0

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample = np.random.choice(data_files, 1)
                    with open(os.path.join(data_dir, 'train', sample[0])) as f:
                        sample = json.load(f)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['='], word_space_size,
                        lexicon_dict)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % 200 == 0)
                    #For debugging
                    outputs, _ = ncomputer.get_outputs()
                    softmaxed = tf.nn.softmax(outputs)

                    loss_value, _, summary, softmax_output = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize,
                            softmaxed
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })
                    softmax_output = np.squeeze(softmax_output, axis=0)
                    given_answers = np.argmax(softmax_output, axis=1)

                    words = []
                    for an_array in target_output[0]:
                        for word in np.where(an_array == 1):
                            words.extend([
                                list(lexicon_dict.keys())[np.where(
                                    an_array == 1)[0][0]]
                            ])

                    last_100_losses.append(loss_value)
                    if summarize:
                        print("\n\tLoss value: ", loss_value)
                        print("\tTarget output: ", words)
                        print("\tOutput: ", [
                            list(lexicon_dict.keys())[num]
                            for num in given_answers
                        ])
                        summarizer.add_summary(summary, i)
                        llprint("\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours\n" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... line 237 "),
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:

                    llprint("\nSaving Checkpoint ... "),
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    sys.exit(0)
Example #29
    batch_size = args.batch_size
    summarize_freq = args.summarize_freq
    check_freq = args.check_freq

    mem_slot = args.mem_slot
    mem_size = args.mem_size
    read_heads = args.read_heads

    rnn = DNC(input_size=args.bits + 2,
              hidden_size=args.nhid,
              rnn_type=args.rnn_type,
              num_layers=args.nlayer,
              num_hidden_layers=args.nhlayer,
              dropout=args.dropout,
              nr_cells=mem_slot,
              cell_size=mem_size,
              read_heads=read_heads,
              gpu_id=args.cuda,
              debug=args.debug,
              batch_first=True,
              independent_linears=True)

    print(rnn)
    if args.cuda != -1:
        rnn = rnn.cuda(args.cuda)

    last_save_losses = []
    optimizer = optim.Adam(rnn.parameters(),
                           lr=args.lr,
                           eps=1e-9,
Example #30
    mem_size = args.mem_size
    read_heads = args.read_heads

    independent_linears=False
    if args.independent_linears:
        independent_linears=args.independent_linears

    if args.memory_type == 'dnc':
        rnn = DNC(
        input_size=args.input_size+3,
        hidden_size=args.nhid,
        rnn_type=args.rnn_type,
        num_layers=args.nlayer,
        num_hidden_layers=args.nhlayer,
        dropout=args.dropout,
        nr_cells=mem_slot,
        cell_size=mem_size,
        read_heads=read_heads,
        gpu_id=args.cuda,
        debug=True,
        batch_first=True,
        independent_linears=independent_linears,
        copy_mode=args.copy_operation
    )
    elif args.memory_type == 'sdnc':
        rnn = SDNC(
        input_size=args.input_size+3,
        hidden_size=args.nhid,
        rnn_type=args.rnn_type,
        num_layers=args.nlayer,
        num_hidden_layers=args.nhlayer,
        dropout=args.dropout,