def test_dnc_output_shape(self):
    """A single DNC step must return a ``(batch_size, output_size)`` output.

    The check is repeated for several external input widths; a fresh DNC is
    built for each width and evaluated once inside a test session.
    """
    n_batch = 3
    n_out = 10
    controller_cfg = {"hidden_size": 64}
    memory_cfg = {
        'read_heads_num': 7,
        'word_size': 5,
        'words_num': 16,
    }
    expected_shape = (n_batch, n_out)

    for external_width in [10, 17, 49]:
        cell = DNC(controller_cfg,
                   memory_cfg,
                   n_out,
                   classic_dnc_output=False)
        start_state = cell.initial_state(n_batch)

        # The step input is ``_W * _R`` columns wider than the external input.
        step_width = cell._W * cell._R + external_width
        samples = np.random.uniform(-3, 3, (n_batch, step_width))
        step_input = tf.convert_to_tensor(samples.astype(np.float32))

        output_op, _ = cell(step_input, start_state)
        init_op = tf.global_variables_initializer()

        with self.test_session() as session:
            init_op.run()
            result = session.run(output_op)

        self.assertEqual(result.shape, expected_shape)
def test_eager_dnc_optimization(self):
    """Wrap the DNC cell in a Keras RNN and train it on one eager batch.

    The test succeeds when compiling and ``train_on_batch`` run without
    raising and the model's reported output width equals ``n_out``.
    """
    n_batch, n_steps, n_in, n_out = 7, 5, 15, 36

    sequence_in = tf.keras.Input(shape=(None, n_in))
    cell = DNC(n_out,
               controller_units=30,
               memory_size=27,
               word_size=9,
               num_read_heads=10)
    start_state = cell.get_initial_state(batch_size=n_batch)

    rnn_layer = tf.keras.layers.RNN(cell)
    sequence_out = rnn_layer(sequence_in, initial_state=start_state)
    model = tf.keras.models.Model(sequence_in, sequence_out)

    model.compile(
        optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
        loss='mse',
        run_eagerly=True,
    )

    # All-zero inputs and targets: only the mechanics of a step are tested.
    batch_inputs = np.zeros((n_batch, n_steps, n_in))
    batch_targets = np.zeros((n_batch, n_out))
    model.train_on_batch(batch_inputs, batch_targets)

    self.assertEqual(model.output_shape[1], n_out)
def test_dnc_optimization(self):
    """Run one gradient-descent step through a DNC unrolled by dynamic_rnn.

    There is no assertion on the loss value; the test passes when the graph
    can be built, initialised and optimised once without raising.
    """
    n_batch, n_steps, n_in, n_out = 7, 15, 30, 36
    controller_cfg = {"hidden_size": 64}
    memory_cfg = {
        'read_heads_num': 10,
        'word_size': 9,
        'words_num': 27,
    }

    cell = DNC(controller_cfg,
               memory_cfg,
               n_out,
               classic_dnc_output=False)
    start_state = cell.initial_state(n_batch)

    # Time-major layout: (time, batch, features).
    sequence = tf.random_normal([n_steps, n_batch, n_in])
    outputs, _ = rnn.dynamic_rnn(cell=cell,
                                 inputs=sequence,
                                 initial_state=start_state,
                                 time_major=True)

    targets = np.random.rand(n_steps, n_batch, n_out)
    squared_error = tf.square(outputs - targets)
    loss = tf.reduce_mean(squared_error)

    train_op = tf.train.GradientDescentOptimizer(5).minimize(loss)
    init_op = tf.global_variables_initializer()

    with self.test_session():
        init_op.run()
        train_op.run()
def test_final_output(self):
    """Sum of the two output projections must be ``(batch, output_size)``.

    Random read vectors and a random controller output are pushed through
    ``_memory_to_output_weights`` and ``_controller_to_output_weights``
    respectively, and the shape of their sum is checked.
    """
    n_out = 19
    n_batch = 6
    controller_cfg = {"hidden_size": 64}
    memory_cfg = {'words_num': 20, 'word_size': 5, 'read_heads_num': 2}

    cell = DNC(controller_cfg,
               memory_cfg,
               n_out,
               classic_dnc_output=False)

    controller_out = np.random.uniform(-1, 1, (n_batch, n_out))
    controller_out = controller_out.astype(np.float32)
    read_vectors = np.random.uniform(
        0, 1,
        (n_batch, memory_cfg['word_size'], memory_cfg['read_heads_num']))
    read_vectors = read_vectors.astype(np.float32)

    # Flatten the read vectors to one row of _W * _R values per batch item.
    flat_reads = np.reshape(read_vectors, (-1, cell._W * cell._R))
    from_memory = cell._memory_to_output_weights(
        tf.convert_to_tensor(flat_reads))
    from_controller = cell._controller_to_output_weights(
        tf.convert_to_tensor(controller_out))
    combined = from_memory + from_controller

    init_op = tf.global_variables_initializer()
    with self.test_session() as session:
        init_op.run()
        value = session.run(combined)

    self.assertEqual(value.shape, (n_batch, n_out))
def test_parse_interface_vector(self):
    """Compare ``DNC._parse_interface_vector`` against a NumPy reference.

    A random 38-wide interface vector is sliced by hand into its named
    components (word size 5, two read heads) and each component is compared
    with the parsed result in its own sub-test.
    """
    n_out = 10
    n_batch = 2
    width = 38

    def sigmoid(values):
        return 1.0 / (1 + np.exp(-1 * values))

    def oneplus(values):
        return 1 + np.log(np.exp(values) + 1)

    def softmax_dim1(x):
        # Softmax along axis 1, shifted by the per-slice maximum first.
        shifted = np.atleast_2d(x)
        shifted = shifted - np.max(shifted, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    raw = np.random.uniform(-3, 3, (n_batch, width)).astype(np.float32)

    expected = {
        "read_keys": raw[:, :10].reshape(-1, 5, 2),
        "read_strengths": oneplus(raw[:, 10:12].reshape(-1, 2)),
        "write_key": raw[:, 12:17].reshape(-1, 5, 1),
        "write_strength": oneplus(raw[:, 17].reshape(-1, 1)),
        "erase_vector": sigmoid(raw[:, 18:23].reshape(-1, 5)),
        "write_vector": raw[:, 23:28].reshape(-1, 5),
        "free_gates": sigmoid(raw[:, 28:30].reshape(-1, 2)),
        "allocation_gate": sigmoid(raw[:, 30, np.newaxis]),
        "write_gate": sigmoid(raw[:, 31, np.newaxis]),
        "read_modes": softmax_dim1(raw[:, 32:].reshape(-1, 3, 2)),
    }

    cell = DNC(n_out,
               controller_units=64,
               memory_size=None,
               word_size=5,
               num_read_heads=2)
    parsed = cell._parse_interface_vector(raw)._asdict()

    for name, reference in expected.items():
        with self.subTest(name=name):
            self.assertAllClose(parsed[name], reference)
def test_restore(self):
    """Weights saved by one DNC must be restored bit-for-bit into another.

    A DNC is built and initialised in its own graph/session, its controller
    weights are read out and a checkpoint named ``test-restore`` is written.
    A second, independently initialised DNC in a fresh graph then restores
    that checkpoint, and each of its controller weights is compared with the
    saved value.
    """
    ckpts_dir = os.path.join(os.path.dirname(__file__), 'checkpoints')
    weight_names = ('nn_output_weights', 'interface_weights',
                    'mem_output_weights', 'W', 'b')

    def controller_weights(dnc):
        # Fetch list in the same order as ``weight_names``.
        return [getattr(dnc.controller, name) for name in weight_names]

    graph1 = tf.Graph()
    with graph1.as_default():
        with tf.Session(graph=graph1) as session1:
            computer = DNC(DummyController, 10, 20, 10, 10, 64, 2,
                           batch_size=2)
            session1.run(tf.initialize_all_variables())
            saved_weights = session1.run(controller_weights(computer))
            computer.save(session1, ckpts_dir, 'test-restore')

    graph2 = tf.Graph()
    with graph2.as_default():
        with tf.Session(graph=graph2) as session2:
            computer = DNC(DummyController, 10, 20, 10, 10, 64, 2,
                           batch_size=2)
            # Initialise first so restore has to overwrite fresh values.
            session2.run(tf.initialize_all_variables())
            computer.restore(session2, ckpts_dir, 'test-restore')
            restored_weights = session2.run(controller_weights(computer))

    # One assertion per weight (instead of a product of booleans through
    # np.product, which NumPy 2.0 removed) so a failure names the culprit.
    self.assertEqual(len(restored_weights), len(saved_weights))
    for name, saved, restored in zip(weight_names, saved_weights,
                                     restored_weights):
        self.assertTrue(
            np.array_equal(restored, saved),
            msg="restored controller weight %r differs from saved value" %
            name)
def test_constructor(self):
    """Check attributes of a DNC cell after it has been called once.

    The cell is invoked on a random batch first, and then its interface
    vector size, output size and configured name are verified.
    """
    cell = DNC(10,
               controller_units=64,
               memory_size=4,
               word_size=5,
               num_read_heads=2)

    external_width = 17
    step_width = cell._W * cell._R + external_width
    step_input = np.random.uniform(-3, 3, (2, step_width))
    step_input = step_input.astype(np.float32)
    start_state = cell.get_initial_state(batch_size=2)

    # The call must return exactly two values; neither is inspected.
    _output, _next_state = cell(step_input, start_state)

    self.assertEqual(cell._interface_vector_size, 38)
    self.assertEqual(cell.output_size, 10)
    self.assertEqual(cell.get_config()["name"], "DNC")
def test_construction(self):
    """``Memory.write`` must return five results with the expected shapes.

    Only the write-related interface fields are populated; the read-related
    ones are left as ``None``.
    """

    def uniform(*shape):
        return np.random.uniform(0, 1, shape).astype(np.float32)

    interface = DNC.interface(
        read_keys=None,
        read_strengths=None,
        write_key=uniform(3, 9, 1),
        write_strength=uniform(3, 1),
        erase_vector=tf.convert_to_tensor(
            np.zeros((3, 9)).astype(np.float32)),
        write_vector=tf.convert_to_tensor(uniform(3, 9)),
        free_gates=uniform(3, 5),
        allocation_gate=uniform(3, 1),
        write_gate=uniform(3, 1),
        read_modes=None,
    )

    module = Memory(13, 9, 5)
    start_state = module.get_initial_state(batch_size=3)
    written = module.write(start_state, interface)
    usage, write_weighting, matrix, link_matrix, precedence = written

    self.assertEqual(usage.shape, (3, 13))
    self.assertEqual(write_weighting.shape, (3, 13))
    self.assertEqual(matrix.shape, (3, 13, 9))
    self.assertEqual(link_matrix.shape, (3, 13, 13))
    self.assertEqual(precedence.shape, (3, 13))
def test_construction(self):
    """Evaluate ``Memory.write`` in a session and check result shapes.

    Only the write-related interface fields are populated; the read-related
    ones are left as ``None``.
    """

    def uniform(*shape):
        return np.random.uniform(0, 1, shape).astype(np.float32)

    interface = DNC.interface(
        read_keys=None,
        read_strengths=None,
        write_key=uniform(3, 9, 1),
        write_strength=uniform(3, 1),
        erase_vector=tf.convert_to_tensor(
            np.zeros((3, 9)).astype(np.float32)),
        write_vector=tf.convert_to_tensor(uniform(3, 9)),
        free_gates=uniform(3, 5),
        allocation_gate=uniform(3, 1),
        write_gate=uniform(3, 1),
        read_modes=None,
    )

    module = Memory(13, 9, 5)
    start_state = module.initial_state(3)
    write_op = module.write(start_state, interface)
    init_op = tf.global_variables_initializer()

    with self.test_session() as session:
        init_op.run()
        written = session.run(write_op)

    usage, write_weighting, matrix, link_matrix, precedence = written
    self.assertEqual(usage.shape, (3, 13))
    self.assertEqual(write_weighting.shape, (3, 13))
    self.assertEqual(matrix.shape, (3, 13, 9))
    self.assertEqual(link_matrix.shape, (3, 13, 13))
    self.assertEqual(precedence.shape, (3, 13))
def test_save(self):
    """Saving a freshly initialised DNC must not raise.

    The checkpoint is written as ``test-save`` under the ``checkpoints``
    directory next to this file. The final assertion is unconditional, so
    the test only fails if building, initialising or saving raises.
    """
    ckpts_dir = os.path.join(os.path.dirname(__file__), 'checkpoints')

    graph = tf.Graph()
    with graph.as_default(), \
            tf.compat.v1.Session(graph=graph) as session:
        computer = DNC(DummyController, 10, 20, 10, 10, 64, 2,
                       batch_size=2)
        init_op = tf.compat.v1.global_variables_initializer()
        session.run(init_op)
        computer.save(session, ckpts_dir, 'test-save')

    self.assertTrue(True)
def test_construction(self):
    """Constructor arguments must land on the matching DNC attributes.

    Also checks that the memory module and the controller (feed-forward and
    recurrent dummy variants) are instances of the expected classes.
    """
    graph = tf.Graph()
    with graph.as_default(), tf.compat.v1.Session(graph=graph):
        computer = DNC(DummyController, 10, 20, 10, 10, 64, 1)
        rcomputer = DNC(DummyRecurrentController, 10, 20, 10, 10, 64, 1)

        expected_attributes = (
            ('input_size', 10),
            ('output_size', 20),
            ('words_num', 10),
            ('word_size', 64),
            ('read_heads', 1),
            ('batch_size', 1),
        )
        for attribute, value in expected_attributes:
            self.assertEqual(getattr(computer, attribute), value)

        self.assertIsInstance(computer.memory, memory.Memory)
        self.assertIsInstance(computer.controller, DummyController)
        self.assertIsInstance(rcomputer.controller,
                              DummyRecurrentController)
def test_dnc_output_shape(self):
    """A single DNC step must return a ``(batch_size, output_size)`` output.

    The check is repeated for several external input widths, building a new
    cell for each one.
    """
    n_batch = 3
    n_out = 10
    expected_shape = (n_batch, n_out)

    for external_width in [10, 17, 49]:
        cell = DNC(n_out,
                   controller_units=64,
                   memory_size=16,
                   word_size=5,
                   num_read_heads=7)
        start_state = cell.get_initial_state(batch_size=n_batch)

        # The step input is ``_W * _R`` columns wider than the external input.
        step_width = cell._W * cell._R + external_width
        samples = np.random.uniform(-3, 3, (n_batch, step_width))
        step_input = tf.convert_to_tensor(samples.astype(np.float32))

        step_output, _ = cell(step_input, start_state)
        self.assertEqual(step_output.shape, expected_shape)
def test_read_vectors_and_weightings(self):
    """Check read weightings and read vectors against a NumPy reference.

    The content, forward and backward weightings returned by
    ``self.get_addressing_weights`` are taken as correct; the test verifies
    that they are mixed by the read modes as expected and that the read
    vectors equal the transposed memory matrix times the read weightings.
    """
    n_heads = 3

    state = Memory.state(
        memory_matrix=np.random.uniform(-1, 1,
                                        (5, 11, 7)).astype(np.float32),
        usage_vector=None,
        link_matrix=None,
        precedence_vector=None,
        write_weighting=None,
        read_weightings=DNCMemoryTests.softmax_sample((5, 11, n_heads),
                                                      axis=1),
    )

    # Keep the NumPy read modes around for the reference computation below;
    # see https://github.com/tensorflow/tensorflow/issues/1409 (a workaround
    # for TF not applying `convert_to_tensor` correctly in einsum
    # reductions).
    modes = DNCMemoryTests.softmax_sample((5, 3, n_heads), axis=1)

    interface = DNC.interface(
        read_keys=np.random.uniform(0, 1, (5, 7, n_heads)).astype(
            np.float32),
        read_strengths=np.random.uniform(0, 1, (5, n_heads)).astype(
            np.float32),
        write_key=None,
        write_strength=None,
        erase_vector=None,
        write_vector=None,
        free_gates=None,
        allocation_gate=None,
        write_gate=None,
        read_modes=tf.convert_to_tensor(modes),
    )

    # Reading uses the link matrix produced by the preceding write.
    link = np.random.uniform(0, 1, (5, 11, 11)).astype(np.float32)

    # ContentAddressing and TemporalLinkAddressing are assumed correct.
    weights = self.get_addressing_weights(state, interface, link)
    lookup, forward, backward, read_weightings, read_vectors = weights

    self.assertEqual(read_weightings.shape, (5, 11, n_heads))
    self.assertEqual(read_vectors.shape, (5, 7, n_heads))

    reference_weightings = np.zeros((5, 11, n_heads)).astype(np.float32)
    for head in range(n_heads):
        # Mode 0 scales backward, mode 1 content lookup, mode 2 forward.
        from_backward = modes[:, 0, head, np.newaxis] * backward[:, :, head]
        from_lookup = modes[:, 1, head, np.newaxis] * lookup[:, :, head]
        from_forward = modes[:, 2, head, np.newaxis] * forward[:, :, head]
        reference_weightings[:, :, head] = (from_backward + from_lookup +
                                            from_forward)

    reference_vectors = np.matmul(
        np.transpose(state.memory_matrix, [0, 2, 1]), read_weightings)

    self.assertAllClose(read_weightings, reference_weightings)
    self.assertEqual(read_weightings.shape, (5, 11, n_heads))
    self.assertAllClose(read_vectors, reference_vectors)
def test_call(self):
    """Run both controller variants on one batch and check output shapes.

    A feed-forward and a recurrent dummy-controller DNC are evaluated on the
    same random batch; the output and every entry of the returned memory
    view must have the expected shape for both.
    """

    def check_shapes(output, view):
        self.assertEqual(output.shape, (3, 5, 20))
        self.assertEqual(view['free_gates'].shape, (3, 5, 2))
        self.assertEqual(view['allocation_gates'].shape, (3, 5, 1))
        self.assertEqual(view['write_gates'].shape, (3, 5, 1))
        self.assertEqual(view['read_weightings'].shape, (3, 5, 10, 2))
        self.assertEqual(view['write_weightings'].shape, (3, 5, 10))

    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        computer = DNC(DummyController, 10, 20, 10, 10, 64, 2,
                       batch_size=3)
        rcomputer = DNC(DummyRecurrentController, 10, 20, 10, 10, 64, 2,
                        batch_size=3)

        batch = np.random.uniform(0, 1, (3, 5, 10)).astype(np.float32)
        session.run(tf.initialize_all_variables())

        out, view = session.run(
            computer.get_outputs(),
            feed_dict={
                computer.input_data: batch,
                computer.sequence_length: 5,
            })

        # The recurrent controller's two state tensors are fetched too, but
        # only to make sure they can be evaluated.
        recurrent_fetches = [
            rcomputer.get_outputs(),
            rcomputer.controller.get_state()[0],
            rcomputer.controller.get_state()[1],
        ]
        (rout, rview), _state0, _state1 = session.run(
            recurrent_fetches,
            feed_dict={
                rcomputer.input_data: batch,
                rcomputer.sequence_length: 5,
            })

        check_shapes(out, view)
        check_shapes(rout, rview)
def test_constructor(self):
    """Check the sizes of the sub-modules a new DNC wires together.

    With word size 5 and two read heads the interface vector must be 38
    wide; the controller is 64 wide and both output projections are 10 wide.
    """
    controller_cfg = {"hidden_size": 64}
    memory_cfg = {
        'read_heads_num': 2,
        'word_size': 5,
        'words_num': None,
    }
    cell = DNC(controller_cfg, memory_cfg, 10, classic_dnc_output=False)

    interface_width = 38
    output_width = 10

    self.assertEqual(cell._interface_vector_size, interface_width)
    self.assertEqual(cell._controller_to_interface_weights.output_size,
                     interface_width)
    self.assertEqual(cell._controller.output_size, tf.TensorShape([64]))
    self.assertEqual(cell._controller_to_output_weights.output_size,
                     output_width)
    self.assertEqual(cell._memory_to_output_weights.output_size,
                     output_width)
graph = tf.Graph() with graph.as_default(): with tf.Session(graph=graph) as session: llprint("Building Computational Graph ... ") optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum) summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph) ncomputer = DNC( FeedforwardController, input_size, output_size, 2 * sequence_max_length + 1, words_count, word_size, read_heads, batch_size ) # squash the DNC output between 0 and 1 output, _ = ncomputer.get_outputs() squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6, 1. - 1e-6) loss = binary_cross_entropy(squashed_output, ncomputer.target_output) gradients = optimizer.compute_gradients(loss) for i, (grad, var) in enumerate(gradients): if grad is not None: #with tf.control_dependencies([tf.Print(tf.zeros(1), [var.name, tf.is_nan(grad)])]):
def main():
    """
    Train the DNC to take a word and list its instances of vowels in order of
    occurrence.

    Recognised command-line options:

    * ``--checkpoint=NAME``  restore the checkpoint ``NAME`` before training
    * ``--iterations=N``     number of iterations to run (default 100000)
    * ``--start=N``          step number to resume counting from (default 0)

    Summaries are written to ``logs`` and checkpoints to ``checkpoints``,
    both next to this file. A checkpoint is saved on the final iteration and
    when training is interrupted with Ctrl-C.

    :return: None.
    """
    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data = load(os.path.join(data_dir, 'train', 'train.pkl'))
    llprint("Done!\n")

    batch_size = 1
    dict_size = len(lexicon_dict)
    input_size = output_size = dict_size
    sequence_max_length = 100
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])
    for flag, value in options:
        if flag == '--checkpoint':
            from_checkpoint = value
        elif flag == '--iterations':
            iterations = int(value)
        elif flag == '--start':
            start_step = int(value)

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(
                learning_rate, momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            # The writer buffers events; close it on every exit path
            # (normal end, Ctrl-C, error) so pending summaries are flushed.
            try:
                ncomputer = DNC(RecurrentController, input_size, output_size,
                                sequence_max_length, words_count, word_size,
                                read_heads, batch_size)

                def save_checkpoint(step):
                    # Single place for the save-and-report sequence.
                    llprint("\nSaving Checkpoint ... ")
                    ncomputer.save(session, ckpts_dir, 'step-%d' % step)
                    llprint("Done!\n")

                output, _ = ncomputer.get_outputs()

                loss_weights = tf.compat.v1.placeholder(
                    tf.float32, [batch_size, None, 1])
                loss = tf.reduce_mean(
                    loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                        logits=output, labels=ncomputer.target_output))

                summaries = []

                gradients = optimizer.compute_gradients(loss)
                # Clip every available gradient to [-10, 10].
                for i, (grad, var) in enumerate(gradients):
                    if grad is not None:
                        gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
                for (grad, var) in gradients:
                    if grad is not None:
                        summaries.append(
                            tf.compat.v1.summary.histogram(
                                var.name + '/grad', grad))

                apply_gradients = optimizer.apply_gradients(gradients)

                summaries.append(tf.compat.v1.summary.scalar("Loss", loss))

                summarize_op = tf.compat.v1.summary.merge(summaries)
                no_summarize = tf.no_op()

                llprint("Done!\n")

                llprint("Initializing Variables ... ")
                session.run(tf.compat.v1.global_variables_initializer())
                llprint("Done!\n")

                if from_checkpoint is not None:
                    llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                    ncomputer.restore(session, ckpts_dir, from_checkpoint)
                    llprint("Done!\n")

                last_100_losses = []

                start = 0 if start_step == 0 else start_step + 1
                end = start_step + iterations + 1

                start_time_100 = time.time()
                avg_100_time = 0.
                avg_counter = 0

                for i in range(start, end + 1):
                    try:
                        llprint("\rIteration %d/%d" % (i, end))

                        sample = np.random.choice(data, 1)
                        input_data, target_output, seq_len, weights = \
                            prepare_sample(sample, lexicon_dict['#'],
                                           dict_size)

                        summarize = (i % 100 == 0)
                        # For the i values visited here this is only true
                        # on the last iteration (i == end).
                        take_checkpoint = (i != 0) and (i % end == 0)

                        loss_value, _, summary = session.run(
                            [
                                loss, apply_gradients,
                                summarize_op if summarize else no_summarize
                            ],
                            feed_dict={
                                ncomputer.input_data: input_data,
                                ncomputer.target_output: target_output,
                                ncomputer.sequence_length: seq_len,
                                loss_weights: weights
                            })

                        last_100_losses.append(loss_value)

                        if summarize:
                            summarizer.add_summary(summary, i)
                            llprint("\n\tAvg. Cross-Entropy: %.7f\n" %
                                    (np.mean(last_100_losses)))

                            end_time_100 = time.time()
                            elapsed_time = (end_time_100 -
                                            start_time_100) / 60
                            avg_counter += 1
                            # Running mean of minutes per summary interval.
                            avg_100_time += (1. / avg_counter) * (
                                elapsed_time - avg_100_time)
                            estimated_time = (avg_100_time *
                                              ((end - i) / 100.)) / 60.

                            print("\tAvg. 100 iterations time: %.2f minutes"
                                  % avg_100_time)
                            print("\tApprox. time to completion: %.2f hours"
                                  % estimated_time)

                            start_time_100 = time.time()
                            last_100_losses = []

                        if take_checkpoint:
                            save_checkpoint(i)

                    except KeyboardInterrupt:
                        # Keep the progress made so far, then stop.
                        save_checkpoint(i)
                        sys.exit(0)
            finally:
                summarizer.close()
mem_size = int(configs[9]) mem_slot = int(configs[10]) sequence_length = int(configs[11]) iterations = int(configs[12]) non_uniform_priority = configs[13].lower() == True mixin = configs[14].lower() == "true" copy_mode = False # Generate the model rnn = DNC(input_size=sequence_num_of_bits + 3, hidden_size=nhid, rnn_type=rnn_type, num_layers=1, num_hidden_layers=2, dropout=0, nr_cells=mem_slot, cell_size=mem_size, read_heads=1, gpu_id=-1, debug=True, batch_first=True, independent_linears=False, copy_mode=copy_mode) rnn.load_state_dict(torch.load(current_model)) # Execute the evaluation sigm = T.nn.Sigmoid() sequence_length -= 1 for i in tqdm(range(0, args.iterations)):
def main():
    """
    Tests the latest checkpoint of the DNC that was trained on the vowels task.
    In this task, the DNC is given an input that consist of a sequence of
    letters and asked to return any vowels contained in that sequence in order
    of their appearance in the sequence. For simplicity's sake, y is not
    considered a vowel.

    Every regular file in ``./data/encoded/test/`` whose name matches the task
    pattern is evaluated; files with other names are reported and skipped.
    For each task file the fraction of wrongly answered stories is recorded,
    and a two-line summary over all task files is printed at the end.

    :return: None.
    """
    ckpts_dir = './checkpoints/'
    test_dir = './data/encoded/test/'
    lexicon_dictionary = load('./data/encoded/lexicon-dict.pkl')
    lexicon_size = len(lexicon_dictionary)
    target_code = lexicon_dictionary["#"]

    # Compiled once; the leading digits identify the task.
    task_pattern = re.compile(r'([0-9]{1,4})test.txt.pkl')

    test_files = []
    for entry_name in os.listdir(test_dir):
        entry_path = os.path.join(test_dir, entry_name)
        if not os.path.isfile(entry_path):
            continue
        task_match_obj = task_pattern.match(entry_name)
        if task_match_obj is None:
            # Previously such a file crashed the run on ``None.group``.
            print("Skipping unrecognised test file: %s" % entry_path)
            continue
        test_files.append((task_match_obj.group(1), entry_path))

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            ncomputer = DNC(
                RecurrentController,
                input_size=lexicon_size,
                output_size=lexicon_size,
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )

            ncomputer.restore(session, ckpts_dir, 'step-100001')

            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            tasks_results = {}
            counter = 0
            for task_number, test_file in test_files:
                test_data = load(test_file)

                results = []
                for story in test_data:
                    a_story = np.array(story['inputs'])
                    # Positions in a_story that hold the target code.
                    target_mask = (a_story == target_code)
                    # Exclude the first target-code position from the mask,
                    # if there is one at all.
                    target_positions = np.flatnonzero(target_mask)
                    if target_positions.size:
                        target_mask[target_positions[0]] = False
                    desired_answers = np.array(story['outputs'])

                    input_vec, seq_len = prepare_sample([story],
                                                        lexicon_size)
                    softmax_output = session.run(
                        softmaxed,
                        feed_dict={
                            ncomputer.input_data: input_vec,
                            ncomputer.sequence_length: seq_len
                        })

                    softmax_output = np.squeeze(softmax_output, axis=0)
                    given_answers = np.argmax(softmax_output[target_mask],
                                              axis=1)

                    # Same length and same code at every position.
                    is_correct = np.array_equal(given_answers,
                                                desired_answers)
                    if not is_correct:
                        print("\nGiven: ", given_answers)
                        print("Expected: ", desired_answers)
                    results.append(is_correct)

                counter += 1
                llprint("\rTests Completed ... %d/%d" %
                        (counter, len(test_files)))

                error_rate = 1. - np.mean(results)
                tasks_results[task_number] = error_rate
            print("\n")
            print(
                "-------------------------------------------------------------------"
            )
            all_tasks_results = list(tasks_results.values())
            results_mean = "%.2f%%" % (np.mean(all_tasks_results) * 100)
            # A task file counts as failed above a 5% error rate.
            failed_count = "%d" % (np.sum(
                np.array(all_tasks_results) > 0.05))

            print("%-27s%-27s" % ("Percent Failed", results_mean))
            print("%-27s%-27s" % ("Total Failed", failed_count))
def main():
    """
    Runs an interactive shell where the user can submit input with their
    chosen delimiter and see the output of the DNC's latest checkpoint.

    Special inputs: ``:q!`` quits and ``:dict`` prints the lexicon. Any other
    input is split on the delimiter the user supplies next; if every piece is
    in the lexicon, the network's output is printed, followed by either
    "Correct!" or the expected answer.

    :return: None
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    ckpts_dir = os.path.join(dir_path, 'checkpoints')
    lexicon_dictionary = load(
        os.path.join(dir_path, 'data', 'encoded', 'lexicon-dict.pkl'))
    target_code = lexicon_dictionary["#"]

    # Reverse lookup built once instead of scanning the lexicon for every
    # predicted token. ``setdefault`` keeps the first word for a code, which
    # is what the former ``list(values).index(code)`` scan returned.
    code_to_word = {}
    for word, code in lexicon_dictionary.items():
        code_to_word.setdefault(code, word)

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            ncomputer = DNC(
                RecurrentController,
                input_size=len(lexicon_dictionary),
                output_size=len(lexicon_dictionary),
                max_sequence_length=100,
                memory_words_num=256,
                memory_word_size=64,
                memory_read_heads=4,
            )

            ncomputer.restore(session, ckpts_dir, 'step-100001')

            outputs, _ = ncomputer.get_outputs()
            softmaxed = tf.nn.softmax(outputs)

            print(
                "This is an interactive shell script. Here a user may test a trained neural network by passing it "
                "custom inputs and seeing if they elicid the desired output. \n Please note that a user may only "
                "test inputs that consists of words in the neural network's lexicon. If the user would like to quit"
                " the program, they can type ':q!' when prompted for an input. \n If the user would like to see the"
                " network's lexicon, they can type ':dict' when prompted for an input. Otherwise, the user may "
                "simply type the sequence of inputs that they would like to use and then hit the enter key. \n "
                "They will then be asked to specify the deliminator that distinguishes one word from another word."
                " The input will then be split using that deliminator. \n If all resulting inputs are in the "
                "network's lexicon, the network will then be fed these inputs and its output will be printed for "
                "the user along with its expected output.")

            # One prompt per pass; every ``continue`` returns to it.
            while True:
                my_input = input("Input:")
                if my_input == ":q!":
                    break

                if my_input == ":dict":
                    print(
                        "The neural network has been trained to recognize the following words:"
                    )
                    print(lexicon_dictionary)
                    continue

                deliminator = input("Deliminator:")
                story = my_input.split(deliminator)
                if not set(story).issubset(lexicon_dictionary):
                    print("You may only test key in the lexicon dictionary.")
                    continue

                desired_answers = get_solution(story)
                encoded_story = [lexicon_dictionary[an_input]
                                 for an_input in story]
                encoded_answers = [lexicon_dictionary[an_output]
                                   for an_output in desired_answers]

                input_vec, _, seq_len, _ = prepare_sample(
                    [encoded_story], encoded_answers, target_code,
                    len(lexicon_dictionary))
                softmax_output = session.run(
                    softmaxed,
                    feed_dict={
                        ncomputer.input_data: input_vec,
                        ncomputer.sequence_length: seq_len
                    })

                softmax_output = np.squeeze(softmax_output, axis=0)
                # Only the first len(desired_answers) output rows are read.
                given_answers = np.argmax(
                    softmax_output[:len(desired_answers)], axis=1)

                print("Output: ",
                      [code_to_word[an_answer]
                       for an_answer in given_answers])

                # Same length and same code at every position.
                if list(given_answers) == encoded_answers:
                    print("Correct!")
                else:
                    print("Expected: ", desired_answers)
1, 0, path_len_mean=path_len_mean, path_len_std=path_len_std) ep = env.start_ep() num_subgoals = 3 #her_sample = False her_coeff = 1. ab = False rnn = DNC(input_size=bit_str_len * 2 + 1, hidden_size=len(env.ep.actions_list), rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=args.mem_slot, cell_size=args.mem_size, read_heads=args.read_heads, gpu_id=args.cuda, debug=args.visdom, batch_first=True, independent_linears=True) if args.cuda != -1: rnn = rnn.cuda(args.cuda) print(rnn) if args.optim == 'adam': optimizer = optim.Adam(rnn.parameters(), lr=args.lr,
import tensorflow as tf
from dnc.dnc import DNC
import numpy as np

# Demo script: build one DNC step in a dedicated graph, evaluate it once on
# random input and write the graph to the "graphs" directory.
np.random.seed(1)

g = tf.Graph()
with g.as_default():
    batch_size = 4
    output_size = 20
    input_size = 10

    dnc = DNC(
        output_size,
        controller_units=128,
        memory_size=256,
        word_size=64,
        num_read_heads=4,
    )
    initial_state = dnc.get_initial_state(batch_size=batch_size)

    example_input = np.random.uniform(0, 1, (batch_size, input_size))
    example_input = example_input.astype(np.float32)

    output_op, _ = dnc(tf.convert_to_tensor(example_input), initial_state)
    init = tf.global_variables_initializer()

    with tf.Session(graph=g) as sess:
        init.run()
        example_output = sess.run(output_op)

    # Opening and immediately closing the writer writes the graph to disk.
    tf.summary.FileWriter("graphs", g).close()
cuda = args.cuda iterations = args.iterations summarize_freq = args.summarize_freq check_freq = args.check_freq visdom = args.visdom from_checkpoint = None if args.memory_type == 'dnc': rnn = DNC(input_size=args.input_size, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=args.mem_slot, cell_size=args.mem_size, read_heads=args.read_heads, gpu_id=args.cuda, debug=args.visdom, batch_first=True, independent_linears=True) elif args.memory_type == 'sdnc': rnn = SDNC(input_size=args.input_size, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=args.mem_slot, cell_size=args.mem_size,
start_step = int(opt[1]) graph = tf.Graph() with graph.as_default(): with tf.Session(graph=graph) as session: llprint("Building Computational Graph ... ") optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum) summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph) ncomputer = DNC( RecurrentController, input_size, output_size, sequence_max_length, words_count, word_size, read_heads, batch_size ) output, memory_views = ncomputer.get_outputs() loss_weights = tf.placeholder(tf.float32, [batch_size, None, 1]) loss = tf.reduce_mean( loss_weights * tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=ncomputer.target_output) ) summeries = [] gradients = optimizer.compute_gradients(loss)
test_files = [] for entryname in os.listdir('data/en/test/'): entry_path = os.path.join('data/en/test/', entryname) if os.path.isfile(entry_path): test_files.append(entry_path) graph = tf.Graph() with graph.as_default(): with tf.Session(graph=graph) as session: ncomputer = DNC( RecurrentController, input_size=len(lexicon_dictionary), output_size=len(lexicon_dictionary), max_sequence_length=100, memory_words_num=256, memory_word_size=64, memory_read_heads=4, ) ncomputer.restore(session, ckpts_dir, 'step-30001') outputs, _ = ncomputer.get_outputs() softmaxed = tf.nn.softmax(outputs) tasks_results = {} tasks_names = {} for test_file in test_files: test_data = load(test_file) task_regexp = r'qa([0-9]{1,2})_([a-z\-]*)_test.txt.pkl'
def test_read_vectors_and_weightings(self):
    """Check ``Memory.read`` against a NumPy reference computation.

    The content, forward and backward weightings produced by
    ``ContentAddressing`` and ``TemporalLinkAddressing`` are taken as
    correct; the test verifies that ``Memory.read`` mixes them by the read
    modes as expected and that the read vectors equal the transposed memory
    matrix times the updated read weightings.
    """
    m = Memory.state(
        memory_matrix=np.random.uniform(-1, 1,
                                        (5, 11, 7)).astype(np.float32),
        usage_vector=None,
        link_matrix=None,
        precedence_vector=None,
        write_weighting=None,
        read_weightings=DNCMemoryTests.softmax_sample((5, 11, 3), axis=1),
    )
    i = DNC.interface(
        read_keys=np.random.uniform(0, 1, (5, 7, 3)).astype(np.float32),
        read_strengths=np.random.uniform(0, 1, (5, 3)).astype(np.float32),
        write_key=None,
        write_strength=None,
        erase_vector=None,
        write_vector=None,
        free_gates=None,
        allocation_gate=None,
        write_gate=None,
        read_modes=tf.convert_to_tensor(
            DNCMemoryTests.softmax_sample((5, 3, 3), axis=1)),
    )

    # read uses the link matrix that is produced after a write operation
    new_link_matrix = np.random.uniform(0, 1,
                                        (5, 11, 11)).astype(np.float32)

    # assume ContentAddressing and TemporalLinkAddressing are already correct
    op_ca = ContentAddressing.weighting(m.memory_matrix, i.read_keys,
                                        i.read_strengths)
    op_f, op_b = TemporalLinkAddressing.weightings(new_link_matrix,
                                                   m.read_weightings)
    read_op = Memory.read(m.memory_matrix, m.read_weightings,
                          new_link_matrix, i)

    with self.test_session() as session:
        lookup_weightings = session.run(op_ca)
        forward_weighting, backward_weighting = session.run([op_f, op_b])
        updated_read_weightings, updated_read_vectors = session.run(
            read_op)
        # The read modes were wrapped in a tensor (hack to circumvent a tf
        # bug in not doing `convert_to_tensor` in einsum reductions
        # correctly), so evaluate them back to NumPy. Reuse the test session
        # instead of opening a second tf.Session that is never closed.
        read_modes_numpy = session.run(i.read_modes)

    self.assertEqual(updated_read_weightings.shape, (5, 11, 3))
    self.assertEqual(updated_read_vectors.shape, (5, 7, 3))

    expected_read_weightings = np.zeros((5, 11, 3)).astype(np.float32)
    for read_head in range(3):
        # Mode 0 scales backward, mode 1 content lookup, mode 2 forward.
        backward_weight = (read_modes_numpy[:, 0, read_head, np.newaxis] *
                           backward_weighting[:, :, read_head])
        lookup_weight = (read_modes_numpy[:, 1, read_head, np.newaxis] *
                         lookup_weightings[:, :, read_head])
        forward_weight = (read_modes_numpy[:, 2, read_head, np.newaxis] *
                          forward_weighting[:, :, read_head])
        expected_read_weightings[:, :, read_head] = (
            backward_weight + lookup_weight + forward_weight)

    expected_read_vectors = np.matmul(
        np.transpose(m.memory_matrix, [0, 2, 1]), updated_read_weightings)

    self.assertAllClose(updated_read_weightings, expected_read_weightings)
    self.assertAllClose(updated_read_vectors, expected_read_vectors)
def generate_result_images(prediction, target, image_dir, experiment_name,
                           epoch, args, model_path):
    """Run a saved DNC on one freshly generated sample and save two plots.

    A new sample is drawn with `generate_data`, a DNC is built from `args`,
    its weights are loaded from `model_path`, and two PNG files are written
    into `image_dir`:

    * ``result_<experiment_name>_<epoch>.png`` -- thresholded prediction,
      target and input as heatmaps;
    * ``weights_<experiment_name>_<epoch>.png`` -- heatmaps of the debug
      tensors returned by the model (read/write weights and read modes).

    Args:
        prediction: Ignored; the value is recomputed from the loaded model.
        target: Ignored; the value is taken from the generated sample.
        image_dir: Directory the PNG files are written to.
        experiment_name: Name embedded in the output file names.
        epoch: Value formatted into the output file names.
        args: Namespace providing the data and model hyper-parameters read
            below (``sequence_max_length``, ``input_size``, ``steps``,
            ``nhid``, ``rnn_type``, ``nlayer``, ``nhlayer``, ``dropout``,
            ``mem_slot``, ``mem_size``, ``read_heads``, ``cuda``,
            ``independent_linears``).
        model_path: Path of the saved state dict passed to ``torch.load``.
    """
    x, y, priority = generate_data(1,
                                   args.sequence_max_length,
                                   args.input_size + 3,
                                   steps=args.steps,
                                   non_uniform=False)
    priority_np = priority.detach().numpy()
    print(priority_np)
    print(np.argsort(-priority_np, axis=1))

    rnn = DNC(input_size=args.input_size + 3,
              hidden_size=args.nhid,
              rnn_type=args.rnn_type,
              num_layers=args.nlayer,
              num_hidden_layers=args.nhlayer,
              dropout=args.dropout,
              nr_cells=args.mem_slot,
              cell_size=args.mem_size,
              read_heads=args.read_heads,
              gpu_id=args.cuda,
              debug=True,
              batch_first=True,
              independent_linears=args.independent_linears)
    rnn.load_state_dict(torch.load(model_path))

    # Start from an empty state; only the output and the debug dict are used.
    output, _, v = rnn(x, (None, None, None),
                       reset_experience=True,
                       pass_through_memory=True)

    # This is needed if we want to use make_eval_plot
    sigm = T.nn.Sigmoid()
    prediction = sigm(
        output[:, -args.sequence_max_length:, :-3]).detach().numpy()[0]
    target = y[:, :, :-3].detach().numpy()[0]

    result_fig = plt.figure(figsize=(5, 5))
    ax1 = result_fig.add_subplot(221)
    ax2 = result_fig.add_subplot(222)
    ax3 = result_fig.add_subplot(212)
    ax1.set_title("Result")
    ax2.set_title("Target")
    ax3.set_title("Input")

    x = x.detach().numpy()[0]
    prediction = np.swapaxes(prediction, 0, 1)
    target = np.swapaxes(target, 0, 1)
    x = np.swapaxes(x, 0, 1)

    # Binarise the sigmoid outputs at 0.5 for plotting.
    prediction = T.from_numpy(prediction > 0.5)

    result_style = dict(vmin=0, vmax=1, linewidths=.5, linecolor="black",
                        cmap="Greys", cbar=True)
    sns.heatmap(prediction, ax=ax1, **result_style)
    sns.heatmap(target, ax=ax2, **result_style)
    sns.heatmap(x, ax=ax3, **result_style)
    plt.tight_layout()
    plt.savefig(image_dir + "/result_" + experiment_name +
                "_{}.png".format(epoch),
                dpi=250)
    # Release the figure; otherwise every call leaves one more figure open.
    plt.close(result_fig)

    weights_fig = plt.figure()
    ax1_2 = weights_fig.add_subplot(321)
    ax2_2 = weights_fig.add_subplot(325)
    ax3_2 = weights_fig.add_subplot(322)
    ax4_2 = weights_fig.add_subplot(324)
    ax5_2 = weights_fig.add_subplot(326)
    ax6_2 = weights_fig.add_subplot(323)
    ax1_2.set_title("Read Weigths")
    ax2_2.set_title("Write Weights")
    ax3_2.set_title("Forward Mode")
    ax4_2.set_title("Content Mode")
    ax5_2.set_title("Backward Mode")
    ax6_2.set_title("Read Modes")
    # NOTE(review): these labels are set before the heatmap is drawn on this
    # axis, so the heatmap may replace them -- confirm the intended order.
    ax6_2.set_yticklabels(["back", "forw", "cont"])

    sns.heatmap(v['read_weights'].T, ax=ax1_2, linewidths=.01)
    sns.heatmap(v['write_weights'].T, ax=ax2_2, linewidths=.01)
    sns.heatmap(v['forward_mode'].T, ax=ax3_2, linewidths=.01)
    sns.heatmap(v['content_mode'].T, ax=ax4_2, linewidths=.01)
    sns.heatmap(v['backward_mode'].T, ax=ax5_2, linewidths=.01)
    sns.heatmap(v['read_modes'].T, ax=ax6_2, linewidths=.01)
    plt.tight_layout()
    plt.savefig(image_dir + "/weights_" + experiment_name +
                "_{}.png".format(epoch),
                dpi=250)
    plt.close(weights_fig)
def _latest_checkpoint_step(checkpoint_names):
    """Return the largest N among names of the form ``step-N``, or None."""
    prefix = "step-"
    steps = [
        int(name[len(prefix):])
        for name in checkpoint_names
        if name.startswith(prefix) and name[len(prefix):].isdecimal()
    ]
    return max(steps, default=None)


def main():
    """
    Train the DNC to answer questions from the DREAM dataset.

    Command-line options:
        --checkpoint=NAME   restore this checkpoint (``step-N``) and resume
                            from step N.
        --iterations=N      last iteration index to run (default 100000).
        --start=N           first iteration index when no checkpoint is
                            restored (default 0).

    When no ``--checkpoint`` is given, the newest ``step-N`` entry found in
    the checkpoints directory is restored and training resumes from it.
    A summary is written every 100 iterations and a checkpoint is saved
    every 200 iterations and on Ctrl-C.

    :return: None.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    dirname = os.path.dirname(__file__)
    ckpts_dir = os.path.join(dirname, 'checkpoints/')
    data_dir = os.path.join(dirname, 'data', 'encoded')
    tb_logs_dir = os.path.join(dirname, 'logs')

    llprint("Loading Data ... ")
    lexicon_dict = load(os.path.join(data_dir, 'lexicon-dict.pkl'))
    data_files = os.listdir(os.path.join(data_dir, 'train'))
    llprint("Done!\n")

    batch_size = 1
    input_size = output_size = len(lexicon_dict)
    sequence_max_length = 100
    word_space_size = len(lexicon_dict)
    words_count = 256
    word_size = 64
    read_heads = 4

    learning_rate = 1e-4
    momentum = 0.9

    from_checkpoint = None
    iterations = 100000
    start_step = 0

    options, _ = getopt.getopt(sys.argv[1:], '',
                               ['checkpoint=', 'iterations=', 'start='])
    for opt_name, opt_value in options:
        if opt_name == '--checkpoint':
            from_checkpoint = opt_value
            print("Checkpoint found")
        elif opt_name == '--iterations':
            iterations = int(opt_value)
        elif opt_name == '--start':
            start_step = int(opt_value)

    graph = tf.Graph()
    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:
            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(
                learning_rate, momentum=momentum)
            summarizer = tf.compat.v1.summary.FileWriter(
                tb_logs_dir, session.graph)

            ncomputer = DNC(RecurrentController, input_size, output_size,
                            sequence_max_length, words_count, word_size,
                            read_heads, batch_size)

            output, _ = ncomputer.get_outputs()

            loss_weights = tf.compat.v1.placeholder(tf.float32,
                                                    [batch_size, None, 1])
            loss = tf.reduce_mean(
                loss_weights * tf.nn.softmax_cross_entropy_with_logits(
                    logits=output, labels=ncomputer.target_output))

            # Clip every available gradient to [-10, 10] before applying it.
            gradients = [
                (tf.clip_by_value(grad, -10, 10), var)
                if grad is not None else (grad, var)
                for grad, var in optimizer.compute_gradients(loss)
            ]
            summaries = [
                tf.compat.v1.summary.histogram(var.name + '/grad', grad)
                for grad, var in gradients
                if grad is not None
            ]

            apply_gradients = optimizer.apply_gradients(gradients)

            summaries.append(tf.compat.v1.summary.scalar("Loss", loss))

            summarize_op = tf.compat.v1.summary.merge(summaries)
            no_summarize = tf.no_op()

            # For debugging. Built once, before the loop: creating the op
            # inside the loop would add a new node to the graph on every
            # iteration.
            softmaxed = tf.nn.softmax(output)

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.compat.v1.global_variables_initializer())
            llprint("Done!\n")

            # Loop bounds come from the command line; a restored checkpoint
            # overrides the starting step.
            start = start_step
            end = iterations

            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % from_checkpoint)
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")
                start = int(from_checkpoint[from_checkpoint.find("-") + 1:])
            elif os.path.exists(ckpts_dir):
                latest_step = _latest_checkpoint_step(os.listdir(ckpts_dir))
                if latest_step is not None:
                    ncomputer.restore(session, ckpts_dir,
                                      f"step-{latest_step}")
                    start = latest_step

            last_100_losses = []

            start_time_100 = time.time()
            end_time_100 = None
            avg_100_time = 0.
            avg_counter = 0

            # Index -> word table, built once instead of per decoded word.
            index_to_word = list(lexicon_dict.keys())

            for i in range(start, end + 1):
                try:
                    llprint("\rIteration %d/%d" % (i, end))

                    sample_name = np.random.choice(data_files, 1)[0]
                    with open(os.path.join(data_dir, 'train',
                                           sample_name)) as f:
                        sample = json.load(f)
                    input_data, target_output, seq_len, weights = prepare_sample(
                        sample, lexicon_dict['='], word_space_size,
                        lexicon_dict)

                    summarize = (i % 100 == 0)
                    take_checkpoint = (i != 0) and (i % 200 == 0)

                    loss_value, _, summary, softmax_output = session.run(
                        [
                            loss, apply_gradients,
                            summarize_op if summarize else no_summarize,
                            softmaxed
                        ],
                        feed_dict={
                            ncomputer.input_data: input_data,
                            ncomputer.target_output: target_output,
                            ncomputer.sequence_length: seq_len,
                            loss_weights: weights
                        })

                    last_100_losses.append(loss_value)

                    if summarize:
                        # Decode the target and predicted word sequences;
                        # they are only needed for the report below.
                        softmax_output = np.squeeze(softmax_output, axis=0)
                        given_answers = np.argmax(softmax_output, axis=1)
                        words = [
                            index_to_word[np.where(an_array == 1)[0][0]]
                            for an_array in target_output[0]
                        ]

                        print("\n\tLoss value: ", loss_value)
                        print("\tTarget output: ", words)
                        print("\tOutput: ",
                              [index_to_word[num] for num in given_answers])

                        summarizer.add_summary(summary, i)
                        llprint("\tAvg. Cross-Entropy: %.7f\n" %
                                (np.mean(last_100_losses)))

                        end_time_100 = time.time()
                        elapsed_time = (end_time_100 - start_time_100) / 60
                        avg_counter += 1
                        # Running mean of the minutes per 100 iterations.
                        avg_100_time += (1. / avg_counter) * (elapsed_time -
                                                              avg_100_time)
                        estimated_time = (avg_100_time *
                                          ((end - i) / 100.)) / 60.

                        print("\tAvg. 100 iterations time: %.2f minutes" %
                              avg_100_time)
                        print("\tApprox. time to completion: %.2f hours\n" %
                              estimated_time)

                        start_time_100 = time.time()
                        last_100_losses = []

                    if take_checkpoint:
                        llprint("\nSaving Checkpoint ... line 237 ")
                        ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                        llprint("Done!\n")

                except KeyboardInterrupt:
                    llprint("\nSaving Checkpoint ... ")
                    ncomputer.save(session, ckpts_dir, 'step-%d' % i)
                    llprint("Done!\n")
                    # Flush pending summaries before leaving.
                    summarizer.close()
                    sys.exit(0)

            summarizer.close()
batch_size = args.batch_size summarize_freq = args.summarize_freq check_freq = args.check_freq mem_slot = args.mem_slot mem_size = args.mem_size read_heads = args.read_heads rnn = DNC(input_size=args.bits + 2, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=mem_slot, cell_size=mem_size, read_heads=read_heads, gpu_id=args.cuda, debug=args.debug, batch_first=True, independent_linears=True) print(rnn) if args.cuda != -1: rnn = rnn.cuda(args.cuda) last_save_losses = [] optimizer = optim.Adam(rnn.parameters(), lr=args.lr, eps=1e-9,
mem_size = args.mem_size read_heads = args.read_heads independent_linears=False if args.independent_linears: independent_linears=args.independent_linears if args.memory_type == 'dnc': rnn = DNC( input_size=args.input_size+3, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=mem_slot, cell_size=mem_size, read_heads=read_heads, gpu_id=args.cuda, debug=True, batch_first=True, independent_linears=independent_linears, copy_mode=args.copy_operation ) elif args.memory_type == 'sdnc': rnn = SDNC( input_size=args.input_size+3, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout,