def predict_train(config, sess):
    """Train an NTM on generated prediction sequences and return it.

    Builds an ``NTMCell``/``NTM`` pair from ``config``, initialises all
    TensorFlow variables, then performs ``config.epoch`` optimisation steps
    through ``sess``. Each step uses a freshly generated input/target pair
    from ``generate_predict_sequence``.

    Args:
        config: Settings object. The attributes read here are
            ``checkpoint_dir``, ``input_dim``, ``output_dim``,
            ``controller_layer_size``, ``write_head_size``,
            ``read_head_size``, ``min_length``, ``max_length`` and ``epoch``.
        sess: TensorFlow session used to run the training ops.

    Returns:
        The ``(cell, ntm)`` tuple that was built and trained.

    Raises:
        Exception: If ``config.checkpoint_dir`` is not an existing directory.
    """
    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter vectors fed to the model: component 0 flags the start of a
    # sequence, component 1 flags its end.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    # Build the controller cell and the NTM wrapped around it.
    # NOTE(review): the NTM is built with a maximum length of
    # max_length * 3, while seq_length below can reach max_length * 4 --
    # confirm that ntm.optims has an entry for every sampled length.
    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length * 3)

    print(" [*] Initialize all variables")
    # run() is called without an explicit session, so this relies on a
    # default session being active -- presumably `sess`; confirm in caller.
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    # `range` instead of the Python-2-only `xrange`, so the loop also runs
    # on Python 3.
    for idx in range(config.epoch):
        # Draw a random length, then generate the input and target sequences.
        seq_length = randint(config.min_length, config.max_length) * 4
        inc_seq, comp_seq = generate_predict_sequence(
            seq_length, config.input_dim - 2)

        # Pair each entry of ntm.inputs / ntm.true_outputs with the vector
        # at the same position; zip stops at the shorter of the two.
        feed_dict = dict(zip(ntm.inputs, inc_seq))
        feed_dict.update(zip(ntm.true_outputs, comp_seq))
        feed_dict[ntm.start_symbol] = start_symbol
        feed_dict[ntm.end_symbol] = end_symbol

        # One optimisation step for this length; also fetch the loss and the
        # global step, which is used to label the checkpoint below.
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step],
                                 feed_dict=feed_dict)

        # Checkpoint every 100 iterations.
        # NOTE(review): the checkpoint name is 'copy' and the closing message
        # mentions the copy task -- confirm both are intended here.
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
def copy_train(config):
    """Train an NTM on the copy task and return the trained model.

    Builds an ``NTMCell``/``NTM`` pair from ``config``, initialises all
    TensorFlow variables, then performs ``config.epoch`` optimisation steps
    through ``config.sess``. In each step the generated sequence is fed as
    both the input and the expected output.

    Args:
        config: Settings object. The attributes read here are ``sess``,
            ``checkpoint_dir``, ``input_dim``, ``output_dim``,
            ``controller_layer_size``, ``write_head_size``,
            ``read_head_size``, ``min_length``, ``max_length`` and ``epoch``.

    Returns:
        The ``(cell, ntm)`` tuple that was built and trained.

    Raises:
        Exception: If ``config.checkpoint_dir`` is not an existing directory.
    """
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Delimiter vectors: component 0 flags the start of a sequence,
    # component 1 flags its end.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    # run() is called without an explicit session, so this relies on a
    # default session being active -- presumably `sess`; confirm in caller.
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    # `range` instead of the Python-2-only `xrange`, so the loop also runs
    # on Python 3.
    for idx in range(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        # Copy task: the same sequence is both the input and the target.
        # zip stops at the shorter of the sequence and the NTM's lists.
        feed_dict = dict(zip(ntm.inputs, seq))
        feed_dict.update(zip(ntm.true_outputs, seq))
        feed_dict[ntm.start_symbol] = start_symbol
        feed_dict[ntm.end_symbol] = end_symbol

        # One optimisation step for this length; the fetched global step
        # labels the checkpoint below.
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step],
                                 feed_dict=feed_dict)

        # Checkpoint every 100 iterations.
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
def copy_train(config):
    """Run copy-task training for an NTM built from ``config``.

    Creates the ``NTMCell`` controller and the ``NTM`` around it,
    initialises all TensorFlow variables, and then executes
    ``config.epoch`` training steps in ``config.sess``. Every step feeds one
    generated sequence as both the model input and the expected output.

    Args:
        config: Settings object. The attributes read here are ``sess``,
            ``checkpoint_dir``, ``input_dim``, ``output_dim``,
            ``controller_layer_size``, ``write_head_size``,
            ``read_head_size``, ``min_length``, ``max_length`` and ``epoch``.

    Returns:
        The ``(cell, ntm)`` tuple that was built and trained.

    Raises:
        Exception: If ``config.checkpoint_dir`` is not an existing directory.
    """
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # Start/end delimiter vectors: index 0 marks the start of a sequence,
    # index 1 marks its end.
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    # No session is passed to run(), so a default session must be active
    # here -- presumably `sess`; confirm against the caller.
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    # Use `range`: `xrange` exists only on Python 2 and would raise
    # NameError on Python 3.
    for idx in range(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        # The same sequence serves as input and target (copy task).
        # zip truncates to the shorter of the sequence and the NTM's lists.
        feed_dict = dict(zip(ntm.inputs, seq))
        feed_dict.update(zip(ntm.true_outputs, seq))
        feed_dict[ntm.start_symbol] = start_symbol
        feed_dict[ntm.end_symbol] = end_symbol

        # Run the optimiser for this length and fetch the loss plus the
        # global step, which names the checkpoint written below.
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step],
                                 feed_dict=feed_dict)

        # Save a checkpoint every 100 iterations.
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)"
                  % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
turing_machine.sequence_length: 2 * random_length + 2 }) # TODO: This works for batch size = 1 seq_out = np.round(np.reshape(temp_output, (1, -1))).tolist()[0] seq_target = np.reshape(target_output, (1, -1)).tolist()[0] dist = hamming_distance(seq_out, seq_target) val = tf.compat.v1.Summary.Value(tag="Hamming_%", simple_value=dist) summary2 = tf.compat.v1.Summary(value=[val]) summarizer.add_summary(summary, i) summarizer.add_summary(summary2, i) cycle_time = time.perf_counter() - last_time print(f"Iteration: {i}/{iterations}") print("Avg. Logistic Loss: %.4f" % (np.mean(last_100_losses) / batch_size)) print( "Time needed: %.4f s, sample/sec: %d" % (cycle_time, log_frequency * batch_size / cycle_time)) last_100_losses = [] last_time = time.perf_counter() if take_checkpoint: llprint("\nSaving Checkpoint ... "), turing_machine.save(session, ckpts_dir, 'step-%d' % (i)) llprint("Done!\n")
print(" [*] Initialization finished") else: ntm.load(config['checkpoint_dir'], 'copy') start_time = time.time() print('') for idx in range(config['epoch']): seq_length = np.random.randint(2, config['length'] + 1) X, Y, masks = build_seq_batch(seq_length, config['length'], config['input_dim'] - 2) feed_dict = {ntm.inputs: X, ntm.true_outputs: Y, ntm.masks: masks} if idx % print_interval != 0: _, cost, step = sess.run( [ntm.optims, ntm.losses, ntm.global_step], feed_dict=feed_dict) else: _, cost, step, Y_pre = sess.run( [ntm.optims, ntm.losses, ntm.global_step, ntm.outputs], feed_dict=feed_dict) print("[%5d] %2d: %.4f (%.1fs)" \ % (idx, seq_length, cost, time.time() - start_time)) Y_pre = np.array(Y_pre) mask_id = masks.reshape(-1).astype(bool) print(np.argmax(Y, axis=1)[mask_id]) print(np.argmax(Y_pre, axis=1)[mask_id]) print("Training Copy task finished") ntm.save(config['checkpoint_dir'], 'n_copy', idx)