def eval(model_path):
    vector_length = 8
    memory_size = (128, 20)
    hidden_layer_size = 100
    lstm_controller = not args.ff
    model = NTM(vector_length, hidden_layer_size, memory_size, lstm_controller)
    print(f"Loading model from {model_path}")
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint)
    model.eval()
    # evaluate generalization on sequences both inside (20) and well beyond (100)
    # the training length range
    lengths = [20, 100]
    for l in lengths:
        sequence_length = l
        input, target = get_training_sequence(sequence_length, sequence_length, vector_length)
        state = model.get_initial_state()
        # present the whole input sequence to the model
        for vector in input:
            _, state = model(vector, state)
        # then read the copy back out while feeding zero vectors
        y_out = torch.zeros(target.size())
        for j in range(len(target)):
            y_out[j], state = model(torch.zeros(1, vector_length + 1), state)
        y_out_binarized = y_out.clone().data
        y_out_binarized.apply_(lambda x: 0 if x < 0.5 else 1)
        plot_copy_results(target, y_out, vector_length)
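# The helper get_training_sequence is used above but not shown. Below is a minimal
# sketch of one plausible implementation for the copy task, not the repository's
# actual code: the shapes (input of (seq_len + 1, batch, vector_length + 1) with a
# delimiter flag on the extra channel, target of (seq_len, batch, vector_length))
# are assumptions inferred from how eval() and train() consume the tensors.
import random
import torch

def get_training_sequence(min_length, max_length, vector_length, batch_size=1):
    seq_len = random.randint(min_length, max_length)
    # random binary payload to be copied
    seq = torch.bernoulli(torch.full((seq_len, batch_size, vector_length), 0.5))
    # the input gets one extra channel used only for the end-of-sequence delimiter
    input = torch.zeros(seq_len + 1, batch_size, vector_length + 1)
    input[:seq_len, :, :vector_length] = seq
    input[seq_len, :, vector_length] = 1.0  # delimiter step
    target = seq.clone()
    return input, target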
# if we use lr schedule, we need to keep track of errors over time
if args.lr_rate is not None:
    print("Using lr schedule rate of:", args.lr_rate)
    errors = {}
    error_sum = 0

# deserialize saved model if path given
if args.model is None:
    # If not using a saved model, initialize from params
    vec_size = args.vec_size
    seq = SequenceGen(args.task, vec_size, args.hi, args.lo)
    hidden_size = args.units  # Size of hidden layer of neurons
    N = args.N  # number of memory locations
    M = args.M  # size of a memory location
    heads = args.heads
    model = NTM(seq.in_size, seq.out_size, hidden_size, N, M, vec_size, heads)
else:
    # otherwise, load the model from the specified file
    print("Using saved model:", args.model)
    model = deserialize(args.model)
    vec_size = model.vec_size  # vec size comes from the model
    seq = SequenceGen(args.task, vec_size, args.hi, args.lo)

# An object that keeps the optimizer state during training
optimizer = RMSProp(model.W)

n = 0  # counts the number of sequences trained on
bpc = None  # keeps track of trailing bpc (cost)
while n < 100:
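# The RMSProp class constructed above from model.W is not shown in this snippet.
# Below is a minimal sketch of the standard update rule such a hand-rolled optimizer
# would implement, assuming model.W is a dict of numpy weight arrays and that
# gradients arrive as a matching dict; the class name matches the snippet, but the
# method name, signature, and hyperparameter defaults are illustrative assumptions.
import numpy as np

class RMSProp(object):
    def __init__(self, W, decay=0.95, eps=1e-8):
        self.decay = decay
        self.eps = eps
        # one running average of squared gradients per weight matrix
        self.cache = {name: np.zeros_like(w) for name, w in W.items()}

    def update(self, W, dW, learning_rate):
        for name in W:
            self.cache[name] = self.decay * self.cache[name] + (1 - self.decay) * dW[name] ** 2
            W[name] -= learning_rate * dW[name] / (np.sqrt(self.cache[name]) + self.eps)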
help="The number of out bits") # Tensorflow checkpoints and tensorboard parser.add_argument('--checkpoint_dir', action="store", dest="checkpoint_dir", default='./tf_ntm_ckpt/', help="The location to save the checkpoint") parser.add_argument('--max_to_keep', action="store", dest="max_to_keep", default=3, type=int, help="Maximum number of checkpoint to keep") parser.add_argument('--report_interval', action="store", dest="report_interval", default=10, type=int, help="The report interval for the train information") parser.add_argument('--train_log_dir', action="store", dest="train_log_dir", default='./tf_ntm_logs/gradient_tape/', help="The location to save the training logs") arg = parser.parse_args() # Training ntm_model = NTM(arg.controller_size, arg.memory_locations, arg.memory_vector_size, arg.maximum_shifts, arg.out_bits, arg.learn_r_bias, arg.learn_w_bias, arg.learn_m_bias) optimizer = tf.optimizers.RMSprop(learning_rate=arg.learning_rate, momentum=arg.momentum) bce_loss = tf.losses.BinaryCrossentropy() # Training metrics train_loss = tf.metrics.Mean(name="train_loss") train_cost = tf.metrics.Mean(name="train_cost") # Tensorboard # tensorboard --logdir tf_ntm_logs/gradient_tape current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_log_dir = arg.train_log_dir + current_time + '/train' # Checkpoints ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=ntm_model)
def train(epochs=50_000):
    tensorboard_log_folder = f"runs/copy-task-{datetime.now().strftime('%Y-%m-%dT%H%M%S')}"
    writer = SummaryWriter(tensorboard_log_folder)
    print(f"Training for {epochs} epochs, logging in {tensorboard_log_folder}")
    # hyperparameters
    sequence_min_length = 1
    sequence_max_length = 20
    vector_length = 8
    memory_size = (128, 20)
    hidden_layer_size = 100
    batch_size = 4
    lstm_controller = not args.ff
    # log hyperparameters to tensorboard
    writer.add_scalar("sequence_min_length", sequence_min_length)
    writer.add_scalar("sequence_max_length", sequence_max_length)
    writer.add_scalar("vector_length", vector_length)
    writer.add_scalar("memory_size0", memory_size[0])
    writer.add_scalar("memory_size1", memory_size[1])
    writer.add_scalar("hidden_layer_size", hidden_layer_size)
    writer.add_scalar("lstm_controller", lstm_controller)
    writer.add_scalar("seed", seed)
    writer.add_scalar("batch_size", batch_size)
    model = NTM(vector_length, hidden_layer_size, memory_size, lstm_controller)
    optimizer = optim.RMSprop(model.parameters(), momentum=0.9, alpha=0.95, lr=1e-4)
    feedback_frequency = 100
    total_loss = []
    total_cost = []
    os.makedirs("models", exist_ok=True)
    # resume from a saved checkpoint if one exists
    if os.path.isfile(model_path):
        print(f"Loading model from {model_path}")
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint)
    for epoch in range(epochs + 1):
        optimizer.zero_grad()
        input, target = get_training_sequence(sequence_min_length, sequence_max_length, vector_length, batch_size)
        state = model.get_initial_state(batch_size)
        # present the input sequence
        for vector in input:
            _, state = model(vector, state)
        # read the copy back out while feeding zero vectors
        y_out = torch.zeros(target.size())
        for j in range(len(target)):
            y_out[j], state = model(torch.zeros(batch_size, vector_length + 1), state)
        loss = F.binary_cross_entropy(y_out, target)
        loss.backward()
        optimizer.step()
        total_loss.append(loss.item())
        # cost is the number of wrong bits per sequence after thresholding at 0.5
        y_out_binarized = y_out.clone().data
        y_out_binarized.apply_(lambda x: 0 if x < 0.5 else 1)
        cost = torch.sum(torch.abs(y_out_binarized - target)) / len(target)
        total_cost.append(cost.item())
        if epoch % feedback_frequency == 0:
            running_loss = sum(total_loss) / len(total_loss)
            running_cost = sum(total_cost) / len(total_cost)
            print(f"Loss at step {epoch}: {running_loss}")
            writer.add_scalar('training loss', running_loss, epoch)
            writer.add_scalar('training cost', running_cost, epoch)
            total_loss = []
            total_cost = []
            torch.save(model.state_dict(), model_path)
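# train() and eval() above rely on module-level `args`, `seed`, and `model_path` that
# are not shown in this snippet. Below is a minimal sketch of glue code that would
# satisfy those references; the --ff flag comes from the snippet, while the other
# flag names and the model path are illustrative assumptions.
import argparse
import random

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ff", action="store_true", help="use a feed-forward controller instead of an LSTM")
    parser.add_argument("--eval", action="store_true", help="evaluate a saved model instead of training")
    parser.add_argument("--seed", type=int, default=1)
    args = parser.parse_args()

    seed = args.seed
    torch.manual_seed(seed)
    random.seed(seed)
    model_path = "models/copy.pt"  # assumed location; the snippet only shows os.makedirs("models")

    if args.eval:
        eval(model_path)
    else:
        train()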