def main(argv=None): print("Running on {}".format(device)) parser = argparse.ArgumentParser( description="Train a transformer for a copy task" ) add_optimizer_arguments(parser) add_transformer_arguments(parser) add_auxiliary_arguments(parser) args = parser.parse_args(argv) print("args:\n-----\n", args) data_points = [] data_points_acc = [] n_of_each_model = 10 n_trials = 8 for model_type in [ 'transformer','lstm','rnn',]: #add back transformers and rnn for max_trained_depth in range(1, 12): for ii in range(n_of_each_model): print(f'dep{max_trained_depth}_ii_{ii}') if model_type == "transformer": d_model = 16 model = SequencePredictorRecurrentTransformer( d_model=d_model, n_classes=5, sequence_length=args.sequence_length, attention_type=args.attention_type, n_layers=args.n_layers, n_heads=args.n_heads, d_query=d_model, # used to be d_query dropout=args.dropout, softmax_temp=None, attention_dropout=args.attention_dropout, ) else: d_model = 8 model = SequencePredictorRNN( d_model=d_model, n_classes=5, n_layers=args.n_layers, dropout=args.dropout, rnn_type=model_type ) print(f"Created model:\n{model}") model.to(device) model.load_state_dict(torch.load(f"models_from_colab/agreement_models/model_{model_type}_depth_{max_trained_depth}_num_{ii}.zip", map_location=device)['model_state']) for test_depth in range(1, 21): # was 1, 32 stack_size = test_depth # Change this value to test longer / shorter sequences n_correct = 0 for i_trial in range(n_trials): x, y, m = SubjectVerbAgreement.get_seq(stack_size) model.eval() yhat = model(x.unsqueeze(1)) loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1)) n_correct += acc data_points.append({'model_type': model_type, 'max_trained_depth': max_trained_depth, 'test_depth': test_depth, 'accuracy': n_correct / n_trials}) print("data points") print(data_points) with open("data_points_pr_acc_r.txt", "wb") as fp: pickle.dump(data_points, fp) """
def main(argv=None): print("Running on {}".format(device)) parser = argparse.ArgumentParser( description="Train a transformer for a copy task" ) add_optimizer_arguments(parser) add_transformer_arguments(parser) add_auxiliary_arguments(parser) args = parser.parse_args(argv) print("args:\n-----\n", args) if args.model_type == "transformer": model = SequencePredictorRecurrentTransformer( d_model=args.d_model, n_classes=5, sequence_length=args.sequence_length, attention_type=args.attention_type, n_layers=args.n_layers, n_heads=args.n_heads, d_query=args.d_model, # used to be d_query dropout=args.dropout, softmax_temp=None, attention_dropout=args.attention_dropout, ) else: model = SequencePredictorRNN( d_model=args.d_model, n_classes=5, n_layers=args.n_layers, dropout=args.dropout, rnn_type=args.model_type ) print(f"Created model:\n{model}") model.to(device) print("Number of epochs model was trained on: ",torch.load(args.continue_from, map_location=device)['epoch']) model.load_state_dict(torch.load(args.continue_from, map_location=device)['model_state']) def format_preds(x, y, preds, mask): n = len(x) n_dig = math.floor(math.log10(n)) + 1 nums = [] for p_dig in range(n_dig): nums.append( "# |" + "".join([str((i//10**p_dig)%10) for i in range(n)]) + "\n") nums = "".join(nums[::-1]) xs = "x |" + "".join([str(int(v)) for v in x]) + "\n" ys = "y |" + "".join([elt if mask[i] == 1 else '?' for i, elt in enumerate([str(int(v)) for v in y])]) + "\n" yh = "yh|" + "".join([elt if mask[i] == 1 else '?' for i, elt in enumerate([str(int(v)) for v in preds])]) + "\n" return nums + xs + ys + yh acc_list = [] max_acc = None for stack_size in range(1, 64): x, y, m = SubjectVerbAgreement.get_seq(stack_size) # print(x.shape, y.shape, m.shape) model.eval() yhat = model(x.unsqueeze(1)) hdn = model.hidden_state # batch x seq x hdn loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1)) acc_list.append((stack_size, acc)) if acc == 1: max_acc = stack_size print("Highest perfect score at depth:", max_acc) plot_hidden_state_2d(np.array(acc_list), pca=False) stack_size = 7 # Change this value to test longer / shorter sequences x, y, m = SubjectVerbAgreement.get_seq(stack_size) model.eval() yhat = model(x.unsqueeze(1)) hdn = model.hidden_state # batch x seq x hdn loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1)) print("Model loss: ", loss) print("Model accuracy: ", acc) print(format_preds(x, y, torch.argmax(yhat, dim=2)[0], m)) plot_hidden_state_2d(hdn[0].detach().cpu().numpy(), pca=True) """
def main(argv=None):
    # Choose a device and move everything there
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for an agreement task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    # Make the datasets and the models: 10 copies of each architecture
    # per maximum training depth
    for model_type in ['rnn', 'lstm', 'transformer']:
        for max_depth in range(1, 12):
            ii = 0
            while ii < 10:
                print(max_depth, ii)
                save_path = (
                    "/content/drive/My Drive/final_project_material/"
                    "agreement_models/model_" + model_type
                    + "_depth_" + str(max_depth) + "_num_" + str(ii)
                )
                # Skip models that have already been trained and saved
                if os.path.isfile(save_path):
                    ii += 1
                    continue

                train_set = SubjectVerbAgreement(2 * max_depth + 1, max_depth=max_depth)
                test_set = SubjectVerbAgreement(2 * max_depth + 1, max_depth=max_depth)
                train_loader = DataLoader(
                    train_set,
                    batch_size=batch_size,
                    pin_memory=device == "cuda",
                )
                test_loader = DataLoader(
                    test_set,
                    batch_size=batch_size,
                    pin_memory=device == "cuda",
                )

                if model_type == "transformer":
                    d_model = 16
                    model = SequencePredictorRecurrentTransformer(
                        d_model=d_model,
                        n_classes=5,
                        sequence_length=args.sequence_length,
                        attention_type=args.attention_type,
                        n_layers=args.n_layers,
                        n_heads=args.n_heads,
                        d_query=d_model,  # used to be a separate d_query argument
                        dropout=args.dropout,
                        softmax_temp=None,
                        attention_dropout=args.attention_dropout,
                    )
                else:
                    d_model = 8
                    model = SequencePredictorRNN(
                        d_model=d_model,
                        n_classes=5,
                        n_layers=args.n_layers,
                        dropout=args.dropout,
                        rnn_type=model_type,
                    )
                print(f"Created model:\n{model}")
                model.to(device)

                # Start training
                optimizer = get_optimizer(model.parameters(), args)
                start_epoch = 1
                if args.continue_from:
                    start_epoch = load_model(
                        args.continue_from, model, optimizer, device
                    )
                lr_schedule = torch.optim.lr_scheduler.LambdaLR(
                    optimizer, lambda e: 1. if e < args.reduce_lr_at else 0.1
                )
                for e in range(start_epoch, args.epochs + 1):
                    print('Epoch:', e)
                    print('Training...')
                    train(model, optimizer, train_loader, device)
                    print('Evaluating...')
                    acc = evaluate(model, test_loader, device, return_accuracy=True)
                    lr_schedule.step()
                    # Keep this copy once it reaches 95% accuracy; check this
                    # before the epoch cap so a model that converges exactly
                    # at epoch 100 is still saved.
                    if acc >= 0.95:
                        save_model(save_path, model, optimizer, e)
                        ii += 1
                        break
                    # Give up after 100 epochs and retrain this copy from a
                    # fresh initialization.
                    if e == 100:
                        break
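# NOTE: save_model / load_model are defined elsewhere in this repo. The
# sketch below is one minimal implementation consistent with the checkpoint
# keys read above ('model_state', 'epoch'); the 'optimizer_state' key and
# the exact return convention are assumptions.
def save_model_sketch(path, model, optimizer, epoch):
    torch.save({
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'epoch': epoch,
    }, path)

def load_model_sketch(path, model, optimizer, device):
    ckpt = torch.load(path, map_location=device)
    model.load_state_dict(ckpt['model_state'])
    optimizer.load_state_dict(ckpt['optimizer_state'])
    return ckpt['epoch'] + 1  # resume from the epoch after the saved one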
def main(argv=None): print("Running on {}".format(device)) parser = argparse.ArgumentParser( description="Train a transformer for a copy task") add_optimizer_arguments(parser) add_transformer_arguments(parser) add_auxiliary_arguments(parser) args = parser.parse_args(argv) print("args:\n-----\n", args) data_points = [] for model_type in ['rnn', 'lstm', 'transformer']: for max_trained_depth in range(1, 12): for test_depth in range(1, 21): for ii in range(10): if model_type == "transformer": model = SequencePredictorRecurrentTransformer( d_model=16, n_classes=5, sequence_length=args.sequence_length, attention_type=args.attention_type, n_layers=args.n_layers, n_heads=args.n_heads, d_query=8, # used to be d_query dropout=args.dropout, softmax_temp=None, attention_dropout=args.attention_dropout, ) else: model = SequencePredictorRNN( d_model=8 if model_type == 'lstm' else 8, n_classes=5, n_layers=args.n_layers, dropout=args.dropout, rnn_type=model_type) print(f"Created model:\n{model}") model.to(device) model_name = "models_from_colab/agreement_models/model_" + model_type + "_depth_" + str( max_trained_depth) + "_num_" + str(ii) + ".zip" model.load_state_dict( torch.load(model_name, map_location=device)['model_state']) stack_size = test_depth x, y, m = SubjectVerbAgreement.get_seq(stack_size) model.eval() yhat = model(x.unsqueeze(1)) hdn = model.hidden_state # batch x seq x hdn loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1)) data_points.append({ 'model_type': model_type, 'max_trained_depth': max_trained_depth, 'test_depth': test_depth, 'accuracy': acc }) print("data points:") print(data_points) with open("data_points_sva.txt", "wb") as fp: pickle.dump(data_points, fp) """
def main(argv=None):
    # Choose a device and move everything there
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a counting task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    # Make the dataset and the model
    max_depth = 12
    train_set = CountTaskWithEOS(max_depth * 2 + 1, max_depth=max_depth)
    test_set = CountTaskWithEOS(max_depth * 2 + 1, max_depth=max_depth)
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        pin_memory=device == "cuda",
    )
    test_loader = DataLoader(
        test_set,
        batch_size=args.batch_size,
        pin_memory=device == "cuda",
    )

    if args.model_type == "transformer":
        model = SequencePredictorRecurrentTransformer(
            d_model=args.d_model,
            n_classes=args.n_classes,
            sequence_length=args.sequence_length,
            attention_type=args.attention_type,
            n_layers=args.n_layers,
            n_heads=args.n_heads,
            d_query=args.d_model,  # used to be a separate d_query argument
            dropout=args.dropout,
            softmax_temp=None,
            attention_dropout=args.attention_dropout,
        )
    else:
        model = SequencePredictorRNN(
            d_model=args.d_model,
            n_classes=args.n_classes,
            n_layers=args.n_layers,
            dropout=args.dropout,
            rnn_type=args.model_type,
        )
    print(f"Created model:\n{model}")
    model.to(device)

    # Start training
    optimizer = get_optimizer(model.parameters(), args)
    start_epoch = 1
    if args.continue_from:
        start_epoch = load_model(args.continue_from, model, optimizer, device)
    lr_schedule = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda e: 1. if e < args.reduce_lr_at else 0.1
    )
    for e in range(start_epoch, args.epochs + 1):
        train(model, optimizer, train_loader, device)
        print('Epoch:', e)
        evaluate(model, test_loader, device)
        if (e % args.save_frequency) == 0 and args.save_to:
            save_model(args.save_to, model, optimizer, e)
        lr_schedule.step()

    if args.plot_hidden:
        x, y, m = next(test_set)  # x is 1-d of length sequence_len
        # .to() is not in-place: keep the returned tensors
        x, y, m = x.to(device), y.to(device), m.to(device)
        model.eval()
        yhat = model(x.unsqueeze(1))
        hdn = model.hidden_state  # batch x seq x hdn
        max_len = 10
        print("Plotting on: ", x[:max_len])
        plot_hidden_state_2d(hdn[0, :max_len, :].detach().cpu().numpy(), pca=True)
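# NOTE: train() and evaluate() are imported from elsewhere in this repo.
# The sketch below shows one plausible shape for a single training epoch
# over the DataLoader; the batch layout and the use of loss_fn are
# assumptions for illustration, not the repo's actual implementation.
def train_sketch(model, optimizer, dataloader, device):
    model.train()
    for x, y, m in dataloader:
        # Assumed batch layout: x, y, m each (batch, seq); move to device.
        x, y, m = x.to(device), y.to(device), m.to(device)
        optimizer.zero_grad()
        yhat = model(x)
        loss, _ = loss_fn(y, yhat, m)
        loss.backward()
        optimizer.step()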