def main(grid_args=None):
    args = set_arguments(grid_args)
    vocab, full_dataset = create_full_dataset(args)
    if args.test:
        test_dir = 'test'
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes)
        max_dev_epoch, max_dev_acc, max_model_filename = train(
            full_dataset, test_dataset, vocab, args)
    else:
        train_dataset = SSTDataset(num_classes=args.num_classes)
        dev_dataset = SSTDataset(num_classes=args.num_classes)
        train_dataset, dev_dataset = split_dataset_simple(
            full_dataset, train_dataset, dev_dataset, split=args.split)
        max_dev_epoch, max_dev_acc, max_model_filename = train(
            train_dataset, dev_dataset, vocab, args)
    with open(args.name + '_results', 'a') as result_file:
        result_file.write(
            str(args) + '\nEpoch {epoch}, accuracy {acc:.4f}\n'.format(
                epoch=max_dev_epoch, acc=max_dev_acc))
    return max_dev_epoch, max_dev_acc, max_model_filename
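
# Hypothetical usage sketch (not from the source): drive main() over a small
# grid and keep the best dev accuracy. The dict format of grid_args is an
# assumption about what set_arguments() accepts; only main()'s return values
# are taken from the code above.
def run_grid_search():
    best = (0, 0.0, None)  # (epoch, dev_acc, model_filename)
    for lr in [1e-3, 1e-4]:
        for wd in [0.0, 1e-4]:
            epoch, acc, fname = main(grid_args={'lr': lr, 'wd': wd})
            if acc > best[1]:
                best = (epoch, acc, fname)
    return best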
def test_train() -> None:
    gcn = GraphConvolutionalNetwork(in_features=NUM_FEATURES,
                                    gc_hidden_sizes=[512, 256, 128, 64],
                                    fc_hidden_sizes=[32, 16, 8, 4],
                                    softmax_outputs=True)
    data = generate_random_data(num_vertices=NUM_VERTICES,
                                num_features=NUM_FEATURES,
                                num_classes=NUM_CLASSES,
                                num_examples=NUM_TRAINING_EXAMPLES)
    train(model=gcn, data=data, num_epochs=10)
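
# A minimal sketch (assumption, not the source implementation) of what a
# generate_random_data helper might produce for this test: one random
# symmetric adjacency matrix, a random feature matrix, and a random class
# label per example. The (adj, features, label) tuple format is illustrative.
import torch

def generate_random_data_sketch(num_vertices, num_features, num_classes,
                                num_examples):
    data = []
    for _ in range(num_examples):
        adj = torch.randint(0, 2, (num_vertices, num_vertices)).float()
        adj = ((adj + adj.t()) > 0).float()  # symmetrize: undirected graph
        features = torch.randn(num_vertices, num_features)
        label = torch.randint(0, num_classes, (1,)).item()
        data.append((adj, features, label))
    return data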
for gc_hs in args.gc_hidden:  # assumed outer loop: gc_hs is referenced below but never bound in the fragment
    for fc_hs in args.fc_hidden:
        for lr in args.lrs:
            for epochs in args.epochs:
                gcn_model = GraphConvolutionalNetwork(
                    in_features=in_features,
                    gc_hidden_sizes=gc_hs,
                    fc_hidden_sizes=fc_hs,
                    add_residual_connection=False)
                model_desc = "_gc_" + str(gc_hs) + "_fc_" + str(
                    fc_hs) + "_lr_" + str(lr) + "_epochs_" + str(epochs)
                train(model=gcn_model,
                      train_data=train_data,
                      validation_data=valid_data,
                      num_epochs=epochs,
                      learning_rate=lr,
                      metrics_to_log=metrics,
                      # model_desc included so each grid point gets its own checkpoint
                      model_path=args.model_dir + args.model_prefix +
                      model_desc + ".pt")
                valid_metrics_result = calculate_metrics(model=gcn_model,
                                                         data=valid_data)
                test_metrics_result = calculate_metrics(model=gcn_model,
                                                        data=test_data)
                result = {
                    "gc_hidden_layers": str(gc_hs),
                    "fc_hidden_layers": str(fc_hs),
                    "learning_rate": lr
                }
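
# Hypothetical follow-up (not from the source): if each grid iteration appends
# its result dict to a list, the sweep can be summarized with pandas and the
# best configuration picked by a validation metric. 'all_results', the metric
# key, and the CSV path are assumptions; calculate_metrics is assumed to
# return a dict of metric name -> value.
import pandas as pd

all_results = []  # inside the loops: all_results.append({**result, **valid_metrics_result})

def summarize(all_results, metric="accuracy"):
    df = pd.DataFrame(all_results).sort_values(metric, ascending=False)
    df.to_csv("grid_results.csv", index=False)
    return df.iloc[0]  # row for the best configuration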
def optimize(trial, args):
    setattr(args, 'hidden_dim',
            int(trial.suggest_categorical('d_model', [128, 256, 512])))
    # distinct Optuna parameter names: reusing one name for two suggestions is rejected
    setattr(args, 'depth', int(trial.suggest_discrete_uniform('depth', 2, 6, 1)))
    setattr(args, 'n_layers',
            int(trial.suggest_discrete_uniform('n_layers', 1, 3, 1)))
    setattr(args, 'lr', trial.suggest_loguniform('lr', 1e-5, 1e-2))
    setattr(args, 'batch_size',
            int(trial.suggest_categorical('batch_size', [16, 32, 64, 128])))
    setattr(args, 'log_dir',
            os.path.join(args.hyperopt_dir, str(trial._trial_id)))

    torch.manual_seed(0)
    train_logger = create_logger('train', args.log_dir)
    train_logger.info('Arguments are...')
    for arg in vars(args):
        train_logger.info(f'{arg}: {getattr(args, arg)}')

    # construct loader and set device
    train_loader, val_loader = construct_loader(args)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # build model
    model_parameters = {
        'node_dim': train_loader.dataset.num_node_features,
        'edge_dim': train_loader.dataset.num_edge_features,
        'hidden_dim': args.hidden_dim,
        'depth': args.depth,
        'n_layers': args.n_layers
    }
    model = G2C(**model_parameters).to(device)

    # multi gpu training
    if torch.cuda.device_count() > 1:
        train_logger.info(
            f'Using {torch.cuda.device_count()} GPUs for training...')
        model = torch.nn.DataParallel(model)

    # get optimizer and scheduler
    optimizer, scheduler = get_optimizer_and_scheduler(
        args, model, len(train_loader.dataset))
    loss = torch.nn.MSELoss(reduction='sum')

    # record parameters
    train_logger.info(
        f'\nModel parameters are:\n{dict_to_str(model_parameters)}\n')
    save_yaml_file(os.path.join(args.log_dir, 'model_parameters.yml'),
                   model_parameters)
    train_logger.info(f'Optimizer parameters are:\n{optimizer}\n')
    train_logger.info('Scheduler state dict is:')
    if scheduler:
        for key, value in scheduler.state_dict().items():
            train_logger.info(f'{key}: {value}')
        train_logger.info('')

    best_val_loss = math.inf
    best_epoch = 0
    model.to(device)
    train_logger.info("Starting training...")
    for epoch in range(1, args.n_epochs):
        train_loss = train(model, train_loader, optimizer, loss, device,
                           scheduler, train_logger if args.verbose else None)
        train_logger.info("Epoch {}: Training Loss {}".format(epoch, train_loss))
        val_loss = test(model, val_loader, loss, device, args.log_dir, epoch)
        train_logger.info("Epoch {}: Validation Loss {}".format(epoch, val_loss))
        if scheduler and not isinstance(scheduler, NoamLR):
            scheduler.step(val_loss)
        if val_loss <= best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            torch.save(model.state_dict(),
                       os.path.join(args.log_dir, f'epoch_{epoch}_state_dict'))
    train_logger.info("Best Validation Loss {} on Epoch {}".format(
        best_val_loss, best_epoch))

    train_logger.handlers = []
    return best_val_loss
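
# A minimal driver sketch (assumption, not shown in the source) for running
# the Optuna search above; the study direction matches the returned validation
# loss, while the trial count is illustrative.
import optuna
from functools import partial

def run_hyperopt(args, n_trials=20):
    study = optuna.create_study(direction='minimize')
    study.optimize(partial(optimize, args=args), n_trials=n_trials)
    return study.best_params, study.best_value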
def optimize(trial, args):
    setattr(args, 'hidden_size',
            int(trial.suggest_discrete_uniform('hidden_size', 300, 1200, 300)))
    setattr(args, 'depth', int(trial.suggest_discrete_uniform('depth', 2, 6, 1)))
    # dropout must stay a float: an int() cast would truncate every sample to 0
    setattr(args, 'dropout', trial.suggest_discrete_uniform('dropout', 0, 1, 0.2))
    setattr(args, 'lr', trial.suggest_loguniform('lr', 1e-5, 1e-3))
    setattr(args, 'batch_size',
            int(trial.suggest_categorical('batch_size', [25, 50, 100])))
    setattr(
        args, 'graph_pool',
        trial.suggest_categorical('graph_pool',
                                  ['sum', 'mean', 'max', 'attn', 'set2set']))
    setattr(args, 'log_dir',
            os.path.join(args.hyperopt_dir, str(trial._trial_id)))
    modify_train_args(args)

    torch.manual_seed(args.seed)
    train_logger = create_logger('train', args.log_dir)

    train_loader, val_loader = construct_loader(args)
    mean = train_loader.dataset.mean
    std = train_loader.dataset.std
    stdzer = Standardizer(mean, std, args.task)

    # create model, optimizer, scheduler, and loss fn
    model = GNN(args, train_loader.dataset.num_node_features,
                train_loader.dataset.num_edge_features).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = build_lr_scheduler(optimizer, args, len(train_loader.dataset))
    loss = get_loss_func(args)
    best_val_loss = math.inf
    best_epoch = 0

    # record args, optimizer, and scheduler info
    train_logger.info('Arguments are...')
    for arg in vars(args):
        train_logger.info(f'{arg}: {getattr(args, arg)}')
    train_logger.info(f'\nOptimizer parameters are:\n{optimizer}\n')
    train_logger.info('Scheduler state dict is:')
    for key, value in scheduler.state_dict().items():
        train_logger.info(f'{key}: {value}')
    train_logger.info('')

    # train
    train_logger.info("Starting training...")
    for epoch in range(0, args.n_epochs):
        train_loss, train_acc = train(model, train_loader, optimizer, loss,
                                      stdzer, args.device, scheduler, args.task)
        train_logger.info(f"Epoch {epoch}: Training Loss {train_loss}")

        val_loss, val_acc = eval(model, val_loader, loss, stdzer, args.device,
                                 args.task)
        train_logger.info(f"Epoch {epoch}: Validation Loss {val_loss}")

        if val_loss <= best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            torch.save(model.state_dict(),
                       os.path.join(args.log_dir, 'best_model'))

        # report intermediate results for early stopping
        trial.report(val_loss, epoch)

        # handle pruning based on the intermediate value
        if trial.should_prune():
            train_logger.handlers = []
            raise optuna.TrialPruned()

    train_logger.info(
        f"Best Validation Loss {best_val_loss} on Epoch {best_epoch}")

    # load best model
    model = GNN(args, train_loader.dataset.num_node_features,
                train_loader.dataset.num_edge_features).to(args.device)
    state_dict = torch.load(os.path.join(args.log_dir, 'best_model'),
                            map_location=args.device)
    model.load_state_dict(state_dict)

    # predict test data
    test_loader = construct_loader(args, modes='test')
    preds, test_loss, test_acc, test_auc = test(model, test_loader, loss,
                                                stdzer, args.device, args.task)
    train_logger.info(f"Test Loss {test_loss}")

    # save predictions
    smiles = test_loader.dataset.smiles
    preds_path = os.path.join(args.log_dir, 'preds.csv')
    pd.DataFrame(list(zip(smiles, preds)),
                 columns=['smiles', 'prediction']).to_csv(preds_path,
                                                          index=False)

    train_logger.handlers = []
    return best_val_loss
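
# Sketch (not from the source): because this objective calls trial.report()
# and trial.should_prune(), the study should be created with a pruner.
# MedianPruner, its warmup settings, and the trial count are illustrative
# assumptions; an `args` namespace is assumed to be prepared by the caller.
import optuna

study = optuna.create_study(
    direction='minimize',
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10))
study.optimize(lambda trial: optimize(trial, args), n_trials=50)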
best_epoch = 0

# record args, optimizer, and scheduler info
logger.info('Arguments are...')
for arg in vars(args):
    logger.info(f'{arg}: {getattr(args, arg)}')
logger.info(f'\nOptimizer parameters are:\n{optimizer}\n')
logger.info('Scheduler state dict is:')
for key, value in scheduler.state_dict().items():
    logger.info(f'{key}: {value}')
logger.info('')

# train
logger.info("Starting training...")
for epoch in range(0, args.n_epochs):
    train_loss, train_acc = train(model, train_loader, optimizer, loss,
                                  stdzer, args.device, scheduler, args.task)
    logger.info(f"Epoch {epoch}: Training Loss {train_loss}")
    if args.task == 'classification':
        logger.info(
            f"Epoch {epoch}: Training Classification Accuracy {train_acc}")

    val_loss, val_acc = eval(model, val_loader, loss, stdzer, args.device,
                             args.task)
    logger.info(f"Epoch {epoch}: Validation Loss {val_loss}")
    if args.task == 'classification':
        logger.info(
            f"Epoch {epoch}: Validation Classification Accuracy {val_acc}")

    if val_loss <= best_val_loss:
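
# A minimal sketch (assumption; the real Standardizer is defined elsewhere in
# the repo) of the stdzer passed to train/eval above: standardize regression
# targets with the training-set mean/std, and pass classification targets
# through unchanged. The __call__ signature with rev= is illustrative.
class StandardizerSketch:
    def __init__(self, mean, std, task):
        self.mean, self.std, self.task = mean, std, task

    def __call__(self, x, rev=False):
        if self.task == 'classification':
            return x
        # rev=True inverts the transform to recover original units
        return x * self.std + self.mean if rev else (x - self.mean) / self.std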
logger.info('Scheduler state dict is:')
if scheduler:
    for key, value in scheduler.state_dict().items():
        logger.info(f'{key}: {value}')
    logger.info('')

loss = torch.nn.MSELoss(reduction='sum')
# alternative loss (MAE): torch.nn.L1Loss(reduction='sum')

best_val_loss = math.inf
best_epoch = 0

logger.info("Starting training...")
for epoch in range(1, args.n_epochs):
    train_loss = train(model, train_loader, optimizer, loss, device,
                       scheduler, logger if args.verbose else None)
    logger.info("Epoch {}: Training Loss {}".format(epoch, train_loss))
    val_loss = test(model, val_loader, loss, device, log_dir, epoch)
    logger.info("Epoch {}: Validation Loss {}".format(epoch, val_loss))
    if scheduler and not isinstance(scheduler, NoamLR):
        scheduler.step(val_loss)
    if val_loss <= best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        # torch.save(model.state_dict(), os.path.join(log_dir, 'best_model'))
logger.info("Best Validation Loss {} on Epoch {}".format(
    best_val_loss, best_epoch))
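
# Context sketch (assumption about how the scheduler is built elsewhere): a
# plateau scheduler is the kind stepped with val_loss as in the loop above,
# whereas NoamLR-style schedulers step per batch inside train() and are
# therefore excluded from the epoch-level step. The stand-in model and
# hyperparameters are illustrative.
import torch

model = torch.nn.Linear(4, 1)  # stand-in model for illustration
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5)
# each epoch: scheduler.step(val_loss)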
# Model parameters
GC_HIDDEN_SIZES = [256, 128]
FC_HIDDEN_SIZES = [64, 2]  # Final fully-connected layer size must equal number of classes

# Training parameters
NUM_EPOCHS = 5
LEARNING_RATE = 2e-4
MAX_EXAMPLES_PER_CLASS = None
METRICS_TO_LOG = ["accuracy", "auc"]

if __name__ == "__main__":
    train_data, test_data, in_features = get_model_data(
        doc_paths=[POSITIVE_REVIEWS_PATH, NEGATIVE_REVIEWS_PATH],
        embeddings_path=EMBEDDINGS_PATH,
        max_examples_per_class=MAX_EXAMPLES_PER_CLASS)
    gcn_model = GraphConvolutionalNetwork(in_features=in_features,
                                          gc_hidden_sizes=GC_HIDDEN_SIZES,
                                          fc_hidden_sizes=FC_HIDDEN_SIZES,
                                          add_residual_connection=False)
    train(model=gcn_model,
          train_data=train_data,
          validation_data=test_data,
          num_epochs=NUM_EPOCHS,
          learning_rate=LEARNING_RATE,
          metrics_to_log=METRICS_TO_LOG,
          model_path=MODEL_PATH)
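
# Hypothetical follow-up (not in the source): reload the checkpoint for
# inference, assuming train() saved model.state_dict() to MODEL_PATH.
state_dict = torch.load(MODEL_PATH, map_location="cpu")
gcn_model.load_state_dict(state_dict)
gcn_model.eval()  # disable dropout/batch-norm updates for prediction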
def train_and_evaluate(args):
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    print(device)

    # Set the logger
    # set_logger(os.path.join(args.model_dir, 'train.log'))

    print("Downloading datasets")
    train_dl, train_sz = load_dataset(args, 'train.pkl', True, args.k_years)
    dev_dl, dev_sz = load_dataset(args, 'dev.pkl', True, args.k_years)
    print("- done.")

    # Define the model
    print("Creating the model...")
    model = BaseballFCN(17 * args.k_years, args.hidden_size).to(device)
    print("- done.")

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(),
                           args.lr,
                           weight_decay=args.weight_decay)

    # snapshot weights: state_dict() alone returns live tensor references
    best_model_wts = copy.deepcopy(model.state_dict())
    best_r2 = float("-inf")
    losses = {"train": [], "val": []}
    r2s = {"train": [], "val": []}

    # Train the model
    print("Starting training for {} epoch(s)".format(args.epochs))
    since = time.time()
    for epoch in range(1, args.epochs + 1):
        print("-" * 10)
        print("Epoch {}/{}".format(epoch, args.epochs))
        print(time.ctime())

        model, y_true, y_pred, train_loss = train(model, criterion, optimizer,
                                                  args, train_dl, train_sz,
                                                  device)
        epoch_r2 = metrics.r2_score(y_true, y_pred)
        print("{} Loss: {:.4f} R2: {:.4f}".format("train", train_loss,
                                                  epoch_r2))
        losses["train"].append(train_loss)
        r2s["train"].append(epoch_r2)

        y_true, y_pred, player_ids, years, test_loss = test(
            model, criterion, dev_dl, dev_sz, device)
        epoch_r2 = metrics.r2_score(y_true, y_pred)
        losses["val"].append(test_loss)
        r2s["val"].append(epoch_r2)
        print("{} Loss: {:.4f} R2: {:.4f}".format("eval", test_loss, epoch_r2))

        if epoch_r2 > best_r2:
            best_r2 = epoch_r2
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print("Training complete in {:.0f}m {:.0f}s".format(
        time_elapsed // 60, time_elapsed % 60))
    print("Best R2: {:4f}".format(best_r2))

    pickle.dump(losses, open("model/last5_losses.pkl", "wb"))
    pickle.dump(r2s, open("model/last5_r2s.pkl", "wb"))

    model.cpu()
    model.load_state_dict(best_model_wts)
    model_name = "best_lastk.model"
    print("Model name: ", model_name)
    save_model_path = os.path.join(args.model_dir, model_name)
    torch.save(model.state_dict(), save_model_path)
    return best_r2
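
# Sketch (not from the source) for inspecting the pickled training curves.
# The file path matches the dump above; the matplotlib usage and output
# filename are assumptions.
import pickle
import matplotlib.pyplot as plt

losses = pickle.load(open("model/last5_losses.pkl", "rb"))
for split in ("train", "val"):
    plt.plot(losses[split], label=split)
plt.xlabel("epoch")
plt.ylabel("MSE loss")
plt.legend()
plt.savefig("model/last5_losses.png")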