Example #1
import math
import os

import torch

# project helpers assumed importable from the surrounding repo:
# create_logger, construct_loader, G2C, get_optimizer_and_scheduler,
# dict_to_str, save_yaml_file, NoamLR, train, test


def optimize(trial, args):

    setattr(args, 'hidden_dim',
            int(trial.suggest_categorical('d_model', [128, 256, 512])))
    setattr(args, 'depth',
            int(trial.suggest_discrete_uniform('n_enc', 2, 6, 1)))
    # distinct Optuna name: re-suggesting 'n_enc' with a different range
    # would raise an error instead of sampling the intended 1-3 values
    setattr(args, 'n_layers',
            int(trial.suggest_discrete_uniform('n_layers', 1, 3, 1)))
    setattr(args, 'lr', trial.suggest_loguniform('lr', 1e-5, 1e-2))
    setattr(args, 'batch_size',
            int(trial.suggest_categorical('batch_size', [16, 32, 64, 128])))

    setattr(args, 'log_dir',
            os.path.join(args.hyperopt_dir, str(trial._trial_id)))

    torch.manual_seed(0)
    train_logger = create_logger('train', args.log_dir)

    train_logger.info('Arguments are...')
    for arg in vars(args):
        train_logger.info(f'{arg}: {getattr(args, arg)}')

    # construct loader and set device
    train_loader, val_loader = construct_loader(args)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # build model
    model_parameters = {
        'node_dim': train_loader.dataset.num_node_features,
        'edge_dim': train_loader.dataset.num_edge_features,
        'hidden_dim': args.hidden_dim,
        'depth': args.depth,
        'n_layers': args.n_layers
    }
    model = G2C(**model_parameters).to(device)

    # multi gpu training
    if torch.cuda.device_count() > 1:
        train_logger.info(
            f'Using {torch.cuda.device_count()} GPUs for training...')
        model = torch.nn.DataParallel(model)

    # get optimizer and scheduler
    optimizer, scheduler = get_optimizer_and_scheduler(
        args, model, len(train_loader.dataset))
    loss = torch.nn.MSELoss(reduction='sum')

    # record parameters
    train_logger.info(
        f'\nModel parameters are:\n{dict_to_str(model_parameters)}\n')
    save_yaml_file(os.path.join(args.log_dir, 'model_parameters.yml'),
                   model_parameters)
    train_logger.info(f'Optimizer parameters are:\n{optimizer}\n')
    if scheduler:
        train_logger.info('Scheduler state dict is:')
        for key, value in scheduler.state_dict().items():
            train_logger.info(f'{key}: {value}')
        train_logger.info('')

    best_val_loss = math.inf
    best_epoch = 0

    model.to(device)
    train_logger.info("Starting training...")
    for epoch in range(1, args.n_epochs + 1):
        train_loss = train(model, train_loader, optimizer, loss, device,
                           scheduler, train_logger if args.verbose else None)
        train_logger.info("Epoch {}: Training Loss {}".format(
            epoch, train_loss))

        val_loss = test(model, val_loader, loss, device, args.log_dir, epoch)
        train_logger.info("Epoch {}: Validation Loss {}".format(
            epoch, val_loss))
        if scheduler and not isinstance(scheduler, NoamLR):
            scheduler.step(val_loss)

        if val_loss <= best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            torch.save(model.state_dict(),
                       os.path.join(args.log_dir, f'epoch_{epoch}_state_dict'))
    train_logger.info("Best Validation Loss {} on Epoch {}".format(
        best_val_loss, best_epoch))

    train_logger.handlers = []
    return best_val_loss
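
This optimize function is an Optuna objective: it takes a trial, samples hyperparameters, trains, and returns the best validation loss. A minimal driver sketch for wiring it into a study (the n_trials value is arbitrary, and args is assumed to be the project's parsed command-line arguments):

import functools

import optuna

# 'args' is assumed to already carry hyperopt_dir, n_epochs, verbose, etc.
study = optuna.create_study(direction='minimize')
study.optimize(functools.partial(optimize, args=args), n_trials=20)

print(f'Best validation loss: {study.best_value}')
print(f'Best hyperparameters: {study.best_params}')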
Example #2
            f"Epoch {epoch}: Validation Classification Accuracy {val_acc}")

    if val_loss <= best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), os.path.join(args.log_dir,
                                                    'best_model'))
logger.info(f"Best Validation Loss {best_val_loss} on Epoch {best_epoch}")

# load best model
model = GNN(args, train_loader.dataset.num_node_features,
            train_loader.dataset.num_edge_features).to(args.device)
state_dict = torch.load(os.path.join(args.log_dir, 'best_model'),
                        map_location=args.device)
model.load_state_dict(state_dict)

# predict test data
test_loader = construct_loader(args, modes='test')
preds, test_loss, test_acc, test_auc = test(model, test_loader, loss, stdzer,
                                            args.device, args.task)
logger.info(f"Test Loss {test_loss}")
if args.task == 'classification':
    logger.info(f"Test Classification Accuracy {test_acc}")
    logger.info(f"Test ROC AUC Score {test_auc}")

# save predictions
smiles = test_loader.dataset.smiles
preds_path = os.path.join(args.log_dir, 'preds.csv')
pd.DataFrame(list(zip(smiles, preds)),
             columns=['smiles', 'prediction']).to_csv(preds_path, index=False)
Example #3
import math
import os

import optuna
import pandas as pd
import torch

# project helpers assumed importable from the surrounding repo:
# modify_train_args, create_logger, construct_loader, Standardizer, GNN,
# build_lr_scheduler, get_loss_func, train, eval, test


def optimize(trial, args):

    setattr(args, 'hidden_size',
            int(trial.suggest_discrete_uniform('hidden_size', 300, 1200, 300)))
    setattr(args, 'depth',
            int(trial.suggest_discrete_uniform('depth', 2, 6, 1)))
    # no int() cast here: dropout is a float in [0, 1], and int() would
    # truncate every suggested value below 1.0 down to 0
    setattr(args, 'dropout',
            trial.suggest_discrete_uniform('dropout', 0, 1, 0.2))
    setattr(args, 'lr', trial.suggest_loguniform('lr', 1e-5, 1e-3))
    setattr(args, 'batch_size',
            int(trial.suggest_categorical('batch_size', [25, 50, 100])))
    setattr(
        args, 'graph_pool',
        trial.suggest_categorical('graph_pool',
                                  ['sum', 'mean', 'max', 'attn', 'set2set']))

    setattr(args, 'log_dir',
            os.path.join(args.hyperopt_dir, str(trial._trial_id)))
    modify_train_args(args)

    torch.manual_seed(args.seed)
    train_logger = create_logger('train', args.log_dir)

    train_loader, val_loader = construct_loader(args)
    mean = train_loader.dataset.mean
    std = train_loader.dataset.std
    stdzer = Standardizer(mean, std, args.task)

    # create model, optimizer, scheduler, and loss fn
    model = GNN(args, train_loader.dataset.num_node_features,
                train_loader.dataset.num_edge_features).to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = build_lr_scheduler(optimizer, args, len(train_loader.dataset))
    loss = get_loss_func(args)
    best_val_loss = math.inf
    best_epoch = 0

    # record args, optimizer, and scheduler info
    train_logger.info('Arguments are...')
    for arg in vars(args):
        train_logger.info(f'{arg}: {getattr(args, arg)}')
    train_logger.info(f'\nOptimizer parameters are:\n{optimizer}\n')
    train_logger.info('Scheduler state dict is:')
    for key, value in scheduler.state_dict().items():
        train_logger.info(f'{key}: {value}')
    train_logger.info('')

    # train
    train_logger.info("Starting training...")
    for epoch in range(0, args.n_epochs):
        train_loss, train_acc = train(model, train_loader, optimizer, loss,
                                      stdzer, args.device, scheduler,
                                      args.task)
        train_logger.info(f"Epoch {epoch}: Training Loss {train_loss}")

        val_loss, val_acc = eval(model, val_loader, loss, stdzer, args.device,
                                 args.task)
        train_logger.info(f"Epoch {epoch}: Validation Loss {val_loss}")

        if val_loss <= best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            torch.save(model.state_dict(),
                       os.path.join(args.log_dir, 'best_model'))

        # report intermediate results for early stopping
        trial.report(val_loss, epoch)

        # handle pruning based on the intermediate value
        if trial.should_prune():
            train_logger.handlers = []
            raise optuna.TrialPruned()

    train_logger.info(
        f"Best Validation Loss {best_val_loss} on Epoch {best_epoch}")

    # load best model
    model = GNN(args, train_loader.dataset.num_node_features,
                train_loader.dataset.num_edge_features).to(args.device)
    state_dict = torch.load(os.path.join(args.log_dir, 'best_model'),
                            map_location=args.device)
    model.load_state_dict(state_dict)

    # predict test data
    test_loader = construct_loader(args, modes='test')
    preds, test_loss, test_acc, test_auc = test(model, test_loader, loss,
                                                stdzer, args.device, args.task)
    train_logger.info(f"Test Loss {test_loss}")

    # save predictions
    smiles = test_loader.dataset.smiles
    preds_path = os.path.join(args.log_dir, 'preds.csv')
    pd.DataFrame(list(zip(smiles, preds)),
                 columns=['smiles', 'prediction']).to_csv(preds_path,
                                                          index=False)

    train_logger.handlers = []
    return best_val_loss
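
Unlike Example #1, this objective reports intermediate values and honors pruning, so it should run under a study with a pruner attached. A minimal sketch, assuming the same setup as above (the MedianPruner settings are illustrative, not taken from the original project):

import functools

import optuna

# prune a trial once its intermediate validation loss is worse than the
# median of earlier trials at the same epoch; warm-up values are arbitrary
pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
study = optuna.create_study(direction='minimize', pruner=pruner)
study.optimize(functools.partial(optimize, args=args), n_trials=50)

print(f'Best validation loss: {study.best_value}')
print(f'Best hyperparameters: {study.best_params}')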
Example #4
    logger.info('')

loss = torch.nn.MSELoss(reduction='sum')
# alternative: mean absolute error
# loss = torch.nn.L1Loss(reduction='sum')

best_val_loss = math.inf
best_epoch = 0

logger.info("Starting training...")
for epoch in range(1, args.n_epochs + 1):
    train_loss = train(model, train_loader, optimizer, loss, device, scheduler,
                       logger if args.verbose else None)
    logger.info("Epoch {}: Training Loss {}".format(epoch, train_loss))

    val_loss = test(model, val_loader, loss, device, log_dir, epoch)
    logger.info("Epoch {}: Validation Loss {}".format(epoch, val_loss))
    if scheduler and not isinstance(scheduler, NoamLR):
        scheduler.step(val_loss)

    if val_loss <= best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        # torch.save(model.state_dict(), os.path.join(log_dir, 'best_model'))

logger.info("Best Validation Loss {} on Epoch {}".format(
    best_val_loss, best_epoch))

log_file = os.path.join(log_dir, log_file_name + '.log')
plot_train_val_loss(log_file)
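
plot_train_val_loss is a project helper whose implementation is not shown in these examples. A hypothetical minimal version, assuming it parses the "Epoch N: Training Loss X" / "Epoch N: Validation Loss X" lines written by the loop above:

import re

import matplotlib.pyplot as plt


def plot_train_val_loss(log_file):
    # hypothetical sketch: pull per-epoch losses out of the log text
    train_losses, val_losses = [], []
    with open(log_file) as f:
        for line in f:
            m = re.search(r'Epoch \d+: (Training|Validation) Loss (\S+)', line)
            if m:
                target = train_losses if m.group(1) == 'Training' else val_losses
                target.append(float(m.group(2)))
    plt.plot(train_losses, label='train')
    plt.plot(val_losses, label='validation')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig(log_file.replace('.log', '_loss.png'))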