Exemplo n.º 1
0
def cross_validate(args: Namespace,
                   logger: Logger = None) -> Tuple[float, float]:
    """Run k-fold cross validation, training one model per fold.

    Mutates ``args.seed`` and ``args.save_dir`` per fold (seed is offset by
    the fold number; each fold saves under ``save_dir/fold_<k>``).

    :param args: Training arguments; must provide ``seed``, ``save_dir``,
        ``data_path``, ``num_folds``, ``metric`` and ``show_individual_scores``.
    :param logger: Optional logger; falls back to ``print`` when absent.
    :return: Mean and standard deviation of per-model average test scores.
    """
    info = print if logger is None else logger.info

    # Remember the starting seed / directory so each fold derives from them.
    base_seed = args.seed
    base_save_dir = args.save_dir
    task_names = get_task_names(args.data_path)

    # Train one model per fold, each with its own seed and save directory.
    fold_scores = []
    for fold in range(args.num_folds):
        info(f'Fold {fold}')
        args.seed = base_seed + fold
        args.save_dir = os.path.join(base_save_dir, f'fold_{fold}')
        makedirs(args.save_dir)
        fold_scores.append(run_training(args, logger))
    fold_scores = np.array(fold_scores)

    # Report results
    info(f'{args.num_folds}-fold cross validation')

    # Per-fold summary (and optionally a per-task breakdown).
    for fold, scores in enumerate(fold_scores):
        info(
            f'Seed {base_seed + fold} ==> test {args.metric} = {np.nanmean(scores):.6f}'
        )

        if args.show_individual_scores:
            for task_name, score in zip(task_names, scores):
                info(
                    f'Seed {base_seed + fold} ==> test {task_name} {args.metric} = {score:.6f}'
                )

    # Average each model's score across its tasks, then summarize across models.
    per_model = np.nanmean(fold_scores, axis=1)
    mean_score = np.nanmean(per_model)
    std_score = np.nanstd(per_model)
    info(f'Overall test {args.metric} = {mean_score:.6f} +/- {std_score:.6f}')

    if args.show_individual_scores:
        for task_idx, task_name in enumerate(task_names):
            info(
                f'Overall test {task_name} {args.metric} = '
                f'{np.nanmean(fold_scores[:, task_idx]):.6f} +/- {np.nanstd(fold_scores[:, task_idx]):.6f}'
            )

    return mean_score, std_score
Exemplo n.º 2
0
# Experiment configuration for the 'dropR' run: output locations and
# wandb experiment-tracking identifiers.
args.save_dir = '/home/willlamb/checkpoints/dropR'
args.results_dir = '/home/willlamb/results/dropR'
args.wandb_proj = 'official1'
args.wandb_name = 'dropR'

# ensembling and samples
args.ensemble_size = 5
args.pytorch_seeds = [0,1,2,3,4]
args.samples = 30

### dropR ###
# Epoch schedule; 'noam_epochs' presumably controls a Noam-style LR
# schedule for the first 100 of the 300 epochs — TODO confirm in run_training.
args.warmup_epochs = 2.0
args.noam_epochs = 100
args.epochs = 300

# Learning-rate endpoints for the scheduler (init -> max -> final).
args.init_lr = 1e-4
args.max_lr = 1e-3
args.final_lr = 1e-4

args.init_log_noise = -2
args.weight_decay = 0.01

# Dropout only on the feed-forward head, none on the MPNN; test_dropout=True
# presumably keeps dropout active at test time (MC-dropout style) — verify.
args.dropout_mpnn = 0
args.dropout_ffn = 0.1
args.test_dropout = True  

################################################

# run
results = run_training(args)
Exemplo n.º 3
0
def train():
    """Renders the train page and performs training if request method is POST.

    GET: render the training form.
    POST: validate the selected dataset against the chosen dataset type,
    launch a progress-bar subprocess, run training, register the checkpoint
    in the database, and move every produced ``.pt`` model into the
    checkpoint folder.

    Returns:
        The rendered train page, carrying any accumulated warnings/errors.
    """
    global PROGRESS, TRAINING

    warnings, errors = [], []

    if request.method == 'GET':
        return render_train()

    # Get arguments from the submitted form
    data_name, epochs, ensemble_size, checkpoint_name = \
        request.form['dataName'], int(request.form['epochs']), \
        int(request.form['ensembleSize']), request.form['checkpointName']
    gpu = request.form.get('gpu')
    data_path = os.path.join(app.config['DATA_FOLDER'], f'{data_name}.csv')
    dataset_type = request.form.get('datasetType', 'regression')

    # Create and modify args
    args = TrainArgs().parse_args([
        '--data_path', data_path, '--dataset_type', dataset_type, '--epochs',
        str(epochs), '--ensemble_size',
        str(ensemble_size)
    ])

    # Check if regression/classification selection matches data
    data = get_data(path=data_path)
    targets = data.targets()
    unique_targets = {
        target
        for row in targets for target in row if target is not None
    }

    if dataset_type == 'classification' and len(unique_targets - {0, 1}) > 0:
        errors.append(
            'Selected classification dataset but not all labels are 0 or 1. Select regression instead.'
        )

        return render_train(warnings=warnings, errors=errors)

    if dataset_type == 'regression' and unique_targets <= {0, 1}:
        errors.append(
            'Selected regression dataset but all labels are 0 or 1. Select classification instead.'
        )

        return render_train(warnings=warnings, errors=errors)

    # The literal string 'None' means the user explicitly requested CPU;
    # otherwise the value is a GPU index.
    if gpu is not None:
        if gpu == 'None':
            args.cuda = False
        else:
            args.gpu = int(gpu)

    current_user = request.cookies.get('currentUser')

    if not current_user:
        # Use DEFAULT as current user if the client's cookie is not set.
        current_user = app.config['DEFAULT_USER_ID']

    ckpt_id, ckpt_name = db.insert_ckpt(checkpoint_name, current_user,
                                        args.dataset_type, args.epochs,
                                        args.ensemble_size, len(targets))

    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir

        process = mp.Process(target=progress_bar, args=(args, PROGRESS))
        process.start()
        TRAINING = 1

        # Run training
        logger = create_logger(name='train',
                               save_dir=args.save_dir,
                               quiet=args.quiet)
        task_scores = run_training(args, logger)
        process.join()

        # Reset globals
        TRAINING = 0
        PROGRESS = mp.Value('d', 0.0)

        # Check if name overlap
        if checkpoint_name != ckpt_name:
            warnings.append(
                name_already_exists_message('Checkpoint', checkpoint_name,
                                            ckpt_name))

        # Move models into the permanent checkpoint folder.
        # FIX: os.walk already yields each `root` prefixed with args.save_dir,
        # so the source is simply root/fname. The previous
        # os.path.join(args.save_dir, root, fname) duplicated the prefix and
        # only worked because TemporaryDirectory paths are absolute (join
        # discards earlier components before an absolute one).
        for root, _, files in os.walk(args.save_dir):
            for fname in files:
                if fname.endswith('.pt'):
                    model_id = db.insert_model(ckpt_id)
                    save_path = os.path.join(app.config['CHECKPOINT_FOLDER'],
                                             f'{model_id}.pt')
                    shutil.move(os.path.join(root, fname), save_path)

    return render_train(trained=True,
                        metric=args.metric,
                        num_tasks=len(args.task_names),
                        task_names=args.task_names,
                        task_scores=format_float_list(task_scores),
                        mean_score=format_float(np.mean(task_scores)),
                        warnings=warnings,
                        errors=errors)
Exemplo n.º 4
0
def train():
    """Render the train page (GET) or run model training (POST).

    On POST: validates the dataset's labels against the chosen dataset type,
    trains in a temporary directory while a separate process drives the
    progress bar, then moves the resulting checkpoint into the configured
    checkpoint folder. Accumulated non-fatal issues are surfaced through the
    module-level ``training_message`` string.
    """
    global training_message
    if request.method == 'GET':
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'])

    # Get arguments
    data_name, epochs, checkpoint_name = \
        request.form['dataName'], int(request.form['epochs']), request.form['checkpointName']
    gpu = request.form.get('gpu', None)
    dataset_type = request.form.get('datasetType', 'regression')

    # Normalize the checkpoint filename so it always carries the .pt suffix.
    if not checkpoint_name.endswith('.pt'):
        checkpoint_name += '.pt'

    # Create and modify args
    parser = ArgumentParser()
    add_train_args(parser)
    args = parser.parse_args()

    args.data_path = os.path.join(app.config['DATA_FOLDER'], data_name)
    args.dataset_type = dataset_type
    args.epochs = epochs

    # Inspect the labels to catch dataset-type mismatches before training.
    target_set, all_targets_have_labels, has_invalid_targets = get_target_set(
        args.data_path)
    if len(target_set) == 0:
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'],
                               error="No training labels provided")
    if has_invalid_targets:
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'],
                               error="Training data contains invalid labels")
    # Classification requires all labels in {0, 1}; any other value means the
    # data looks like a regression dataset.
    classification_on_regression_dataset = ((not target_set <= set([0, 1]))
                                            and args.dataset_type
                                            == 'classification')
    if classification_on_regression_dataset:
        return render_template(
            'train.html',
            datasets=get_datasets(),
            started=False,
            cuda=app.config['CUDA'],
            gpus=app.config['GPUS'],
            error=
            'Selected classification dataset, but not all labels are 0 or 1')
    # The converse is only a warning: regression on {0, 1} labels still runs.
    regression_on_classification_dataset = (target_set <= set([0, 1]) and
                                            args.dataset_type == 'regression')
    if not all_targets_have_labels:
        training_message += 'One or more targets have no labels. \n'  # TODO could have separate warning messages for each?
    if regression_on_classification_dataset:
        training_message += 'All labels are 0 or 1; did you mean to train classification instead of regression?\n'

    # The literal string 'None' means the user explicitly requested CPU.
    if gpu is not None:
        if gpu == 'None':
            args.no_cuda = True
        else:
            args.gpu = int(gpu)

    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir
        modify_train_args(args)
        # NOTE(review): modify_train_args may alter save_dir; otherwise the
        # temp dir always exists and this warning would always fire — verify.
        if os.path.isdir(args.save_dir):
            training_message += 'Overwriting preexisting checkpoint with the same name.'
        logger = logging.getLogger('train')
        logger.setLevel(logging.DEBUG)
        logger.propagate = False
        set_logger(logger, args.save_dir, args.quiet)

        # Progress bar runs in a separate process; `started` gates the UI state.
        global progress
        process = mp.Process(target=progress_bar, args=(args, progress))
        process.start()
        global started
        started = 1
        # Run training
        run_training(args, logger)
        process.join()

        # reset globals
        started = 0
        progress = mp.Value('d', 0.0)

        # Move checkpoint
        shutil.move(
            os.path.join(args.save_dir, 'model_0', 'model.pt'),
            os.path.join(app.config['CHECKPOINT_FOLDER'], checkpoint_name))

    # Surface accumulated warnings once, then clear the shared message buffer.
    warning = training_message if len(training_message) > 0 else None
    training_message = ""
    return render_template('train.html',
                           datasets=get_datasets(),
                           cuda=app.config['CUDA'],
                           gpus=app.config['GPUS'],
                           trained=True,
                           warning=warning)
Exemplo n.º 5
0
def train():
    """Render the train page on GET; on POST, train a model and store its checkpoint."""
    global progress, training

    warning_msgs, error_msgs = [], []

    if request.method == 'GET':
        return render_train()

    # Pull the submitted form fields.
    data_name = request.form['dataName']
    epochs = int(request.form['epochs'])
    checkpoint_name = request.form['checkpointName']
    gpu = request.form.get('gpu')
    dataset_type = request.form.get('datasetType', 'regression')
    data_path = os.path.join(app.config['DATA_FOLDER'], data_name)

    # Normalize the checkpoint filename to carry the .pt suffix.
    if not checkpoint_name.endswith('.pt'):
        checkpoint_name += '.pt'

    # Build training args from the standard parser, then apply form overrides.
    parser = ArgumentParser()
    add_train_args(parser)
    args = parser.parse_args()
    args.data_path = data_path
    args.dataset_type = dataset_type
    args.epochs = epochs

    # Sanity-check that the chosen dataset type matches the label values.
    dataset = get_data(path=data_path)
    label_values = dataset.targets()
    label_set = set(np.unique(label_values))
    binary_labels = label_set <= {0, 1}

    if dataset_type == 'classification' and not binary_labels:
        error_msgs.append(
            'Selected classification dataset but not all labels are 0 or 1. Select regression instead.'
        )

        return render_train(warnings=warning_msgs, errors=error_msgs)

    if dataset_type == 'regression' and binary_labels:
        error_msgs.append(
            'Selected regression dataset but all labels are 0 or 1. Select classification instead.'
        )

        return render_train(warnings=warning_msgs, errors=error_msgs)

    # The literal string 'None' means the user explicitly picked CPU.
    if gpu == 'None':
        args.no_cuda = True
    elif gpu is not None:
        args.gpu = int(gpu)

    with TemporaryDirectory() as scratch_dir:
        args.save_dir = scratch_dir
        modify_train_args(args)

        train_logger = logging.getLogger('train')
        train_logger.setLevel(logging.DEBUG)
        train_logger.propagate = False
        set_logger(train_logger, args.save_dir, args.quiet)

        # Drive the progress bar from a separate process while training runs.
        monitor = mp.Process(target=progress_bar, args=(args, progress))
        monitor.start()
        training = 1

        # Run the actual training.
        task_scores = run_training(args, train_logger)
        monitor.join()

        # Reset the module-level progress state.
        training = 0
        progress = mp.Value('d', 0.0)

        # Avoid clobbering an existing checkpoint with the same name.
        desired_path = os.path.join(app.config['CHECKPOINT_FOLDER'],
                                    checkpoint_name)
        final_path = find_unique_path(desired_path)
        if final_path != desired_path:
            warning_msgs.append(
                name_already_exists_message('Checkpoint', desired_path,
                                            final_path))

        # Move the trained checkpoint out of the temporary directory.
        shutil.move(os.path.join(args.save_dir, 'model_0', 'model.pt'),
                    final_path)

    return render_train(trained=True,
                        metric=args.metric,
                        num_tasks=len(args.task_names),
                        task_names=args.task_names,
                        task_scores=format_float_list(task_scores),
                        mean_score=format_float(np.mean(task_scores)),
                        warnings=warning_msgs,
                        errors=error_msgs)