Example #1
def test_training_loop(random_data_loader):
    """Test the training loop."""
    device = torch.device('cpu')
    model = QLeNet5(F.nll_loss).to(device)
    metrics = {'Loss': LossMetric(model.loss_fn, accumulate=False)}
    optimizer = get_optimizer(model.parameters(), {
        'algorithm': 'sgd',
        'lr': 0.1
    })
    scheduler = get_lr_scheduler(optimizer, {
        'scheduler': 'step_lr',
        'step_size': 1,
        'gamma': 0.7
    }, 3, len(random_data_loader))
    fake_hook = mock.MagicMock()
    hooks = [fake_hook]

    losses = []
    for epoch in range(1, 3):
        train(model=model,
              train_loader=random_data_loader,
              metrics=metrics,
              optimizer=optimizer,
              scheduler=scheduler,
              device=device,
              epoch=epoch,
              log_interval=4,
              hooks=hooks)
        losses.append(metrics['Loss'].compute())

    # Ensure that hooks are called and loss is changing
    assert fake_hook.called
    assert losses[1] != losses[0]
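
This test relies on a random_data_loader pytest fixture that is not shown here. A minimal sketch of such a fixture, assuming QLeNet5 consumes MNIST-shaped 1x28x28 inputs with 10 classes (the fixture name is the only thing taken from the source):

import pytest
import torch
from torch.utils.data import DataLoader, TensorDataset

@pytest.fixture
def random_data_loader():
    # Random MNIST-shaped inputs and integer labels; sizes are arbitrary.
    images = torch.randn(32, 1, 28, 28)
    labels = torch.randint(0, 10, (32,))
    return DataLoader(TensorDataset(images, labels), batch_size=8)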
Example #2
def test_get_multi_step_lr_scheduler():
    """Test get multi step lr scheduler."""
    model = QLeNet5(F.nll_loss)
    optimizer = get_optimizer(model.parameters(), {
        'algorithm': 'sgd',
        'lr': 0.1
    })
    scheduler = get_lr_scheduler(optimizer, {
        'scheduler': 'multi_step_lr',
        'milestones': [30, 70],
        'gamma': 0.7
    }, 70, 100)

    assert isinstance(scheduler, lr_scheduler.MultiStepLR)
    for _ in range(30 * 100):
        assert optimizer.param_groups[0]['lr'] == 0.1
        optimizer.step()
        scheduler.step()

    for _ in range(40 * 100):
        assert optimizer.param_groups[0]['lr'] == 0.7 * 0.1
        optimizer.step()
        scheduler.step()

    assert optimizer.param_groups[0]['lr'] == 0.7 * 0.7 * 0.1
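
Note how the milestones are interpreted: get_lr_scheduler receives 70 epochs and 100 steps per epoch, and the test steps the scheduler once per batch, so the epoch milestones [30, 70] are evidently rescaled to per-step milestones (3000 and 7000 steps). That is why the learning rate only drops after 30 * 100 iterations, and again after a further 40 * 100.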
Example #3
def test_get_linear_lr_scheduler():
    """Test get linear lr scheduler."""
    model = QLeNet5(F.nll_loss)
    optimizer = get_optimizer(model.parameters(), {
        'algorithm': 'sgd',
        'lr': 0.1
    })

    scheduler = get_lr_scheduler(optimizer, {
        'scheduler': 'linear_lr',
        'min_lr': 1e-5
    }, 80, 100)

    assert isinstance(scheduler, LinearLR)
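
The linear_lr test stops at the type check. Assuming the project's LinearLR anneals from the initial lr down to min_lr over epochs * steps_per_epoch scheduler steps (an assumption; this LinearLR is a project class, not torch.optim.lr_scheduler.LinearLR), a follow-up check might look like:

import math

# Hypothetical extension of the test above: walk the full schedule
# and verify the learning rate lands near the configured floor.
for _ in range(80 * 100):
    optimizer.step()
    scheduler.step()
assert math.isclose(optimizer.param_groups[0]['lr'], 1e-5, rel_tol=1e-3)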
Example #4
def test_get_step_lr_scheduler():
    """Test get step lr scheduler."""
    model = QLeNet5(F.nll_loss)
    optimizer = get_optimizer(model.parameters(), {
        'algorithm': 'sgd',
        'lr': 0.1
    })

    scheduler = get_lr_scheduler(optimizer, {
        'scheduler': 'step_lr',
        'step_size': 1,
        'gamma': 0.7
    }, 5, 100)

    assert isinstance(scheduler, lr_scheduler.StepLR)
    for _ in range(100):
        assert optimizer.param_groups[0]['lr'] == 0.1
        optimizer.step()
        scheduler.step()

    assert optimizer.param_groups[0]['lr'] == 0.7 * 0.1
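
As in the multi-step example, step_size=1 is evidently expressed in epochs and rescaled by the 100 steps per epoch passed to get_lr_scheduler: the learning rate holds at 0.1 for exactly 100 per-batch scheduler steps before decaying by gamma.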
Example #5
def test_get_lambda_lr_scheduler():
    """Test get lambda lr scheduler."""
    model = QLeNet5(F.nll_loss)
    optimizer = get_optimizer(model.parameters(), {
        'algorithm': 'sgd',
        'lr': 0.1
    })

    lr_lambda = """lambda s: next(
        v for (a, b), v in {(0, 200): 1, (200, 1000): 0.75}.items() if a <= s < b
    )"""
    scheduler = get_lr_scheduler(optimizer, {
        'scheduler': 'lambda_lr',
        'lr_lambda': lr_lambda
    }, 10, 100)

    assert isinstance(scheduler, lr_scheduler.LambdaLR)
    for _ in range(200):
        assert optimizer.param_groups[0]['lr'] == 0.1
        optimizer.step()
        scheduler.step()

    assert optimizer.param_groups[0]['lr'] == 0.75 * 0.1
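
Note that lr_lambda arrives as a string, so it can live in a YAML or JSON config. get_lr_scheduler presumably evaluates it into a callable before constructing LambdaLR; a minimal sketch of that conversion, assumed rather than taken from the source:

from torch.optim import lr_scheduler

def build_lambda_lr(optimizer, scheduler_config):
    # Turn the configured string into a callable; only safe for trusted configs.
    fn = eval(scheduler_config['lr_lambda'])
    return lr_scheduler.LambdaLR(optimizer, lr_lambda=fn)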
Example #6
def classification_task(
    config: dict,
    experiment_root_directory: Path,
    data_loader_cls: Type[QuantDataLoader],
    get_hooks: Callable[[dict, Path, MetricDict, MetricDict],
                        Tuple[List[Hook], List[Hook]]],
    restore_experiment: Optional[Path] = None,
) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]:
    """
    Driver program for running a classification task.

    Args:
        config: merged config with CLI args
        experiment_root_directory: root directory for storing logs, checkpoints, etc.
        data_loader_cls: The QuantDataLoader class
        get_hooks: a function that returns lists of training and testing hooks
        restore_experiment: path to the experiment to restore, or None to skip restoring

    Returns:
        (List of training set metrics for each epoch, list of test set metrics for each epoch).
    """
    env_config = config['environment']
    data_config = config['data']
    model_config = config['model']
    optimization_config = config['optimization']
    log_config = config['log']

    init_logging(log_config['level'])

    device = get_device(env_config['ngpus'], config.get('seed'),
                        **env_config.get('cuda', {}))

    data_loader = data_loader_cls(**data_config)
    train_loader = (data_loader.get_train_loader()
                    if not config.get('skip_training') else None)
    test_loader = data_loader.get_test_loader()

    epochs = optimization_config['epochs']

    teacher = None
    use_kd = 'kd_config' in model_config
    if use_kd:
        teacher, kd_loss = get_teacher_and_kd_loss(
            device=device,
            ngpus=env_config['ngpus'],
            strict_keys=model_config.get('strict_keys', True),
            **model_config['kd_config'])

    loss_fn = get_loss_fn(model_config['loss']) if not use_kd else kd_loss
    model = get_model(
        architecture=model_config['architecture'],
        loss_fn=loss_fn,
        arch_config=model_config['arch_config'],
        device=device,
        ngpus=env_config['ngpus'],
    )

    optimizer, scheduler = None, None
    if not config.get('skip_training'):
        optimizer = get_optimizer(model.parameters(),
                                  optimization_config['optimizer'])
        scheduler = get_lr_scheduler(
            optimizer, optimization_config['lr_scheduler'], epochs,
            len(train_loader))  # type: ignore  # noqa: E501

    if restore_experiment is not None:
        checkpoint_path = get_path_to_checkpoint(restore_experiment)
        model, restored_optimizer, restored_scheduler, start_epoch = restore_from_checkpoint(
            model,
            optimizer,
            scheduler,
            checkpoint_path,
            device,
            model_config.get('strict_keys', True),
        )
        optimizer, scheduler = restored_optimizer, restored_scheduler
        start_epoch += 1
    elif config.get('init_from_checkpoint'):
        model, _, _, _ = restore_from_checkpoint(
            model,
            None,
            None,
            config['init_from_checkpoint'],
            device,
            model_config.get('strict_keys', True),
        )
        start_epoch = 1
    else:
        start_epoch = 1

    train_metrics = {
        'Loss': LossMetric(loss_fn, accumulate=True),
        'Top-1 Accuracy': Top1Accuracy(accumulate=True),
        'Top-5 Accuracy': TopKAccuracy(5, accumulate=True),
    }

    test_metrics = {
        'Loss': LossMetric(get_loss_fn(model_config['loss']), accumulate=True),
        'Top-1 Accuracy': Top1Accuracy(accumulate=True),
        'Top-5 Accuracy': TopKAccuracy(5, accumulate=True),
    }

    train_hooks, test_hooks = get_hooks(config, experiment_root_directory,
                                        train_metrics, test_metrics)
    train_epoch_metrics, test_epoch_metrics = [], []

    if config.get('skip_training'):
        computed_test_metrics = evaluate(
            model=model,
            test_loader=test_loader,
            metrics=test_metrics,
            device=device,
            epoch=1,
            hooks=test_hooks,
        )
        test_epoch_metrics.append(computed_test_metrics)
    else:
        for epoch in range(start_epoch, start_epoch + epochs):
            computed_train_metrics = train(
                model=model,
                train_loader=train_loader,  # type: ignore
                metrics=train_metrics,
                optimizer=optimizer,
                scheduler=scheduler,  # type: ignore
                device=device,
                epoch=epoch,
                log_interval=log_config['interval'],
                hooks=train_hooks,
                teacher=teacher,
            )
            computed_test_metrics = evaluate(
                model=model,
                test_loader=test_loader,
                metrics=test_metrics,
                device=device,
                epoch=epoch,
                hooks=test_hooks,
            )

            train_epoch_metrics.append(computed_train_metrics)
            test_epoch_metrics.append(computed_test_metrics)

            # Also checkpoint the final epoch, even when resuming from a later start_epoch
            # (the original `epoch == epochs` check misses runs restored past that point).
            if (epoch % log_config['save_model_freq'] == 0
                    or epoch == start_epoch + epochs - 1):
                log_checkpoints(
                    experiment_root_directory / config['experiment_name'] /
                    'checkpoints',
                    model,
                    optimizer,  # type: ignore
                    scheduler,  # type: ignore
                    epoch,
                )

    data_loader.cleanup()

    return train_epoch_metrics, test_epoch_metrics
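
For orientation, the keys the driver reads imply a config shaped roughly like the sketch below. Every concrete value is hypothetical, as are MyQuantDataLoader and my_get_hooks; only the key structure is taken from the function body:

from pathlib import Path

config = {
    'environment': {'ngpus': 0},  # get_device also honors an optional top-level 'seed'
    'data': {},                   # forwarded verbatim to data_loader_cls(**data_config)
    'model': {
        'architecture': 'qlenet5',   # hypothetical value
        'loss': 'nll_loss',          # hypothetical value
        'arch_config': {},
    },
    'optimization': {
        'epochs': 3,
        'optimizer': {'algorithm': 'sgd', 'lr': 0.1},
        'lr_scheduler': {'scheduler': 'step_lr', 'step_size': 1, 'gamma': 0.7},
    },
    'log': {'level': 'INFO', 'interval': 10, 'save_model_freq': 1},
    'experiment_name': 'example-run',
}

# MyQuantDataLoader and my_get_hooks are stand-ins for project-specific pieces.
train_history, test_history = classification_task(
    config, Path('./experiments'), MyQuantDataLoader, my_get_hooks)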