Example #1
def objective(trial):
    logdir = "./logdir"
    num_epochs = 10

    model = Net(trial)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True,
        callbacks=[
            AccuracyCallback(),
            CatalystPruningCallback(
                trial,
                metric="accuracy01"),  # top-1 accuracy as metric for pruning
        ],
    )

    return runner.state.valid_metrics["accuracy01"]
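The objective above only defines a single trial; it is meant to be handed to an Optuna study, which is what drives the CatalystPruningCallback. A minimal driver sketch, assuming Optuna is installed (the pruner choice and n_trials are illustrative, not taken from the snippet):

import optuna

# Maximize, because the objective returns top-1 accuracy.
study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=20)
print(study.best_trial.params)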
Example #2
def test_evaluation_loader_metrics() -> None:
    """
    Test that the metrics computed by evaluate_loader match the runner's internal loader metrics.
    """
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim,
                      out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk=(1, ))
    ]
    runner = SupervisedRunner()
    runner.train(
        loaders={
            "train": loader,
            "valid": loader
        },
        model=model,
        num_epochs=1,
        criterion=nn.BCEWithLogitsLoss(),
        callbacks=callbacks,
    )
    runner_internal_metrics = runner.loader_metrics
    evaluate_loader_metrics = runner.evaluate_loader(loader=loader,
                                                     callbacks=callbacks)
    assert runner_internal_metrics["accuracy01"] == evaluate_loader_metrics[
        "accuracy01"]
Example #3
def run(config_file):
    config = load_config(config_file)

    os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            task='cls'
        )
        for phase in ['train', 'valid']
    }

    # create model
    model = CustomNet(config.model.encoder, config.data.num_classes)

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.base_params(), 'lr': config.optimizer.params.encoder_lr},
        {'params': model.fresh_params(), 'lr': config.optimizer.params.decoder_lr}
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)

    callbacks = [MultiClassAccuracyCallback(threshold=0.5), F1ScoreCallback()]
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(resume=config.work_dir + '/checkpoints/best_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
Example #4
def test_loading_best_state_at_end():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/periodic_loader"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {
        "train": loader,
        "valid": loader,
    }

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = SupervisedRunner()

    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=5,
        verbose=False,
        callbacks=[
            PeriodicLoaderCallback(valid=3),
            CheckRunCallback(num_epoch_steps=5),
        ],
        load_best_on_end=True,
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\)", exp_output)) == 5
    assert len(re.findall(r"\(valid\)", exp_output)) == 1
    assert (len(
        re.findall(r"\(global epoch 3, epoch 3, stage train\)",
                   exp_output)) == 1)
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/train.3.pth")
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")

    shutil.rmtree(logdir, ignore_errors=True)
Example #5
def find_lr_range(final_lr: float = 1.0, num_steps: int = 1413):
    config = copy.deepcopy(experiment_config)
    del config["stages"]["scheduler_params"]
    config["stages"]["stage1"]["optimizer_params"]["lr"] = 1e-6

    config["stages"]["callbacks_params"]['lr_finder'] = {
        "callback": "LRFinderLogger",
        "final_lr": final_lr,
        "num_steps": num_steps,
        "scale": "log",
    }

    experiment = Experiment(config)
    
    runner = SupervisedRunner(
        input_key="images",
        output_key=["logit_" + c for c in output_classes.keys()],
        input_target_key=list(output_classes.keys()))
    
    try:
        runner.run_experiment(experiment)
    except NotImplementedError:
        pass
    
    import matplotlib.pyplot as plt
    plt.plot(runner.callbacks['lr_finder'].lr_history, runner.callbacks['lr_finder'].loss_history)
    plt.xscale('log')
    plt.show()  # display the LR-vs-loss curve
    
    return experiment, runner
Example #6
def train():
    """
    Train a simple linear model with Catalyst, building loaders from a dataset factory (datasets_fn).
    """
    num_features = int(1e1)
    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 1)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])

    runner = SupervisedRunner()
    runner.train(
        model=model,
        datasets={
            "batch_size": 32,
            "num_workers": 1,
            "get_datasets_fn": datasets_fn,
            "num_features": num_features,
        },
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        logdir="./logs/example_3",
        num_epochs=8,
        verbose=True,
        distributed=False,
        check=True,
    )
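The datasets argument above delegates dataset creation to a factory (get_datasets_fn); the extra keys such as num_features are forwarded to that callable. A minimal sketch of what such a factory might look like, assuming it returns a mapping of loader names to datasets (the synthetic regression data here is illustrative):

import torch
from torch.utils.data import TensorDataset

def datasets_fn(num_features: int):
    # Synthetic regression data; the runner builds the actual loaders
    # from these datasets using the batch_size/num_workers given above.
    X = torch.rand(int(1e4), num_features)
    y = torch.rand(int(1e4), 1)
    dataset = TensorDataset(X, y)
    return {"train": dataset, "valid": dataset}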
Example #7
def main():
    epochs = 5
    num_class = 10
    output_path = './output/catalyst'

    # Use if you want to fix seed
    # catalyst.utils.set_global_seed(42)
    # catalyst.utils.prepare_cudnn(deterministic=True)

    model = get_model()
    train_loader, val_loader = get_loaders()
    loaders = {"train": train_loader, "valid": val_loader}

    optimizer, lr_scheduler = get_optimizer(model=model)
    criterion = get_criterion()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=lr_scheduler,
        loaders=loaders,
        logdir=output_path,
        callbacks=[AccuracyCallback(num_classes=num_class, accuracy_args=[1])],
        num_epochs=epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=None,
        verbose=True
    )
Example #8
    def test_mnist(self):
        utils.set_global_seed(42)
        x_train = np.random.random((100, 1, 28, 28)).astype(np.float32)
        y_train = _to_categorical(np.random.randint(10, size=(100, 1)),
                                  num_classes=10).astype(np.float32)
        x_valid = np.random.random((20, 1, 28, 28)).astype(np.float32)
        y_valid = _to_categorical(np.random.randint(10, size=(20, 1)),
                                  num_classes=10).astype(np.float32)

        x_train, y_train, x_valid, y_valid = \
            list(map(torch.tensor, [x_train, y_train, x_valid, y_valid]))

        bs = 32
        num_workers = 4
        data_transform = transforms.ToTensor()

        loaders = collections.OrderedDict()

        trainset = torch.utils.data.TensorDataset(x_train, y_train)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=bs,
                                                  shuffle=True,
                                                  num_workers=num_workers)

        validset = torch.utils.data.TensorDataset(x_valid, y_valid)
        validloader = torch.utils.data.DataLoader(validset,
                                                  batch_size=bs,
                                                  shuffle=False,
                                                  num_workers=num_workers)

        loaders["train"] = trainloader
        loaders["valid"] = validloader

        # experiment setup
        num_epochs = 3
        logdir = "./logs"

        # model, criterion, optimizer
        model = Net()
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters())

        # model runner
        runner = SupervisedRunner()

        # model training
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     loaders=loaders,
                     logdir=logdir,
                     num_epochs=num_epochs,
                     verbose=False,
                     callbacks=[CheckpointCallback(save_n_best=3)])

        with open('./logs/checkpoints/_metrics.json') as f:
            metrics = json.load(f)
            self.assertTrue(
                metrics['train.3']['loss'] < metrics['train.1']['loss'])
            self.assertTrue(metrics['best']['loss'] < 0.35)
Example #9
def main(config):
    """
    Main code for training a classification/seg/classification+seg model.

    Args:
        config (dict): dictionary read from a yaml file
            i.e. script/configs/train.yml
    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)
    exp = TrainSegExperiment2D(config)
    output_key = "logits"

    print(f"Seed: {seed}")

    runner = SupervisedRunner(output_key=output_key)

    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 **config["runner_params"])
    # Not saving plots if plot_params not specified in config
    if config.get("plot_params"):
        figs = plot_metrics(logdir=config["runner_params"]["logdir"],
                            metrics=config["plot_params"]["metrics"])
        save_figs(figs, save_dir=config["plot_params"]["save_dir"])
Example #10
def test_passenger_example(interior_car_task):
    model, task_flow = interior_car_task

    dataset = task_flow.get_dataset()

    train_dataset, val_dataset = torch_split_dataset(dataset, random_state=42)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    nested_loaders = OrderedDict({'train': train_loader, 'valid': val_loader})

    print(model)

    runner = SupervisedRunner()
    criterion = task_flow.get_loss()
    callbacks = criterion.catalyst_callbacks()

    with tempfile.TemporaryDirectory() as tmp_dir:
        print(tmp_dir)
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optim.Adam(model.parameters(), lr=1e-3),
            loaders=nested_loaders,
            callbacks=callbacks,
            logdir=tmp_dir,
            num_epochs=20,
        )

    print_any_prediction(criterion, model, nested_loaders, runner)
Example #11
def main():
    args = get_parse()
    catalyst.utils.set_global_seed(args.seed)
    catalyst.utils.prepare_cudnn(deterministic=True)

    print('Make Data set data frame')
    df, class_names = make_df(data_root=args.data_rootdir)
    num_class = len(class_names)

    print('Get data loaders')
    loaders = get_train_valid_loaders(
        df=df,
        test_size=0.2,
        random_state=args.seed,
        data_root=args.data_rootdir,
        num_class=num_class,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        img_size=args.img_size
    )

    print('Make model')
    if args.frn:
        model = se_resnext50_32x4d_frn(pretrained=None)
        model.last_linear = nn.Linear(512 * 16, num_class)
    else:
        model = se_resnext50_32x4d()
        model.last_linear = nn.Linear(512 * 16, num_class)

    print('Get optimizer and scheduler')
    # learning rate for FRN is very very sensitive !!!
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-5 if args.frn else 3e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer,
        T_max=args.num_epochs,
        eta_min=1e-6 if args.frn else 1e-5,
        last_epoch=-1
    )

    log_base = './output/cls'
    dir_name = f'seresnext50{"_frn" if args.frn else ""}_bs_{args.batch_size}_fp16_{args.fp16}'

    print('Start training...')
    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=os.path.join(log_base, dir_name),
        callbacks=get_callbacks(num_classes=num_class),
        num_epochs=args.num_epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=dict(opt_level="O1") if args.fp16 else None,
        verbose=False
    )
Example #12
    def __init__(self, config: EstimatorConfig, model):
        super().__init__(config)

        self.runner = SupervisedRunner()
        self.model_metrics = dict()
        self.model = model
        self.ddp = False
        self.set_device()
Example #13
def main(args=None):
    if args is None:
        args = argument_paser()

    # Set experiment id
    exp_id = str(uuid.uuid4())[:8] if args.exp_id is None else args.exp_id
    print(f'Experiment Id: {exp_id}', flush=True)

    # Fix seed
    torch.manual_seed(args.seed)

    # Config gpu
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Prepare data
    dataset = MovingMnistDataset()
    train_index, valid_index = train_test_split(range(len(dataset)),
                                                test_size=0.3)
    train_loader = DataLoader(Subset(dataset, train_index),
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(Subset(dataset, valid_index),
                              batch_size=args.test_batch_size,
                              shuffle=False)
    loaders = {"train": train_loader, "valid": valid_loader}

    model = ConvLSTMEncoderPredictor(image_size=(64, 64)).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 betas=(0.9, 0.999))
    criterion = nn.MSELoss()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=None,
        loaders=loaders,
        # model will be saved to {logdir}/checkpoints
        logdir=os.path.join(args.log_dir, exp_id),
        callbacks=[
            CheckpointCallback(save_n_best=args.n_saved),
            EarlyStoppingCallback(
                patience=args.es_patience,
                metric="loss",
                minimize=True,
            )
        ],
        num_epochs=args.epochs,
        main_metric="loss",
        minimize_metric=True,
        fp16=None,
        verbose=True)

    return exp_id, model
Example #14
def test_zero_period_validation_exception():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/periodic_loader"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {
        "train": loader,
        "train_additional": loader,
        "valid": loader,
        "valid_additional": loader,
    }

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = SupervisedRunner()

    with pytest.raises(ValueError):
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            logdir=logdir,
            num_epochs=10,
            verbose=False,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            callbacks=[
                PeriodicLoaderCallback(
                    valid_loader_key="valid",
                    valid_metric_key="loss",
                    minimize=True,
                    train_additional=1,
                    train_not_exists=3,
                    valid=0,
                    valid_additional=2,
                    valid_not_exist=1,
                )
            ],
        )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    shutil.rmtree(logdir, ignore_errors=True)
Example #15
def main():
    args = get_parse()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    catalyst.utils.set_global_seed(args.seed)
    catalyst.utils.prepare_cudnn(deterministic=True)

    print('Make Data set data frame')
    df, class_names = make_df(data_root=args.data_rootdir)
    num_class = len(class_names)

    print('Get data loaders')
    loaders = get_train_valid_loaders(df=df,
                                      test_size=0.2,
                                      random_state=args.seed,
                                      data_root=args.data_rootdir,
                                      num_class=num_class,
                                      batch_size=args.batch_size,
                                      num_workers=args.num_workers,
                                      img_size=args.img_size)

    print('Make model')
    model = make_model(model_name=args.model,
                       num_classes=num_class,
                       pretrained=args.use_pretrain,
                       input_size=(args.img_size, args.img_size),
                       dropout_p=0.2)
    if args.frn:
        print(f'Use FRN + TLU instead of BN2d + ReLU')
        model = bnrelu_to_frn(model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-4 if args.frn else 3e-4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer,
        T_max=args.num_epochs,
        eta_min=3e-6 if args.frn else 1e-5,
        last_epoch=-1)

    log_base = './output/cls'
    dir_name = f'{args.model}_frn_{args.frn}_bs_{args.batch_size}_fp16_{args.fp16}_pretrain_{args.use_pretrain}'

    print('Start training...')
    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=os.path.join(log_base, dir_name),
                 callbacks=get_callbacks(num_classes=num_class),
                 num_epochs=args.num_epochs,
                 main_metric="accuracy01",
                 minimize_metric=False,
                 fp16=dict(opt_level="O1") if args.fp16 else None,
                 verbose=False)
Example #16
def test_epoch_increasing():
    class IncreaseCheckerCallback(Callback):
        def __init__(self, attribute: str, start_value: int = None):
            super().__init__(CallbackOrder.Internal)
            self.attr = attribute
            self.prev = start_value

        def on_epoch_start(self, runner):
            if not hasattr(runner, self.attr):
                raise ValueError(f"There is no {self.attr} in runner!")
            value = getattr(runner, self.attr)
            if self.prev is not None:
                # print(
                #     f">>> '{self.attr}': "
                #     f"previous - {self.prev}, "
                #     f"current - {value}"
                # )
                assert self.prev < value
            self.prev = value

    # experiment_setup
    logdir = "./logs/core_runner"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = SupervisedRunner()

    callbacks = [
        IncreaseCheckerCallback("epoch_step"),
        IncreaseCheckerCallback("batch_step"),
        IncreaseCheckerCallback("sample_step"),
    ]

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=2,
        verbose=False,
        callbacks=callbacks,
    )

    shutil.rmtree(logdir, ignore_errors=True)
Example #17
def run_train_with_empty_loader() -> None:
    """
    In this function we force the loader to be empty by using
    batch_size > len(dataset) together with drop_last=True.
    """
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim, out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=len(dataset) + 1, drop_last=True)
    runner = SupervisedRunner()
    runner.train(
        loaders={"train": loader}, model=model, num_epochs=1, criterion=nn.BCEWithLogitsLoss(),
    )
Example #18
def train(
    in_csv: str,
    in_dir: str,
    model: str = 'resnet18',
    fold: int = None,
    n_epochs: int = 30,
    image_size: int = 224,
    augmentation: str = 'medium',
    learning_rate: float = 3e-3,
    n_milestones: int = 5,
    batch_size: int = 256,
    n_workers: int = 4,
    fast: bool = False,
    logdir: str = '.',
    verbose: bool = False
):
    model = get_model(model=model)
    loss = criterion.FocalLossMultiClass()  # CrossEntropyLoss
    lr_scaled = learning_rate * (batch_size / 256)  # lr linear scaling
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_scaled)
    scheduler = schedulers.MultiStepLR(
        optimizer,
        milestones=[5, 10, 20, 30, 40],
        gamma=0.3
    )

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=get_dataloaders(
            in_csv=in_csv,
            in_dir=in_dir,
            stages=['train', 'valid'],
            fold=fold,
            batch_size=batch_size,
            n_workers=n_workers,
            image_size=(image_size, image_size),
            augmentation=augmentation,
            fast=fast
        ),
        callbacks=[
            AccuracyCallback(accuracy_args=[1]),
            BinaryAUCCallback()
        ],
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=verbose
    )
Example #19
def test_onecyle():
    # experiment_setup
    logdir = "./logs/core_runner"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {
        "train": loader,
        "valid": loader,
    }

    # number of steps, epochs, LR range, initial LR and warmup_fraction
    num_steps = 6
    epochs = 8
    min_lr = 1e-4
    max_lr = 2e-3
    init_lr = 1e-3
    warmup_fraction = 0.5

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=num_steps,
        lr_range=(max_lr, min_lr),
        init_lr=init_lr,
        warmup_fraction=warmup_fraction,
    )

    runner = SupervisedRunner()

    callbacks = [LRCheckerCallback(init_lr, min_lr)]

    # Single stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=epochs,
        verbose=False,
        callbacks=callbacks,
    )
Example #20
def infer(config_path, log_dir):
    """
        Inference:
            1. loaders
            2. model
    """

    # querying params from experiment config
    batch_size = 116

    test_dataset = LipreadingDataset("test")

    loaders = {
        "infer": DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,
            drop_last=False,)
    }

    model = LipNext()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    runner = SupervisedRunner(device=device)

    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(accuracy_args=[1, 3]),
            InferenceCallback(),
            CheckpointCallbackV2(
                config_path=config_path,
                resume=("/home/dmitry.klimenkov/Documents/projects/visper_pytorch/logdir"
                    "/Mobi-VSR-5W-mixed_aligned_patience5_sometests/checkpoints/train.0.35.8553.pth"))
            # NegativeMiningCallback()
        ],
        state_kwargs={
            "log_dir": log_dir
        },
        check=True
    )
Example #21
def test_evaluation_loader_custom_model() -> None:
    """
    Test that evaluate_loader works with a custom model.
    """
    dataset = DummyDataset()
    model = nn.Linear(in_features=dataset.features_dim,
                      out_features=dataset.out_dim)
    loader = DataLoader(dataset=dataset, batch_size=1)
    callbacks = [
        dl.AccuracyCallback(input_key="logits",
                            target_key="targets",
                            topk=(1, ))
    ]
    runner = SupervisedRunner()

    runner.evaluate_loader(loader=loader, callbacks=callbacks, model=model)
Example #22
def run(name: str = None,
        config: dict = None,
        device: str = None,
        check: bool = False) -> dict:
    config = config or experiment_config
    device = device or utils.get_device()
    print(f"device: {device}")

    utils.set_global_seed(SEED)

    # initialize weights & biases
    name = name or '_'.join(
        filter(None,
               [experiment_name, f"{datetime.datetime.now():%Y-%m-%d-%S}"]))

    # convert parquet to zip
    parquet_to_images(TRAIN, ZIP_TRAIN_FILE, SIZE)
    parquet_to_images(TEST, ZIP_TEST_FILE, SIZE)

    # run experiment
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=["logit_" + c for c in output_classes.keys()],
        input_target_key=list(output_classes.keys()),
    )
    experiment = Experiment(config)
    runner.run_experiment(experiment, check=check)

    return {
        'runner': runner,
        'experiment': experiment,
        'config': config,
    }
Example #23
def test_evaluation_loader_empty_model() -> None:
    """
    Test that an assertion is raised when no model is given.
    """
    with pytest.raises(AssertionError) as record:
        dataset = DummyDataset()
        loader = DataLoader(dataset=dataset, batch_size=1)
        callbacks = [
            dl.AccuracyCallback(input_key="logits",
                                target_key="targets",
                                topk=(1, ))
        ]
        runner = SupervisedRunner()
        runner.evaluate_loader(loader=loader, callbacks=callbacks, model=None)
        if not record:
            pytest.fail("Expected assertion because model is empty!")
Example #24
def run(name: str = None,
        config: dict = None,
        device: str = None,
        check: bool = False) -> dict:
    config = config or experiment_config
    device = device or utils.get_device()
    print(f"device: {device}")

    utils.set_global_seed(SEED)

    config['monitoring_params']['name'] = EXPERIMENT_NAME

    # convert parquet to zip
    parquet_to_images(TRAIN, ZIP_TRAIN_FILE, SIZE)
    parquet_to_images(TEST, ZIP_TEST_FILE, SIZE)

    # run experiment
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=["logit_" + c for c in output_classes.keys()],
        input_target_key=list(output_classes.keys()),
    )
    experiment = Experiment(config)
    runner.run_experiment(experiment, check=check)

    return {
        'runner': runner,
        'experiment': experiment,
        'config': config,
    }
Example #25
def do_train(data, log, log_dir):
    model = Net(num_features=2)
    runner = SupervisedRunner()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    criterion = MyLoss()

    log_batch(model, data, log, "init")

    log.debug("Starting training")
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 loaders=data,
                 logdir=f"{log_dir}/run",
                 load_best_on_end=True,
                 num_epochs=1)

    log_batch(model, data, log, "exit")
Example #26
def make_runner():
    runner = SupervisedRunner(
        input_key=(
            'input_ids',
            'attention_mask',
            # 'token_type_ids',
        ),
        device=device,
    )
    return runner
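Passing a tuple as input_key tells the runner to look up those keys in each batch and feed them to the model, so the model's forward is expected to accept input_ids and attention_mask. A hedged sketch of a toy model with a matching signature (everything below is illustrative; device, the real loaders and the actual transformer model are assumed to be defined elsewhere):

import torch
import torch.nn as nn

class ToyTextClassifier(nn.Module):
    """Minimal stand-in whose forward signature matches the input_key tuple above."""

    def __init__(self, vocab_size: int = 30522, hidden: int = 64, num_classes: int = 2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, hidden)
        self.head = nn.Linear(hidden, num_classes)

    def forward(self, input_ids, attention_mask):
        emb = self.embedding(input_ids)                      # (B, T, H)
        mask = attention_mask.unsqueeze(-1).float()          # (B, T, 1)
        # masked mean pooling over the sequence dimension
        pooled = (emb * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
        return self.head(pooled)

# Usage (hypothetical):
# runner = make_runner()
# runner.train(model=ToyTextClassifier(), loaders=..., criterion=..., optimizer=...)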
Example #27
def test_save_model_grads():
    """
    Tests a feature of `OptimizerCallback` for saving model gradients
    """
    logdir = "./logs"
    dataset_root = "./dataset"
    loaders = _get_loaders(root=dataset_root, batch_size=4, num_workers=1)
    images, _ = next(iter(loaders["train"]))
    _, c, h, w = images.shape
    input_shape = (c, h, w)

    model = _SimpleNet(input_shape)
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters())

    criterion_callback = CriterionCallback()
    optimizer_callback = OptimizerCallback()
    save_model_grads_callback = SaveModelGradsCallback()
    prefix = save_model_grads_callback.grad_norm_prefix
    test_callback = _OnBatchEndCheckGradsCallback(prefix)

    callbacks = collections.OrderedDict(
        loss=criterion_callback,
        optimizer=optimizer_callback,
        grad_norm=save_model_grads_callback,
        test_callback=test_callback,
    )

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        callbacks=callbacks,
        check=True,
        verbose=True,
    )

    shutil.rmtree(logdir)
    shutil.rmtree(dataset_root)
Example #28
def main(config):
    """
    Main code for training a classification model.

    Args:
        config (dict): dictionary read from a yaml file
            i.e. experiments/finetune_classification.yml
    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)
    mode = config["mode"].lower()
    assert mode in ["classification", "segmentation", "both"], \
        "The `mode` must be one of ['classification', 'segmentation', 'both']."
    if mode == "classification":
        raise NotImplementedError
    elif mode == "segmentation":
        if config["dim"] == 2:
            exp = TrainSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainSegExperiment(config)
        output_key = "logits"
    elif mode == "both":
        if config["dim"] == 2:
            exp = TrainClfSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainClfSegExperiment3D(config)
        output_key = ["seg_logits", "clf_logits"]

    print(f"Seed: {seed}\nMode: {mode}")

    runner = SupervisedRunner(output_key=output_key)

    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 **config["runner_params"])
Example #29
def get_runner(config: dict, device: torch.device):
    if config.get("runner") is not None:
        if config["runner"] == "SAMRunner":
            return SAMRunner(device=device)
        else:
            raise NotImplementedError
    else:
        return SupervisedRunner(
            device=device,
            input_key=config["globals"]["input_key"],
            input_target_key=config["globals"]["input_target_key"])
Example #30
def train(num_epochs, model, loaders, logdir):
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=2)

    callbacks = [F1ScoreCallback()]

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=logdir,
                 num_epochs=num_epochs,
                 callbacks=callbacks,
                 verbose=True)