Example #1
import os
import re
import shutil
import sys
from io import StringIO

import torch
from torch.utils.data import DataLoader, TensorDataset

from catalyst import dl

def test_disabling_loss_for_train():
    old_stdout = sys.stdout
    sys.stdout = str_stdout = StringIO()

    # experiment_setup
    logdir = "./logs/control_flow"
    checkpoint = logdir + "/checkpoints"
    logfile = checkpoint + "/_metrics.json"

    # data
    num_samples, num_features = int(1e4), int(1e1)
    X = torch.rand(num_samples, num_features)
    y = torch.randint(0, 5, size=[num_samples])
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 5)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()

    n_epochs = 5
    # first stage
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=False,
        main_metric="accuracy01",
        callbacks=[
            dl.ControlFlowCallback(dl.CriterionCallback(),
                                   ignore_loaders=["train"]),
            dl.AccuracyCallback(accuracy_args=[1, 3, 5]),
            dl.CheckRunCallback(num_epoch_steps=n_epochs),
        ],
    )

    sys.stdout = old_stdout
    exp_output = str_stdout.getvalue()

    assert len(re.findall(r"\(train\): loss", exp_output)) == 5
    assert len(re.findall(r"\(valid\): loss", exp_output)) == 0
    assert len(re.findall(r".*/train\.\d\.pth", exp_output)) == 1

    assert os.path.isfile(logfile)
    assert os.path.isfile(checkpoint + "/best.pth")
    assert os.path.isfile(checkpoint + "/best_full.pth")
    assert os.path.isfile(checkpoint + "/last.pth")
    assert os.path.isfile(checkpoint + "/last_full.pth")
    pth_files = [
        file for file in os.listdir(checkpoint) if file.endswith(".pth")
    ]
    assert len(pth_files) == 6

    shutil.rmtree(logdir, ignore_errors=True)
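A note on the wrapper used throughout these examples: dl.ControlFlowCallback takes either loaders (run the wrapped callback only on the named loaders) or ignore_loaders (run it on every loader except the named ones). A minimal sketch, assuming the same catalyst API as the snippets on this page:

from catalyst import dl

# run the wrapped callback only on the "train" loader
train_only = dl.ControlFlowCallback(dl.CriterionCallback(), loaders="train")
# run it on every loader except "train"
skip_train = dl.ControlFlowCallback(dl.CriterionCallback(), ignore_loaders=["train"])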
Example #2
def test_metric_learning_pipeline():
    """
    Test that the metric learning pipeline can run and compute metrics.
    In this test we check that LoaderMetricCallback works with
    CMCMetric (ICallbackLoaderMetric).
    """
    with TemporaryDirectory() as tmp_dir:
        dataset_train = datasets.MnistMLDataset(root=tmp_dir, download=True)
        sampler = data.BalanceBatchSampler(labels=dataset_train.get_labels(), p=5, k=10)
        train_loader = DataLoader(
            dataset=dataset_train, sampler=sampler, batch_size=sampler.batch_size,
        )
        dataset_val = datasets.MnistQGDataset(root=tmp_dir, transform=None, gallery_fraq=0.2)
        val_loader = DataLoader(dataset=dataset_val, batch_size=1024)

        model = DummyModel(num_features=28 * 28, num_classes=NUM_CLASSES)
        optimizer = Adam(model.parameters(), lr=0.001)

        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(margin=0.5, sampler_inbatch=sampler_inbatch)

        callbacks = OrderedDict(
            {
                "cmc": dl.ControlFlowCallback(
                    LoaderMetricCallback(
                        CMCMetric(
                            topk_args=[1],
                            embeddings_key="embeddings",
                            labels_key="targets",
                            is_query_key="is_query",
                        ),
                        input_key=["embeddings", "is_query"],
                        target_key=["targets"],
                    ),
                    loaders="valid",
                ),
                "control": dl.PeriodicLoaderCallback(
                    valid_loader_key="valid", valid_metric_key="cmc", valid=2
                ),
            }
        )

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders=OrderedDict({"train": train_loader, "valid": val_loader}),
            verbose=False,
            valid_loader="valid",
            num_epochs=4,
        )
        assert "cmc01" in runner.loader_metrics
Example #3
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        from catalyst import utils

        utils.set_global_seed(RANDOM_STATE)
        # 1. train, valid and test loaders
        transforms = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])

        train_data = MNIST(os.getcwd(),
                           train=True,
                           download=True,
                           transform=transforms)
        train_labels = train_data.targets.cpu().numpy().tolist()
        train_sampler = data.BatchBalanceClassSampler(train_labels,
                                                      num_classes=10,
                                                      num_samples=4)
        train_loader = DataLoader(train_data, batch_sampler=train_sampler)

        valid_dataset = MNIST(root=os.getcwd(),
                              transform=transforms,
                              train=False,
                              download=True)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=32)

        test_dataset = MNIST(root=os.getcwd(),
                             transform=transforms,
                             train=False,
                             download=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32)

        # 2. model and optimizer
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16),
                              nn.LeakyReLU(inplace=True))
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                images = batch["features"].float()
                targets = batch["targets"].long()
                features = self.model(images)
                self.batch = {
                    "embeddings": features,
                    "targets": targets,
                }

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="embeddings",
                                     target_key="targets",
                                     metric_key="loss"),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders=["valid", "infer"],
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(target_key="targets",
                                    input_key="sklearn_predict",
                                    topk_args=(1, 3)),
                loaders=["valid", "infer"],
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders={
                "train": train_loader,
                "valid": valid_loader,
                "infer": test_loader
            },
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        valid_path = Path(logdir) / "logs/infer.csv"
        best_accuracy = max(
            float(row["accuracy"]) for row in read_csv(valid_path))

        assert best_accuracy > 0.8
Example #4
                                     pair_transform=False),
        download=True,
    )

    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              pin_memory=True)
    valid_loader = DataLoader(test_data,
                              batch_size=batch_size,
                              pin_memory=True)

    callbacks = [
        dl.ControlFlowCallback(
            dl.CriterionCallback(input_key="out_1",
                                 target_key="out_2",
                                 metric_key="loss"),
            loaders="train",
        ),
        dl.SklearnModelCallback(
            feature_key="embeddings",
            target_key="targets",
            train_loader="train",
            valid_loaders="valid",
            model_fn=LogisticRegression,
            predict_key="sklearn_predict",
            predict_method="predict_proba",
        ),
        dl.OptimizerCallback(metric_key="loss"),
        dl.ControlFlowCallback(
            dl.AccuracyCallback(target_key="targets",
                                input_key="sklearn_predict",
Example #5
def main(args):
    wandb.init(project="teacher-pruning", config=vars(args))
    set_global_seed(42)
    # dataloader initialization
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    train_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(),
                         train=True,
                         transform=transform_train,
                         download=True))
    valid_dataset = Wrp(
        datasets.CIFAR10(root=os.getcwd(),
                         train=False,
                         transform=transform_test))
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=2)
    valid_dataloader = DataLoader(dataset=valid_dataset,
                                  batch_size=128,
                                  num_workers=2)
    loaders = {
        "train": train_dataloader,
        "valid": valid_dataloader,
    }
    # model initialization
    model = PreActResNet18()
    model.fc = nn.Linear(512, 10)
    if args.teacher_model is not None:
        is_kd = True
        teacher_model = NAME2MODEL[args.teacher_model]()
        load_model_from_path(model=teacher_model, path=args.teacher_path)
        model = {
            "student": model,
            "teacher": teacher_model,
        }
        output_hiddens = args.beta is not None
        is_kd_on_hiddens = output_hiddens
        runner = KDRunner(device=args.device, output_hiddens=output_hiddens)
        parameters = model["student"].parameters()
    else:
        is_kd = False
        runner = dl.SupervisedRunner(device=args.device)
        parameters = model.parameters()
    # optimizer
    optimizer_cls = NAME2OPTIM[args.optimizer]
    optimizer_kwargs = {"params": parameters, "lr": args.lr}
    if args.optimizer == "sgd":
        optimizer_kwargs["momentum"] = args.momentum
    else:
        optimizer_kwargs["betas"] = (args.beta1, args.beta2)
    optimizer = optimizer_cls(**optimizer_kwargs)
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=args.gamma)
    logdir = f"logs/{wandb.run.name}"
    # callbacks
    callbacks = [dl.AccuracyCallback(num_classes=10), WandbCallback()]
    if is_kd:
        metrics = {}
        callbacks.append(dl.CriterionCallback(output_key="cls_loss"))
        callbacks.append(DiffOutputCallback())
        coefs = get_loss_coefs(args.alpha, args.beta)
        metrics["cls_loss"] = coefs[0]
        metrics["diff_output_loss"] = coefs[1]
        if is_kd_on_hiddens:
            callbacks.append(DiffHiddenCallback())
            metrics["diff_hidden_loss"] = coefs[2]

        aggregator_callback = dl.MetricAggregationCallback(prefix="loss",
                                                           metrics=metrics,
                                                           mode="weighted_sum")
        wrapped_agg_callback = dl.ControlFlowCallback(aggregator_callback,
                                                      loaders=["train"])
        callbacks.append(wrapped_agg_callback)

    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=nn.CrossEntropyLoss(),
        loaders=loaders,
        callbacks=callbacks,
        num_epochs=args.epoch,
        logdir=logdir,
        verbose=True,
    )
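With mode="weighted_sum", MetricAggregationCallback combines the configured batch metrics linearly with the weights from the metrics dict. Conceptually (a sketch with illustrative values; the metric names and the get_loss_coefs weights are taken from the snippet above):

# weighted_sum aggregation is a dot product of metric values with weights
batch_metrics = {"cls_loss": 0.9, "diff_output_loss": 0.4}  # illustrative values
weights = {"cls_loss": 0.7, "diff_output_loss": 0.3}        # as filled from get_loss_coefs
loss = sum(w * batch_metrics[name] for name, w in weights.items())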
Example #6
def run_ml_pipeline(sampler_inbatch: data.IInbatchTripletSampler) -> float:
    """
    Full metric learning pipeline, including train and val.

    This function is also used as minimal example in README.md, section name:
    'CV - MNIST with Metric Learning'.

    Args:
        sampler_inbatch: sampler used to form triplets within a batch

    Returns:
        best metric value
    """
    # 1. train and valid datasets
    dataset_root = "./data"
    transforms = t.Compose([t.ToTensor(), t.Normalize((0.1307, ), (0.3081, ))])

    dataset_train = datasets.MnistMLDataset(
        root=dataset_root,
        train=True,
        download=True,
        transform=transforms,
    )
    sampler = data.BalanceBatchSampler(labels=dataset_train.get_labels(),
                                       p=5,
                                       k=10)
    train_loader = DataLoader(dataset=dataset_train,
                              sampler=sampler,
                              batch_size=sampler.batch_size)

    dataset_val = datasets.MnistQGDataset(root=dataset_root,
                                          transform=transforms,
                                          gallery_fraq=0.2)
    val_loader = DataLoader(dataset=dataset_val, batch_size=1024)

    # 2. model and optimizer
    model = models.SimpleConv(features_dim=16)
    optimizer = Adam(model.parameters(), lr=0.0005)

    # 3. criterion with triplets sampling
    criterion = nn.TripletMarginLossWithSampler(
        margin=0.5, sampler_inbatch=sampler_inbatch)

    # 4. training with catalyst Runner
    callbacks = [
        dl.ControlFlowCallback(dl.CriterionCallback(), loaders="train"),
        dl.ControlFlowCallback(dl.CMCScoreCallback(topk_args=[1]),
                               loaders="valid"),
        dl.PeriodicLoaderCallback(valid=100),
    ]

    runner = dl.SupervisedRunner(device=utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=callbacks,
        loaders={
            "train": train_loader,
            "valid": val_loader
        },
        minimize_metric=False,
        verbose=True,
        valid_loader="valid",
        num_epochs=100,
        main_metric="cmc01",
    )
    return runner.best_valid_metrics["cmc01"]
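For context, a hypothetical invocation of this pipeline; the sampler choice is an assumption borrowed from the other examples on this page, not part of this snippet:

from catalyst import data

# any data.IInbatchTripletSampler works here
best_cmc = run_ml_pipeline(sampler_inbatch=data.HardTripletsSampler(norm_required=False))
print(f"best cmc01: {best_cmc:.3f}")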
Example #7
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:
        from catalyst import utils

        utils.set_global_seed(RANDOM_STATE)
        # 1. generate data
        num_samples, num_features, num_classes = int(1e4), int(30), 3
        X, y = make_classification(
            n_samples=num_samples,
            n_features=num_features,
            n_informative=num_features,
            n_repeated=0,
            n_redundant=0,
            n_classes=num_classes,
            n_clusters_per_class=1,
        )
        X, y = torch.tensor(X), torch.tensor(y)
        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset,
                            batch_size=64,
                            num_workers=1,
                            shuffle=True)

        # 2. model, optimizer and scheduler
        hidden_size, out_features = 20, 16
        model = nn.Sequential(nn.Linear(num_features, hidden_size), nn.ReLU(),
                              nn.Linear(hidden_size, out_features))
        optimizer = Adam(model.parameters(), lr=LR)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])

        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        class CustomRunner(dl.SupervisedRunner):
            def handle_batch(self, batch) -> None:
                features = batch["features"].float()
                targets = batch["targets"].long()
                embeddings = self.model(features)
                self.batch = {
                    "embeddings": embeddings,
                    "targets": targets,
                }

        callbacks = [
            dl.SklearnModelCallback(
                feature_key="embeddings",
                target_key="targets",
                train_loader="train",
                valid_loaders="valid",
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=100,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(target_key="targets",
                                    input_key="sklearn_predict",
                                    topk_args=(1, 3)),
                loaders="valid",
            ),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            scheduler=scheduler,
            loaders={
                "train": loader,
                "valid": loader
            },
            verbose=False,
            valid_loader="valid",
            valid_metric="accuracy",
            minimize_valid_metric=False,
            num_epochs=TRAIN_EPOCH,
            logdir=logdir,
        )

        valid_path = Path(logdir) / "logs/valid.csv"
        best_accuracy = max(
            float(row["accuracy"]) for row in read_csv(valid_path))

        assert best_accuracy > 0.9
Example #8
        dl.CriterionCallback(input_key="projection_left",
                             target_key="projection_right",
                             metric_key="loss"),
        dl.SklearnModelCallback(
            feature_key="embedding_origin",
            target_key="target",
            train_loader="train",
            valid_loaders="valid",
            model_fn=LogisticRegression,
            predict_key="sklearn_predict",
            predict_method="predict_proba",
        ),
        dl.OptimizerCallback(metric_key="loss"),
        dl.ControlFlowCallback(
            dl.AccuracyCallback(target_key="target",
                                input_key="sklearn_predict",
                                topk_args=(1, 3)),
            loaders="valid",
        ),
    ]

    runner = SelfSupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=callbacks,
        loaders=get_loaders(args.dataset, args.batch_size, args.num_workers),
        verbose=True,
        logdir=args.logdir,
        valid_loader="train",
Example #9
def test_reid_pipeline():
    """This test checks that reid pipeline runs and compute metrics with ReidCMCScoreCallback"""
    with TemporaryDirectory() as logdir:

        # 1. train and valid loaders
        transforms = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])

        train_dataset = MnistMLDataset(root=os.getcwd(),
                                       download=True,
                                       transform=transforms)
        sampler = data.BatchBalanceClassSampler(
            labels=train_dataset.get_labels(),
            num_classes=3,
            num_samples=10,
            num_batches=20)
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_sampler=sampler,
                                  num_workers=0)

        valid_dataset = MnistReIDQGDataset(root=os.getcwd(),
                                           transform=transforms,
                                           gallery_fraq=0.2)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=1024)

        # 2. model and optimizer
        model = models.MnistSimpleNet(out_features=16)
        optimizer = Adam(model.parameters(), lr=0.001)

        # 3. criterion with triplets sampling
        sampler_inbatch = data.AllTripletsSampler(max_output_triplets=1000)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="embeddings",
                                     target_key="targets",
                                     metric_key="loss"),
                loaders="train",
            ),
            dl.ControlFlowCallback(
                dl.ReidCMCScoreCallback(
                    embeddings_key="embeddings",
                    pids_key="targets",
                    cids_key="cids",
                    is_query_key="is_query",
                    topk_args=[1],
                ),
                loaders="valid",
            ),
            dl.PeriodicLoaderCallback(valid_loader_key="valid",
                                      valid_metric_key="cmc01",
                                      minimize=False,
                                      valid=2),
        ]

        runner = ReIDCustomRunner()
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders=OrderedDict({
                "train": train_loader,
                "valid": valid_loader
            }),
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=10,
        )
        assert "cmc01" in runner.loader_metrics
        assert runner.loader_metrics["cmc01"] > 0.7
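For reference, cmc01 asserted above is top-1 Cumulative Matching Characteristics: the fraction of queries whose nearest gallery embedding carries the matching person id (the ReID variant additionally takes camera ids, fed via cids_key, into account). A minimal sketch of the unfiltered top-1 score, not catalyst's implementation:

import torch

def cmc_at_1(query_emb, gallery_emb, query_pids, gallery_pids):
    # fraction of queries whose nearest gallery item shares the person id
    dist = torch.cdist(query_emb, gallery_emb)  # pairwise L2 distances
    nearest = dist.argmin(dim=1)                # closest gallery index per query
    return (gallery_pids[nearest] == query_pids).float().mean().item()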
Example #10
def train_experiment(device, engine=None):
    with TemporaryDirectory() as logdir:

        # 1. train and valid loaders
        transforms = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])

        train_dataset = datasets.MnistMLDataset(root=os.getcwd(),
                                                download=True,
                                                transform=transforms)
        sampler = data.BatchBalanceClassSampler(
            labels=train_dataset.get_labels(),
            num_classes=5,
            num_samples=10,
            num_batches=10)
        train_loader = DataLoader(dataset=train_dataset, batch_sampler=sampler)

        valid_dataset = datasets.MnistQGDataset(root=os.getcwd(),
                                                transform=transforms,
                                                gallery_fraq=0.2)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=1024)

        # 2. model and optimizer
        model = models.MnistSimpleNet(out_features=16)
        optimizer = Adam(model.parameters(), lr=0.001)

        # 3. criterion with triplets sampling
        sampler_inbatch = data.HardTripletsSampler(norm_required=False)
        criterion = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=sampler_inbatch)

        # 4. training with catalyst Runner
        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="embeddings",
                                     target_key="targets",
                                     metric_key="loss"),
                loaders="train",
            ),
            dl.ControlFlowCallback(
                dl.CMCScoreCallback(
                    embeddings_key="embeddings",
                    labels_key="targets",
                    is_query_key="is_query",
                    topk_args=[1],
                ),
                loaders="valid",
            ),
            dl.PeriodicLoaderCallback(valid_loader_key="valid",
                                      valid_metric_key="cmc01",
                                      minimize=False,
                                      valid=2),
        ]

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine or dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders={
                "train": train_loader,
                "valid": valid_loader
            },
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=2,
        )
Example #11
def train_experiment(device, engine=None):

    with TemporaryDirectory() as logdir:

        # 1. data and transforms

        transforms = Compose([
            torchvision.transforms.ToPILImage(),
            torchvision.transforms.RandomCrop((28, 28)),
            torchvision.transforms.RandomVerticalFlip(),
            torchvision.transforms.RandomHorizontalFlip(),
            torchvision.transforms.ToTensor(),
            Normalize((0.1307, ), (0.3081, )),
        ])

        transform_original = Compose([
            ToTensor(),
            Normalize((0.1307, ), (0.3081, )),
        ])

        mnist = MNIST("./logdir", train=True, download=True, transform=None)
        contrastive_mnist = SelfSupervisedDatasetWrapper(
            mnist,
            transforms=transforms,
            transform_original=transform_original)
        train_loader = torch.utils.data.DataLoader(contrastive_mnist,
                                                   batch_size=BATCH_SIZE)

        mnist_valid = MNIST("./logdir",
                            train=False,
                            download=True,
                            transform=None)
        contrastive_valid = SelfSupervisedDatasetWrapper(
            mnist_valid,
            transforms=transforms,
            transform_original=transform_original)
        valid_loader = torch.utils.data.DataLoader(contrastive_valid,
                                                   batch_size=BATCH_SIZE)

        # 2. model and optimizer
        encoder = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 16),
                                nn.LeakyReLU(inplace=True))
        projection_head = nn.Sequential(
            nn.Linear(16, 16, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(16, 16, bias=True),
        )

        class ContrastiveModel(torch.nn.Module):
            def __init__(self, model, encoder):
                super(ContrastiveModel, self).__init__()
                self.model = model
                self.encoder = encoder

            def forward(self, x):
                emb = self.encoder(x)
                projection = self.model(emb)
                return emb, projection

        model = ContrastiveModel(model=projection_head, encoder=encoder)

        optimizer = Adam(model.parameters(), lr=LR)

        # 3. criterion with triplets sampling
        criterion = NTXentLoss(tau=0.1)

        callbacks = [
            dl.ControlFlowCallback(
                dl.CriterionCallback(input_key="projection_left",
                                     target_key="projection_right",
                                     metric_key="loss"),
                loaders="train",
            ),
            dl.SklearnModelCallback(
                feature_key="embedding_left",
                target_key="target",
                train_loader="train",
                valid_loaders="valid",
                model_fn=RandomForestClassifier,
                predict_method="predict_proba",
                predict_key="sklearn_predict",
                random_state=RANDOM_STATE,
                n_estimators=50,
            ),
            dl.ControlFlowCallback(
                dl.AccuracyCallback(target_key="target",
                                    input_key="sklearn_predict",
                                    topk_args=(1, 3)),
                loaders="valid",
            ),
        ]

        runner = dl.SelfSupervisedRunner()

        logdir = "./logdir"
        runner.train(
            model=model,
            engine=engine or dl.DeviceEngine(device),
            criterion=criterion,
            optimizer=optimizer,
            callbacks=callbacks,
            loaders={
                "train": train_loader,
                "valid": valid_loader
            },
            verbose=False,
            logdir=logdir,
            valid_loader="train",
            valid_metric="loss",
            minimize_valid_metric=True,
            num_epochs=TRAIN_EPOCH,
        )

        valid_path = Path(logdir) / "logs/valid.csv"
        best_accuracy = max(
            float(row["accuracy"]) for row in read_csv(valid_path)
            if row["accuracy"] != "accuracy")

        assert best_accuracy > 0.6
Example #12
        def forward(self, x):
            emb = self.encoder(x)
            projection = self.model(emb)
            return emb, projection

    # 2. model and optimizer
    model = ContrastiveModel(projection_head, encoder)
    optimizer = Adam(model.parameters(), lr=args.learning_rate)

    # 3. criterion with triplets sampling
    criterion = NTXentLoss(tau=args.temperature)

    callbacks = [
        dl.ControlFlowCallback(
            dl.CriterionCallback(input_key="projection_left",
                                 target_key="projection_right",
                                 metric_key="loss"),
            loaders="train",
        )
    ]

    runner = SelfSupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        callbacks=callbacks,
        loaders={
            "train": train_loader,
            # "valid": valid_loader
        },