Example #1
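# Fine-tunes a HuggingFace sequence-classification model with a Catalyst-style
# training loop: LoaderMetricCallback tracks accuracy, OptimizerCallback steps
# the optimizer on the "loss" metric, and the best validation accuracy can be
# pushed to Weights & Biases from the CSV logs. Imports (torch, datasets,
# transformers, and the project-specific HFRunner/HFMetric) are omitted here.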
def main(args):
    if args.wandb:
        import wandb
        wandb.init()
        logdir = args.logdir + "/" + wandb.run.name
    else:
        logdir = args.logdir
    set_global_seed(args.seed)
    datasets = load_dataset(args.dataset)

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    datasets = datasets.map(
        lambda e: tokenizer(
            e["text"], truncation=True, padding="max_length", max_length=128),
        batched=True,
    )
    datasets = datasets.map(lambda e: {"labels": e["label"]}, batched=True)
    datasets.set_format(
        type="torch",
        columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
    )
    loaders = {
        "train":
        DataLoader(datasets["train"], batch_size=args.batch_size,
                   shuffle=True),
        "valid":
        DataLoader(datasets["test"], batch_size=args.batch_size),
    }
    metric_callback = LoaderMetricCallback(
        metric=HFMetric(metric=load_metric("accuracy")),
        input_key="logits",
        target_key="labels",
    )
    teacher_model = AutoModelForSequenceClassification.from_pretrained(
        args.model, num_labels=args.num_labels)
    callbacks = [metric_callback, OptimizerCallback(metric_key="loss")]
    runner = HFRunner()
    runner.train(model=teacher_model,
                 loaders=loaders,
                 optimizer=torch.optim.Adam(teacher_model.parameters(),
                                            lr=args.lr),
                 callbacks=callbacks,
                 num_epochs=args.num_epochs,
                 valid_metric="accuracy",
                 minimize_valid_metric=False,
                 logdir=logdir,
                 valid_loader="valid",
                 verbose=args.verbose,
                 seed=args.seed)
    if args.wandb:
        import csv
        with open(logdir + "/valid.csv") as fi:
            reader = csv.DictReader(fi)
            accuracy = []
            for row in reader:
                if row["accuracy"] == "accuracy":
                    continue
                accuracy.append(float(row["accuracy"]))

        wandb.log({"accuracy": max(accuracy[-args.num_epochs:])})
Example #2
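 # Callback set returned by a test runner's get_callbacks hook: criterion,
 # optimizer and checkpoint callbacks plus device, module-type,
 # loss-minimization and tensor-type checks.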
 def get_callbacks(self, stage: str):
     return {
         "criterion":
         CriterionCallback(metric_key="loss",
                           input_key="logits",
                           target_key="targets"),
         "optimizer":
         OptimizerCallback(metric_key="loss"),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "checkpoint":
         CheckpointCallback(self._logdir,
                            loader_key="valid",
                            metric_key="loss",
                            minimize=True,
                            save_n_best=3),
         "test_nn_module":
         ModuleTypeChecker(),
         "test_device":
         DeviceCheckCallback(self._device, logger=logger),
         "test_loss_minimization":
         LossMinimizationCallback("loss", logger=logger),
         "test_logits_type":
         TensorTypeChecker("logits"),
         # "loss_type_checker": TensorTypeChecker("loss", True),
     }
Example #3
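# Iterative pruning of an MLP on MNIST: PruningCallback applies l1_unstructured
# pruning (amount=0.2 per session, 10 sessions) while CriterionCallback and
# OptimizerCallback drive the usual training step; check=True keeps the run short.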
def test_pruning():
    from catalyst.callbacks import (
        AccuracyCallback,
        ControlFlowCallback,
        CriterionCallback,
        OptimizerCallback,
        PruningCallback,
    )
    from catalyst.contrib.datasets import MNIST
    import torch
    from torch.utils.data import DataLoader
    from torchvision.transforms import ToTensor

    from compressors.distillation.callbacks import KLDivCallback, MetricAggregationCallback
    from compressors.models import MLP
    from compressors.pruning.runners import PruneRunner
    from compressors.utils.data import TorchvisionDatasetWrapper as Wrp

    model = MLP(num_layers=3)

    datasets = {
        "train":
        Wrp(MNIST("./data", train=True, download=True, transform=ToTensor())),
        "valid":
        Wrp(MNIST("./data", train=False, transform=ToTensor())),
    }

    loaders = {
        dl_key: DataLoader(dataset, shuffle=dl_key == "train", batch_size=32)
        for dl_key, dataset in datasets.items()
    }

    optimizer = torch.optim.Adam(model.parameters())

    runner = PruneRunner(num_sessions=10)

    runner.train(model=model,
                 loaders=loaders,
                 optimizer=optimizer,
                 criterion=torch.nn.CrossEntropyLoss(),
                 callbacks=[
                     PruningCallback(
                         pruning_fn="l1_unstructured",
                         amount=0.2,
                         remove_reparametrization_on_stage_end=False,
                     ),
                     OptimizerCallback(metric_key="loss"),
                     CriterionCallback(input_key="logits",
                                       target_key="targets",
                                       metric_key="loss"),
                     AccuracyCallback(input_key="logits",
                                      target_key="targets"),
                 ],
                 logdir="./pruned_model",
                 valid_loader="valid",
                 valid_metric="accuracy",
                 minimize_valid_metric=False,
                 check=True)
Example #4
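# End-to-end distillation on MNIST: the teacher MLP is trained first
# (num_train_teacher_epochs=5), then the smaller student is distilled with an
# MSE loss on hidden states; OptimizerCallback steps on the "loss" metric.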
def test_distil():
    from itertools import chain

    from catalyst.callbacks import AccuracyCallback, OptimizerCallback
    from catalyst.contrib.datasets import MNIST
    import torch
    from torch.utils.data import DataLoader
    from torchvision import transforms as T

    from compressors.distillation.runners import EndToEndDistilRunner
    from compressors.models import MLP
    from compressors.utils.data import TorchvisionDatasetWrapper as Wrp

    teacher = MLP(num_layers=4)
    student = MLP(num_layers=3)

    datasets = {
        "train":
        Wrp(MNIST("./data", train=True, download=True,
                  transform=T.ToTensor())),
        "valid":
        Wrp(MNIST("./data", train=False, transform=T.ToTensor())),
    }

    loaders = {
        dl_key: DataLoader(dataset, shuffle=dl_key == "train", batch_size=32)
        for dl_key, dataset in datasets.items()
    }

    optimizer = torch.optim.Adam(
        chain(teacher.parameters(), student.parameters()))

    runner = EndToEndDistilRunner(hidden_state_loss="mse",
                                  num_train_teacher_epochs=5)

    runner.train(
        model=torch.nn.ModuleDict({
            "teacher": teacher,
            "student": student
        }),
        loaders=loaders,
        optimizer=optimizer,
        num_epochs=4,
        callbacks=[
            OptimizerCallback(metric_key="loss"),
            AccuracyCallback(input_key="logits", target_key="targets"),
        ],
        valid_metric="accuracy01",
        minimize_valid_metric=False,
        logdir="./logs",
        valid_loader="valid",
        criterion=torch.nn.CrossEntropyLoss(),
        check=True,
    )
Example #5
 def get_callbacks(self, stage: str):
     return {
         "criterion": CriterionCallback(
             metric_key="loss", input_key="logits", target_key="targets"
         ),
         "optimizer": OptimizerCallback(metric_key="loss"),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "checkpoint": CheckpointCallback(
             self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
         ),
         "test_nn_parallel_data_parallel": DataParallelTypeChecker(),
         "test_loss_minimization": LossMinimizationCallback("loss", logger=logger),
         "test_logits_type": OPTTensorTypeChecker("logits", self._opt_level),
     }
Example #6
 def get_callbacks(self, stage: str):
     return {
         "criterion": CriterionCallback(
             metric_key="loss", input_key="logits", target_key="targets"
         ),
         "optimizer": OptimizerCallback(metric_key="loss"),
         # "scheduler": dl.SchedulerCallback(loader_key="valid", metric_key="loss"),
         "checkpoint": CheckpointCallback(
             self._logdir, loader_key="valid", metric_key="loss", minimize=True, save_n_best=3
         ),
         "test_nn_parallel_distributed_data_parallel": DistributedDataParallelTypeChecker(),
         "test_loss_minimization": LossMinimizationCallback("loss", logger=logger),
         "test_world_size": WorldSizeCheckCallback(NUM_CUDA_DEVICES, logger=logger),
     }
Example #7
def test_fast_zero_grad():
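    # Check that OptimizerCallback with use_fast_zero_grad=True still performs a
    # parameter update: the loss after one optimization step must be lower than
    # the initial loss.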
    model = nn.Linear(10, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.BCEWithLogitsLoss()

    batch_size = 3
    inp = torch.randn(batch_size, 10)
    target = torch.FloatTensor(batch_size, 2).uniform_()

    callback = OptimizerCallback(metric_key="loss", use_fast_zero_grad=True)

    loss1 = criterion(model(inp), target)
    loss1_value = loss1.detach().item()

    runner = DummyRunner(loss1, optimizer)

    callback.on_stage_start(runner)
    callback.on_epoch_start(runner)
    callback.on_batch_end(runner)

    loss2 = criterion(model(inp), target)
    loss2_value = loss2.detach().item()

    runner.batch_metrics = {"loss": loss2}
    callback.on_epoch_start(runner)
    callback.on_batch_end(runner)

    assert loss1_value > loss2_value
Example #8
def test_tracer_callback():
    """
    Tests a feature of `TracingCallback` for model tracing during training
    """
    logdir = "./logs"
    dataset_root = "./data"
    loaders = _get_loaders(root=dataset_root, batch_size=4, num_workers=1)
    images, targets = next(iter(loaders["train"]))
    _, c, h, w = images.shape
    input_shape = (c, h, w)

    model = _TracedNet(input_shape)
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters())

    method_name = "forward"
    mode = "eval"
    requires_grad = False
    checkpoint_name = "best"
    opt_level = None

    trace_name = get_trace_name(
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        additional_string=checkpoint_name,
    )
    tracing_path = Path(logdir) / "trace" / trace_name
    criterion_callback = CriterionCallback()
    optimizer_callback = OptimizerCallback()
    tracer_callback = TracingCallback(
        metric="loss",
        minimize=False,
        trace_mode=mode,
        mode=checkpoint_name,
        do_once=True,
        method_name=method_name,
        requires_grad=requires_grad,
        opt_level=opt_level,
    )
    test_callback = _OnStageEndCheckModelTracedCallback(
        path=tracing_path,
        inputs=images,
    )

    callbacks = collections.OrderedDict(
        loss=criterion_callback,
        optimizer=optimizer_callback,
        tracer_callback=tracer_callback,
        test_callback=test_callback,
    )

    runner = SupervisedRunner(input_key="x")
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        logdir=logdir,
        callbacks=callbacks,
        check=True,
        verbose=True,
    )

    shutil.rmtree(logdir)
Example #9
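# Knowledge distillation on CIFAR100: a pretrained teacher guides the student via
# attention/hidden-state and KL-divergence losses (applied on the train loader
# only), aggregated with the classification loss into a single weighted "loss"
# that OptimizerCallback steps on; the SGD optimizer holds only student parameters.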
def main(args):

    set_global_seed(42)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    datasets = {
        "train":
        Wrp(
            CIFAR100(root=".",
                     train=True,
                     download=True,
                     transform=transform_train)),
        "valid":
        Wrp(CIFAR100(root=".", train=False, transform=transform_test)),
    }

    loaders = {
        k: DataLoader(v,
                      batch_size=args.batch_size,
                      shuffle=k == "train",
                      num_workers=2)
        for k, v in datasets.items()
    }
    teacher_model = NAME2MODEL[args.teacher](num_classes=100)
    if args.teacher_path is None:
        teacher_sd = load_state_dict_from_url(NAME2URL[args.teacher])
        teacher_model.load_state_dict(teacher_sd)
    else:
        unpack_checkpoint(torch.load(args.teacher_path), model=teacher_model)
    student_model = NAME2MODEL[args.student](num_classes=100)

    optimizer = torch.optim.SGD(student_model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     [150, 180, 210],
                                                     gamma=0.1)

    runner = DistilRunner(apply_probability_shift=args.probability_shift)
    runner.train(model={
        "teacher": teacher_model,
        "student": student_model
    },
                 loaders=loaders,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 valid_metric="accuracy",
                 minimize_valid_metric=False,
                 logdir=args.logdir,
                 callbacks=[
                     ControlFlowCallback(AttentionHiddenStatesCallback(),
                                         loaders="train"),
                     ControlFlowCallback(KLDivCallback(temperature=4),
                                         loaders="train"),
                     CriterionCallback(input_key="s_logits",
                                       target_key="targets",
                                       metric_key="cls_loss"),
                     ControlFlowCallback(
                         MetricAggregationCallback(
                             prefix="loss",
                             metrics={
                                 "attention_loss": args.beta,
                                 "kl_div_loss": args.alpha,
                                 "cls_loss": 1 - args.alpha,
                             },
                             mode="weighted_sum",
                         ),
                         loaders="train",
                     ),
                     AccuracyCallback(input_key="s_logits",
                                      target_key="targets"),
                     OptimizerCallback(metric_key="loss", model_key="student"),
                     SchedulerCallback(),
                 ],
                 valid_loader="valid",
                 num_epochs=args.num_epochs,
                 criterion=torch.nn.CrossEntropyLoss(),
                 seed=args.seed)
Example #10
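# Two-stage HuggingFace pipeline on ag_news: the teacher BERT is fine-tuned
# first, then distilled into a smaller student using hidden-state (MSE over CLS
# tokens), KL-divergence and task losses aggregated into a single "loss" that
# OptimizerCallback steps on; check=True keeps both runs short.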
def test_hf():
    """tests example pipeline"""
    datasets = load_dataset("ag_news")
    tokenizer = AutoTokenizer.from_pretrained(
        "google/bert_uncased_L-4_H-128_A-2")
    datasets = datasets.map(
        lambda e: tokenizer(
            e["text"], truncation=True, padding="max_length", max_length=128),
        batched=True,
    )
    datasets = datasets.map(lambda e: {"labels": e["label"]}, batched=True)
    datasets.set_format(
        type="torch",
        columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
    )
    loaders = {
        "train": DataLoader(datasets["train"], batch_size=32, shuffle=True),
        "valid": DataLoader(datasets["test"], batch_size=32),
    }
    metric_callback = LoaderMetricCallback(
        metric=HFMetric(metric=load_metric("accuracy")),
        input_key="logits",
        target_key="labels",
    )
    teacher_model = AutoModelForSequenceClassification.from_pretrained(
        "google/bert_uncased_L-4_H-128_A-2", num_labels=4)
    runner = HFRunner()
    runner.train(
        model=teacher_model,
        loaders=loaders,
        optimizer=torch.optim.Adam(teacher_model.parameters(), lr=1e-4),
        callbacks=[metric_callback],
        num_epochs=3,
        valid_metric="accuracy",
        minimize_valid_metric=False,
        check=True,
    )
    metric_callback = LoaderMetricCallback(
        metric=HFMetric(metric=load_metric("accuracy")),
        input_key="s_logits",
        target_key="labels",
    )

    slct_callback = ControlFlowCallback(
        HiddenStatesSelectCallback(hiddens_key="t_hidden_states",
                                   layers=[1, 3]),
        loaders="train",
    )

    lambda_hiddens_callback = ControlFlowCallback(
        LambdaPreprocessCallback(lambda s_hiddens, t_hiddens: (
            [c_s[:, 0] for c_s in s_hiddens],
            [t_s[:, 0] for t_s in t_hiddens],  # take only the CLS token
        )),
        loaders="train",
    )

    mse_hiddens = ControlFlowCallback(MSEHiddenStatesCallback(),
                                      loaders="train")

    kl_div = ControlFlowCallback(KLDivCallback(), loaders="train")

    aggregator = ControlFlowCallback(
        MetricAggregationCallback(
            prefix="loss",
            metrics={
                "kl_div_loss": 0.2,
                "mse_loss": 0.2,
                "task_loss": 0.6
            },
            mode="weighted_sum",
        ),
        loaders="train",
    )

    runner = HFDistilRunner()

    student_model = AutoModelForSequenceClassification.from_pretrained(
        "google/bert_uncased_L-2_H-128_A-2", num_labels=4)
    runner.train(
        model=torch.nn.ModuleDict({
            "teacher": teacher_model,
            "student": student_model
        }),
        loaders=loaders,
        optimizer=torch.optim.Adam(student_model.parameters(), lr=1e-4),
        callbacks=[
            metric_callback,
            slct_callback,
            lambda_hiddens_callback,
            mse_hiddens,
            kl_div,
            aggregator,
            OptimizerCallback(metric_key="loss"),
        ],
        check=True,
        num_epochs=3,
        valid_metric="accuracy",
        minimize_valid_metric=False,
        valid_loader="valid",
    )
Example #11
datasets = {
    "train": Wrp(MNIST("./data", train=True, download=True, transform=T.ToTensor())),
    "valid": Wrp(MNIST("./data", train=False, transform=T.ToTensor())),
}

loaders = {
    dl_key: DataLoader(dataset, shuffle=dl_key == "train", batch_size=32)
    for dl_key, dataset in datasets.items()
}

optimizer = torch.optim.Adam(chain(teacher.parameters(), student.parameters()))

runner = EndToEndDistilRunner(hidden_state_loss="mse", num_train_teacher_epochs=5)

runner.train(
    model=torch.nn.ModuleDict({"teacher": teacher, "student": student}),
    loaders=loaders,
    optimizer=optimizer,
    num_epochs=4,
    callbacks=[
        OptimizerCallback(metric_key="loss"),
        AccuracyCallback(input_key="logits", target_key="targets"),
    ],
    valid_metric="accuracy01",
    minimize_valid_metric=False,
    logdir="./logs",
    valid_loader="valid",
    criterion=torch.nn.CrossEntropyLoss(),
    check=True,
)
Example #12
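# Command-line HuggingFace distillation: the teacher is restored from a
# checkpoint, KL-divergence (and optionally MSE hidden-state) losses are weighted
# by args.alpha/args.beta and aggregated into the "loss" that OptimizerCallback
# steps on, and CheckpointCallback keeps the best model by validation accuracy.
# Imports are omitted in this snippet.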
def main(args):
    if args.wandb:
        import wandb
        wandb.init()
        logdir = args.logdir + "/" + wandb.run.name
    else:
        logdir = args.logdir
    set_global_seed(args.seed)
    datasets = load_dataset(args.dataset)

    tokenizer = AutoTokenizer.from_pretrained(args.teacher_model)
    datasets = datasets.map(
        lambda e: tokenizer(
            e["text"], truncation=True, padding="max_length", max_length=128),
        batched=True,
    )
    datasets = datasets.map(lambda e: {"labels": e["label"]}, batched=True)
    datasets.set_format(
        type="torch",
        columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
    )
    loaders = {
        "train":
        DataLoader(datasets["train"], batch_size=args.batch_size,
                   shuffle=True),
        "valid":
        DataLoader(datasets["test"], batch_size=args.batch_size),
    }
    teacher_model = AutoModelForSequenceClassification.from_pretrained(
        args.teacher_model, num_labels=args.num_labels)
    unpack_checkpoint(torch.load(args.teacher_path), model=teacher_model)
    metric_callback = LoaderMetricCallback(
        metric=HFMetric(metric=load_metric("accuracy")),
        input_key="s_logits",
        target_key="labels",
    )
    layers = [int(layer) for layer in args.layers.split(",")]
    slct_callback = ControlFlowCallback(
        HiddenStatesSelectCallback(hiddens_key="t_hidden_states",
                                   layers=layers),
        loaders="train",
    )

    lambda_hiddens_callback = ControlFlowCallback(
        LambdaPreprocessCallback(lambda s_hiddens, t_hiddens: (
            [c_s[:, 0] for c_s in s_hiddens],
            [t_s[:, 0] for t_s in t_hiddens],  # take only the CLS token
        )),
        loaders="train",
    )

    mse_hiddens = ControlFlowCallback(MSEHiddenStatesCallback(),
                                      loaders="train")

    kl_div = ControlFlowCallback(
        KLDivCallback(temperature=args.kl_temperature), loaders="train")

    runner = HFDistilRunner()

    student_model = AutoModelForSequenceClassification.from_pretrained(
        args.student_model, num_labels=args.num_labels)
    callbacks = [
        metric_callback, slct_callback, lambda_hiddens_callback, kl_div,
        OptimizerCallback(metric_key="loss"),
        CheckpointCallback(logdir=logdir,
                           loader_key="valid",
                           mode="model",
                           metric_key="accuracy",
                           minimize=False)
    ]
    if args.beta > 0:
        aggregator = ControlFlowCallback(
            MetricAggregationCallback(
                prefix="loss",
                metrics={
                    "kl_div_loss": args.alpha,
                    "mse_loss": args.beta,
                    "task_loss": 1 - args.alpha
                },
                mode="weighted_sum",
            ),
            loaders="train",
        )
        callbacks.append(mse_hiddens)
        callbacks.append(aggregator)
    else:
        aggregator = ControlFlowCallback(
            MetricAggregationCallback(
                prefix="loss",
                metrics={
                    "kl_div_loss": args.alpha,
                    "task_loss": 1 - args.alpha
                },
                mode="weighted_sum",
            ),
            loaders="train",
        )
        callbacks.append(aggregator)
    runner.train(model=torch.nn.ModuleDict({
        "teacher": teacher_model,
        "student": student_model
    }),
                 loaders=loaders,
                 optimizer=torch.optim.Adam(student_model.parameters(),
                                            lr=args.lr),
                 callbacks=callbacks,
                 num_epochs=args.num_epochs,
                 valid_metric="accuracy",
                 logdir=logdir,
                 minimize_valid_metric=False,
                 valid_loader="valid",
                 verbose=args.verbose,
                 seed=args.seed)

    if args.wandb:
        import csv
        import shutil
        with open(logdir + "/valid.csv") as fi:
            reader = csv.DictReader(fi)
            accuracy = []
            for row in reader:
                if row["accuracy"] == "accuracy":
                    continue
                accuracy.append(float(row["accuracy"]))

        wandb.log({"accuracy": max(accuracy[-args.num_epochs:])})
        shutil.rmtree(logdir)
Example #13
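# Supervised CIFAR100 baseline (no distillation): plain cross-entropy training of
# a single model with Criterion/Optimizer/Scheduler/Accuracy callbacks; imports
# are omitted in this snippet.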
def main(args):

    set_global_seed(args.seed)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    datasets = {
        "train":
        Wrp(
            CIFAR100(root=".",
                     train=True,
                     download=True,
                     transform=transform_train)),
        "valid":
        Wrp(CIFAR100(root=".", train=False, transform=transform_test)),
    }

    loaders = {
        k: DataLoader(v,
                      batch_size=args.batch_size,
                      shuffle=k == "train",
                      num_workers=2)
        for k, v in datasets.items()
    }
    model = NAME2MODEL[args.model](num_classes=100)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     [150, 180, 210],
                                                     gamma=0.1)

    runner = SupervisedRunner()
    runner.train(model=model,
                 loaders=loaders,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 valid_metric="accuracy",
                 minimize_valid_metric=False,
                 logdir=args.logdir,
                 callbacks=[
                     CriterionCallback(input_key="logits",
                                       target_key="targets",
                                       metric_key="loss"),
                     AccuracyCallback(input_key="logits",
                                      target_key="targets"),
                     OptimizerCallback(metric_key="loss"),
                     SchedulerCallback(),
                 ],
                 valid_loader="valid",
                 num_epochs=args.num_epochs,
                 criterion=torch.nn.CrossEntropyLoss(),
                 seed=args.seed)
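
The snippets above are excerpts and omit their imports. Below is a minimal, self-contained sketch of the recurring CriterionCallback / OptimizerCallback pattern on synthetic data. It assumes the Catalyst 21.x `catalyst.dl` API used throughout these examples; the dataset, layer sizes and hyperparameters are illustrative only and are not taken from any example above.

# Minimal sketch (assumes Catalyst 21.x): CriterionCallback computes the "loss"
# metric and OptimizerCallback steps the optimizer on it, as in the examples above.
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl

# Synthetic 10-feature binary-classification data (illustrative only).
X = torch.randn(256, 10)
y = torch.randint(0, 2, (256,))
loaders = {
    "train": DataLoader(TensorDataset(X, y), batch_size=32, shuffle=True),
    "valid": DataLoader(TensorDataset(X, y), batch_size=32),
}

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

runner = dl.SupervisedRunner(
    input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)
runner.train(
    model=model,
    criterion=torch.nn.CrossEntropyLoss(),
    optimizer=optimizer,
    loaders=loaders,
    num_epochs=1,
    callbacks=[
        dl.CriterionCallback(input_key="logits", target_key="targets", metric_key="loss"),
        dl.OptimizerCallback(metric_key="loss"),
        dl.AccuracyCallback(input_key="logits", target_key="targets"),
    ],
    valid_loader="valid",
    valid_metric="accuracy01",
    minimize_valid_metric=False,
    verbose=False,
)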