Example #1
def main(cfg: Config):
    if cfg.enable_accimage:
        enable_accimage()

    model = resnet50()
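    # linear LR scaling rule (Goyal et al., 2017): base lr 0.1 scaled by the
    # global batch size relative to 256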
    optimizer = optim.SGD(lr=1e-1 * cfg.batch_size * get_num_nodes() / 256,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
    train_loader, test_loader = DATASET_REGISTRY("fast_imagenet" if cfg.use_fast_collate else
                                                 "imagenet")(cfg.batch_size,
                                                             train_size=cfg.batch_size * 50 if cfg.debug else None,
                                                             test_size=cfg.batch_size * 50 if cfg.debug else None,
                                                             num_workers=cfg.num_workers)

    use_multi_gpus = not is_distributed() and torch.cuda.device_count() > 1
    with SupervisedTrainer(model,
                           optimizer,
                           F.cross_entropy,
                           reporters=[reporters.TensorboardReporter(".")],
                           scheduler=scheduler,
                           data_parallel=use_multi_gpus,
                           use_amp=cfg.use_amp,
                           use_cuda_nonblocking=True,
                           use_sync_bn=cfg.use_sync_bn,
                           report_accuracy_topk=5) as trainer:

        for epoch in trainer.epoch_range(cfg.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)

        print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
Example #2
def main():
    train_loader, test_loader = cifar10_loaders(args.batch_size)

    if args.model == "resnet":
        model = resnet20()
    elif args.model == "senet":
        model = se_resnet20(num_classes=10, reduction=args.reduction)
    elif args.model == "gcn":
        model = resnet20_gcn()
    else:
        raise ValueError(f"{args.model} is not a valid argument")

    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.StepLR(80, 0.1)
    tqdm_rep = reporter.TQDMReporter(range(args.epochs),
                                     callbacks=[callbacks.AccuracyCallback()],
                                     save_dir='logs/',
                                     report_freq=-1)
    # tb_rep = reporter.TensorboardReporter(callbacks=[callbacks.AccuracyCallback(), callbacks.LossCallback()], save_dir='logs/')
    trainer = Trainer(model,
                      optimizer,
                      F.cross_entropy,
                      scheduler=scheduler,
                      callbacks=tqdm_rep)
    for _ in tqdm_rep:
        trainer.train(train_loader)
        trainer.test(test_loader)
Example #3
def test_basic_trainer():
    model = nn.Linear(10, 10)
    optimizer = optim.SGD()
    scheduler = lr_scheduler.StepLR(9)
    trainer = trainers.SupervisedTrainer(model,
                                         optimizer,
                                         F.cross_entropy,
                                         scheduler=scheduler,
                                         update_scheduler_by_epoch=False)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long))
              for _ in range(10)]
    for _ in trainer.epoch_range(1):
        trainer.train(loader)
    # a bare pytest.approx(...) is always truthy, so compare explicitly
    assert trainer.optimizer.param_groups[0]["lr"] == pytest.approx(0.01)

    optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)
    trainer = trainers.SupervisedTrainer(model,
                                         optimizer,
                                         F.cross_entropy,
                                         scheduler=scheduler,
                                         update_scheduler_by_epoch=False)
    for _ in trainer.epoch_range(1):
        trainer.train(loader)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 9)
    trainer = trainers.SupervisedTrainer(model,
                                         optimizer,
                                         F.cross_entropy,
                                         scheduler=scheduler,
                                         update_scheduler_by_epoch=False)
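    # run() appears to stop after 11 iterations even though 15 epochs are
    # requested (inferred from the assertion below); steps are 0-indexed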
    trainer.run(loader, loader, 15, 11)
    assert trainer.step == 11 - 1
Example #4
def main():
    train_loader, test_loader = vision_loaders(args.data_name,
                                               args.batch_size,
                                               download=True,
                                               num_workers=1)

    if args.baseline:
        model = resnet20()
    else:
        # model = se_resnet18(num_classes=1000)
        # model = se_resnet50(num_classes=100)
        model = resnet20_ASP(num_classes=10)
        # model = resnet32_ASP(num_classes=10)

    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.StepLR(80, 0.1)
    tqdm_rep = reporters.TQDMReporter(range(args.epochs))
    _callbacks = [tqdm_rep, callbacks.AccuracyCallback()]
    with Trainer(model,
                 optimizer,
                 F.cross_entropy,
                 scheduler=scheduler,
                 callbacks=_callbacks) as trainer:
        for i in tqdm_rep:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #5
def main():
    model = {
        "resnet20": resnet20,
        "wrn28_10": wrn28_10
    }[args.model](num_classes=10)
    weight_decay = {"resnet20": 1e-4, "wrn28_10": 5e-4}[args.model]
    lr_decay = {"resnet20": 0.1, "wrn28_10": 0.2}[args.model]
    train_loader, test_loader = cifar10_loaders(args.batch_size)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=lr_decay)
    tq = reporters.TQDMReporter(range(args.epochs), verb=True)
    c = [
        callbacks.AccuracyCallback(),
        callbacks.LossCallback(),
        reporters.IOReporter("."),
        reporters.TensorboardReporter("."),
        callbacks.WeightSave("."), tq
    ]

    with trainers.SupervisedTrainer(model,
                                    optimizer,
                                    F.cross_entropy,
                                    callbacks=c,
                                    scheduler=scheduler) as trainer:
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #6
def test_dict_model():
    # test if model and optimizer are dict
    class Trainer(trainers.TrainerBase):
        def iteration(self, data):
            x, target = data  # avoid shadowing the builtin `input`
            output = self.model["generator"](x) + self.model["discriminator"](x)
            loss = self.loss_f(output, target)
            results = utils.Map(loss=loss, output=output)
            if self.is_train:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            return results

    model = {
        "generator": nn.Linear(10, 10),
        "discriminator": nn.Linear(10, 10)
    }
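    # only the generator gets an optimizer; the discriminator's None entry is
    # assumed to be skipped by the base trainer when building optimizers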
    optimizer = {"generator": optim.SGD(lr=0.1), "discriminator": None}
    trainer = Trainer(model, optimizer, F.cross_entropy)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long))
              for _ in range(10)]
    for _ in range(1):
        trainer.train(loader)
        trainer.test(loader)
Example #7
def main():
    if args.distributed:
        init_distributed()

    model = se_resnet50(num_classes=1000)

    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([50, 70])
    train_loader, test_loader = imagenet_loaders(args.root, args.batch_size, distributed=args.distributed,
                                                 num_train_samples=args.batch_size * 10 if args.debug else None,
                                                 num_test_samples=args.batch_size * 10 if args.debug else None)

    c = [callbacks.AccuracyCallback(), callbacks.AccuracyCallback(k=5),
         callbacks.LossCallback(),
         callbacks.WeightSave('.'),
         reporters.TensorboardReporter('.'),
         reporters.TQDMReporter(range(args.epochs))]

    with SupervisedTrainer(model, optimizer, F.cross_entropy,
                           callbacks=c,
                           scheduler=scheduler,
                           ) as trainer:
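        # c[-1] is the TQDMReporter appended last; iterating it drives the epoch loop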
        for _ in c[-1]:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #8
def main():
    if is_distributed():
        init_distributed()

    model = se_resnet50(num_classes=1000)

    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([50, 70])
    train_loader, test_loader = DATASET_REGISTRY("imagenet")(args.batch_size)

    c = [
        callbacks.AccuracyCallback(),
        callbacks.AccuracyCallback(k=5),
        callbacks.LossCallback(),
        callbacks.WeightSave("."),
        reporters.TensorboardReporter("."),
        reporters.TQDMReporter(range(args.epochs)),
    ]

    with SupervisedTrainer(
            model,
            optimizer,
            F.cross_entropy,
            callbacks=c,
            scheduler=scheduler,
    ) as trainer:
        for _ in c[-1]:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #9
def main():
    model = se_resnet50(num_classes=1000)

    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([50, 70])

    c = [callbacks.AccuracyCallback(), callbacks.LossCallback()]
    r = reporter.TQDMReporter(range(args.epochs), callbacks=c)
    tb = reporter.TensorboardReporter(c)
    rep = callbacks.CallbackList(r, tb, callbacks.WeightSave("checkpoints"))

    if args.distributed:
        # DistributedSupervisedTrainer sets up torch.distributed
        if args.local_rank == 0:
            print("\nuse DistributedDataParallel")
        trainer = DistributedSupervisedTrainer(model, optimizer, F.cross_entropy, callbacks=rep, scheduler=scheduler,
                                               init_method=args.init_method, backend=args.backend)
    else:
        multi_gpus = torch.cuda.device_count() > 1
        if multi_gpus:
            print("\nuse DataParallel")
        trainer = SupervisedTrainer(model, optimizer, F.cross_entropy, callbacks=rep,
                                    scheduler=scheduler, data_parallel=multi_gpus)
    # if distributed, need to setup loaders after DistributedSupervisedTrainer
    train_loader, test_loader = imagenet_loaders(args.root, args.batch_size, distributed=args.distributed,
                                                 num_train_samples=args.batch_size * 10 if args.debug else None,
                                                 num_test_samples=args.batch_size * 10 if args.debug else None)
    for _ in r:
        trainer.train(train_loader)
        trainer.test(test_loader)
Example #10
def get_components(cfg):
    labeled_loader, unlabeled_loader, val_loader, test_loader = get_dataloader(cfg.data.name,
                                                                               cfg.data.labeled_size,
                                                                               cfg.data.unlabeled_size,
                                                                               cfg.data.val_size,
                                                                               cfg.data.batch_size,
                                                                               cfg.data.random_state,
                                                                               download=cfg.data.download,
                                                                               pilaugment=cfg.data.get('pilaugment',
                                                                                                       False)
                                                                               )

    # define num_classes once so the model's output size matches the dataset
    num_classes = {"animal": 6, "cifar100": 100, "tinyimagenet": 200}.get(cfg.data.name, 10)
    model = wrn28_2(num_classes=num_classes)
    optimizer = {'adam': optim.Adam(lr=cfg.optim.lr),
                 'sgd': optim.SGD(lr=cfg.optim.lr, momentum=0.9)}[cfg.optim.name]
    scheduler = {'adam': None,
                 'sgd': lr_scheduler.CosineAnnealingWithWarmup(cfg.optim.epochs,
                                                               4, cfg.optim.epochs // 100)}[cfg.optim.name]
    ema_model = partial(EMAModel, ema_rate=cfg.model.ema_rate, weight_decay=cfg.optim.wd * cfg.optim.lr)
    tq = reporters.TQDMReporter(range(cfg.optim.epochs))
    _callbacks = [callbacks.AccuracyCallback(),
                  callbacks.LossCallback(),
                  reporters.IOReporter("."),
                  reporters.TensorboardReporter("."), tq]
    return PackedLoader(labeled_loader, unlabeled_loader), val_loader, test_loader, model, optimizer, \
           scheduler, ema_model, num_classes, tq, _callbacks
Example #11
def train_and_eval(cfg):
    train_loader, val_loader, test_loader, num_classes = get_dataloader(
        cfg.data.name, cfg.data.val_size, cfg.data.batch_size,
        cfg.data.download, cfg.augment, False)
    model = get_model(cfg.model.name, num_classes)
    optimizer = optim.SGD(cfg.optim.model.lr,
                          momentum=0.9,
                          weight_decay=cfg.optim.model.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(cfg.optim.model.steps)
    tq = reporters.TQDMReporter(range(cfg.optim.epochs), verb=cfg.verb)
    callback = [
        callbacks.AccuracyCallback(),
        callbacks.LossCallback(),
        reporters.TensorboardReporter("."),
        reporters.IOReporter("."), tq
    ]

    with trainers.SupervisedTrainer(model,
                                    optimizer,
                                    F.cross_entropy,
                                    callbacks=callback,
                                    scheduler=scheduler) as trainer:
        for ep in tq:
            trainer.train(train_loader)
            trainer.test(val_loader, 'val')
            trainer.test(test_loader)
Example #12
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()
    model = MODEL_REGISTRY(cfg.model.name)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY("fast_cifar10" if cfg.use_fast_collate else "cifar10"
                                                 )(cfg.data.batch_size, num_workers=4,
                                                   use_prefetcher=cfg.use_prefetcher)
    optimizer = None if cfg.bn_no_wd else optim.SGD(lr=1e-1, momentum=0.9, weight_decay=cfg.optim.weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=cfg.optim.lr_decay)

    if cfg.bn_no_wd:
        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {"params": bn_params, "weight_decay": 0},
                {"params": non_bn_parameters, "weight_decay": cfg.optim.weight_decay},
            ]
            trainer.optimizer = torch.optim.SGD(optim_params, lr=1e-1, momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer
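        # note: splitting on the substring "bn" assumes the model's parameter
        # names follow that convention; an isinstance check over
        # trainer.model.modules() would be a name-agnostic alternative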

    if cfg.use_zerograd_none:
        import types

        def set_optimizer(trainer):
            # see Apex for details
            def zero_grad(self):
                for group in self.param_groups:
                    for p in group['params']:
                        p.grad = None

            trainer.optimizer = trainer.optimizer(trainer.model.parameters())
            trainer.optimizer.zero_grad = types.MethodType(zero_grad, trainer.optimizer)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer
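        # aside: PyTorch >= 1.7 offers this natively via
        # optimizer.zero_grad(set_to_none=True), so the patch above is only
        # needed on older versions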

    with trainers.SupervisedTrainer(model,
                                    optimizer,
                                    F.cross_entropy,
                                    reporters=[reporters.TensorboardReporter('.')],
                                    scheduler=scheduler,
                                    use_amp=cfg.use_amp,
                                    debug=cfg.debug
                                    ) as trainer:

        for _ in trainer.epoch_range(cfg.optim.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)

        print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
Example #13
def main(cfg):
    model = {
        "resnet20": resnet20,
        "wrn28_10": wrn28_10
    }[cfg.model](num_classes=10)
    weight_decay = {"resnet20": 1e-4, "wrn28_10": 5e-4}[cfg.model]
    lr_decay = {"resnet20": 0.1, "wrn28_10": 0.2}[cfg.model]
    train_loader, test_loader = vision_loaders("cifar10", cfg.batch_size)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([100, 150], gamma=lr_decay)
    tq = reporters.TQDMReporter(range(cfg.epochs), verb=True)
    c = [
        callbacks.AccuracyCallback(),
        callbacks.LossCallback(),
        reporters.IOReporter("."),
        reporters.TensorboardReporter("."),
        callbacks.WeightSave("."), tq
    ]

    if cfg.bn_no_wd:

        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {
                    "params": bn_params,
                    "weight_decay": 0
                },
                {
                    "params": non_bn_parameters,
                    "weight_decay": weight_decay
                },
            ]
            trainer.optimizer = torch.optim.SGD(optim_params,
                                                lr=1e-1,
                                                momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(model,
                                    optimizer,
                                    F.cross_entropy,
                                    callbacks=c,
                                    scheduler=scheduler) as trainer:

        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #14
def main():
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])

    valid_transform = transforms.Compose(
        [transforms.Resize(224),
         transforms.ToTensor(), normalize])
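    # note: Resize(224) with an int argument only scales the shorter side; the
    # usual ImageNet evaluation pipeline is Resize(256) + CenterCrop(224)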

    train_dataset = ImageFolder(args.data, train_transform)
    valid_dataset = ImageFolder(args.data, valid_transform)

    num_samples = len(train_dataset) // 10
    indices = list(range(num_samples))
    split = int(np.floor(0.1 * num_samples))
    np.random.shuffle(indices)
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    train_loader = DataLoader(train_dataset,
                              args.batch_size,
                              sampler=train_sampler,
                              num_workers=4)
    valid_loader = DataLoader(valid_dataset,
                              args.batch_size,
                              sampler=valid_sampler,
                              num_workers=4)
    print("num data:", num_samples)
    print("num train batches:", len(train_loader))
    print("num test batches:", len(valid_loader))
    # return

    # train_loader, test_loader = cifar10_loaders(args.batch_size)

    model = se_resnet50(num_classes=42)
    # model.load_state_dict(torch.load("seresnet50-60a8950a85b2b.pkl"))
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.StepLR(80, 0.1)
    tqdm_rep = reporters.TQDMReporter(range(args.epochs),
                                      callbacks.AccuracyCallback())
    _callbacks = [tqdm_rep, callbacks.AccuracyCallback()]
    with Trainer(model,
                 optimizer,
                 F.cross_entropy,
                 scheduler=scheduler,
                 callbacks=_callbacks) as trainer:
        for _ in tqdm_rep:
            trainer.train(train_loader)
            trainer.test(valid_loader)
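            # saved every epoch; each save overwrites the same checkpoint file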
            torch.save(trainer.model.state_dict(), "se_resnet50.pkl")
Example #15
def main():
    if args.distributed:
        init_distributed()
    if args.enable_accimage:
        enable_accimage()

    model = resnet50()
    optimizer = optim.SGD(lr=1e-1 * args.batch_size * get_num_nodes() / 256,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback()]
    r = reporters.TQDMReporter(range(args.epochs), callbacks=c)
    tb = reporters.TensorboardReporter(c)
    rep = callbacks.CallbackList(r, tb, callbacks.WeightSave("checkpoints"))
    _train_loader, _test_loader = imagenet_loaders(
        args.root,
        args.batch_size,
        distributed=args.distributed,
        num_train_samples=args.batch_size * 10 if args.debug else None,
        num_test_samples=args.batch_size * 10 if args.debug else None)

    if args.distributed:
        # DistributedSupervisedTrainer sets up torch.distributed
        if args.local_rank == 0:
            print("\nuse DistributedDataParallel\n")
        trainer = DistributedSupervisedTrainer(model,
                                               optimizer,
                                               F.cross_entropy,
                                               callbacks=rep,
                                               scheduler=scheduler,
                                               init_method=args.init_method,
                                               backend=args.backend,
                                               enable_amp=args.enable_amp)
    else:
        use_multi_gpus = torch.cuda.device_count() > 1
        if use_multi_gpus:
            print("\nuse DataParallel\n")
        trainer = SupervisedTrainer(model,
                                    optimizer,
                                    F.cross_entropy,
                                    callbacks=rep,
                                    data_parallel=use_multi_gpus)

    for epoch in r:
        if args.use_prefetcher:
            train_loader = prefetcher.DataPrefetcher(_train_loader)
            test_loader = prefetcher.DataPrefetcher(_test_loader)
        else:
            train_loader, test_loader = _train_loader, _test_loader
        # following apex's training scheme
        trainer.train(train_loader)
        trainer.test(test_loader)

    rep.close()
Example #16
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()
    model = MODEL_REGISTRY(cfg.name)(num_classes=10)
    train_loader, test_loader = DATASET_REGISTRY(
        "fast_cifar10" if cfg.use_fast_collate else "cifar10")(
            cfg.batch_size, num_workers=4, use_prefetcher=cfg.use_prefetcher)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=cfg.lr, momentum=0.9, weight_decay=cfg.weight_decay)
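    # CosineAnnealingWithWarmup(total_epochs, multiplier, warmup_epochs);
    # argument roles inferred from cfg.optim.scheduler.{mul,warmup} in Example #25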
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(cfg.epochs, 4, 5)

    if cfg.bn_no_wd:

        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "bn" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {
                    "params": bn_params,
                    "weight_decay": 0
                },
                {
                    "params": non_bn_parameters,
                    "weight_decay": cfg.weight_decay
                },
            ]
            trainer.optimizer = torch.optim.SGD(optim_params,
                                                lr=1e-1,
                                                momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(
            model,
            optimizer,
            F.cross_entropy,
            reporters=[reporters.TensorboardReporter('.')],
            scheduler=scheduler,
            use_amp=cfg.use_amp,
            debug=cfg.debug) as trainer:

        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()

        print(
            f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}"
        )
Example #17
def test(tmp_path, rep, save_freq):
    temp_dir = tmp_path / "test"

    @callbacks.metric_callback_decorator
    def ca(data):
        output, target = data["output"], data["data"][1]
        return {
            i: v
            for i, v in enumerate(metrics.classwise_accuracy(output, target))
        }

    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)

    c = callbacks.CallbackList(
        callbacks.AccuracyCallback(), ca,
        callbacks.WeightSave(save_path=temp_dir, save_freq=save_freq))
    epoch = range(1)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long))
              for _ in range(10)]
    with {
            "tqdm": lambda: reporters.TQDMReporter(epoch, c, temp_dir),
            "logger": lambda: reporters.LoggerReporter(c, temp_dir),
            "tensorboard": lambda: reporters.TensorboardReporter(c, temp_dir)
    }[rep]() as _rep:
        tr = trainers.SupervisedTrainer(model,
                                        optimizer,
                                        F.cross_entropy,
                                        callbacks=_rep,
                                        verb=False)
        if rep == "tqdm":
            epoch = _rep
        for _ in epoch:
            tr.train(loader)
            tr.test(loader)
        tr.exit()

    try:
        # .../test/**/0.pkl
        save_file = list(Path(temp_dir).glob("*/*.pkl"))[0]
    except IndexError as e:
        print(list(Path(temp_dir).glob("*/*")))
        raise e
    tr.resume(save_file)

    c = callbacks.AccuracyCallback()
    with {
            "tqdm": lambda: reporters.TQDMReporter(epoch, c, temp_dir),
            "logger": lambda: reporters.LoggerReporter(c, temp_dir),
            "tensorboard": lambda: reporters.TensorboardReporter(c, temp_dir)
    }[rep]() as _rep:
        inferencer = Inferencer(model, _rep)
        inferencer.load(save_file)
        inferencer.run(loader)
Example #18
def main():
    model = MODELS[args.teacher_model](num_classes=10)
    train_loader, test_loader = cifar10_loaders(args.batch_size)
    weight_decay = 1e-4 if "resnet" in args.teacher_model else 5e-4
    lr_decay = 0.1 if "resnet" in args.teacher_model else 0.2
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([50, 80], gamma=lr_decay)

    trainer = trainers.SupervisedTrainer(model,
                                         optimizer,
                                         F.cross_entropy,
                                         scheduler=scheduler)
    trainer.logger.info("Train the teacher model!")
    for _ in trange(args.teacher_epochs, ncols=80):
        trainer.train(train_loader)
        trainer.test(test_loader)

    teacher_model = model.eval()

    weight_decay = 1e-4 if "resnet" in args.student_model else 5e-4
    lr_decay = 0.1 if "resnet" in args.student_model else 0.2
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=weight_decay)
    scheduler = lr_scheduler.MultiStepLR([50, 80], gamma=lr_decay)
    model = MODELS[args.student_model](num_classes=10)

    c = [callbacks.AccuracyCallback(), callbacks.LossCallback(), kl_loss]
    with reporters.TQDMReporter(
            range(args.student_epochs),
            callbacks=c) as tq, reporters.TensorboardReporter(c) as tb:
        trainer = DistillationTrainer(model,
                                      optimizer,
                                      F.cross_entropy,
                                      callbacks=[tq, tb],
                                      scheduler=scheduler,
                                      teacher_model=teacher_model,
                                      temperature=args.temperature)
        trainer.logger.info("Train the student model!")
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #19
def main(cfg):
    train_loader, test_loader, num_classes = DATASET_REGISTRY(cfg.data.name)(cfg.data.batch_size,
                                                                             return_num_classes=True,
                                                                             num_workers=4)
    model = MODEL_REGISTRY(cfg.model.name)(num_classes=num_classes)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=cfg.optim.weight_decay)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(200, 4, 5)

    with Trainer(model, optimizer, naive_cross_entropy_loss, scheduler=scheduler, cfg=cfg.model,
                 num_classes=num_classes) as trainer:
        for _ in trainer.epoch_range(200):
            trainer.train(train_loader)
            trainer.test(test_loader)
        print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.4f}")
Example #20
def test(rep):
    tmpdir = str(gettempdir())
    if rep == "tensorboard" and not is_tensorboardX_available:
        pytest.skip("tensorboardX is not available")

    @callbacks.metric_callback_decorator
    def ca(data):
        output, target = data["output"], data["data"][1]
        return {
            i: v
            for i, v in enumerate(metrics.classwise_accuracy(output, target))
        }

    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)

    c = callbacks.CallbackList(callbacks.AccuracyCallback(), ca,
                               callbacks.WeightSave(tmpdir))
    epoch = range(1)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long))
              for _ in range(10)]
    with {
            "tqdm": lambda: reporters.TQDMReporter(epoch, c, tmpdir),
            "logger": lambda: reporters.LoggerReporter(c, tmpdir),
            "tensorboard": lambda: reporters.TensorboardReporter(c, tmpdir)
    }[rep]() as _rep:
        tr = trainers.SupervisedTrainer(model,
                                        optimizer,
                                        F.cross_entropy,
                                        callbacks=_rep,
                                        verb=False)
        if rep == "tqdm":
            epoch = _rep
        for _ in epoch:
            tr.train(loader)
            tr.test(loader)

    save_file = list(Path(tmpdir).glob("*/*.pkl"))[0]
    tr.resume(save_file)

    c = callbacks.AccuracyCallback()
    with {
            "tqdm": lambda: reporters.TQDMReporter(epoch, c, tmpdir),
            "logger": lambda: reporters.LoggerReporter(c, tmpdir),
            "tensorboard": lambda: reporters.TensorboardReporter(c, tmpdir)
    }[rep]() as _rep:
        inferencer = Inferencer(model, _rep)
        inferencer.load(save_file)
        inferencer.run(loader)
Example #21
def test_update_scheduler():
    model = nn.Linear(10, 10)
    optimizer = optim.SGD(lr=0.1)
    trainer = trainers.SupervisedTrainer(model, optimizer, F.cross_entropy)
    trainer.update_scheduler(lr_scheduler.LambdaLR(lambda step: 0.1 ** step),
                             update_scheduler_by_epoch=False)
    loader = [(torch.randn(2, 10), torch.zeros(2, dtype=torch.long)) for _ in range(2)]
    trainer.train(loader)
    # LambdaLR's lambda returns a multiplicative factor applied to the base lr
    assert list(trainer.optimizer.param_groups)[0]['lr'] == 0.1 ** 2

    trainer.update_scheduler(lr_scheduler.LambdaLR(lambda epoch: 0.1 ** epoch, last_epoch=1),
                             update_scheduler_by_epoch=True)
    trainer.train(loader)
    assert list(trainer.optimizer.param_groups)[0]['lr'] == 0.1 ** 3
Example #22
def main():
    train_loader, test_loader = get_dataloader(args.batch_size, args.root)
    gpus = list(range(torch.cuda.device_count()))
    se_resnet = nn.DataParallel(se_resnet50(num_classes=1000),
                                device_ids=gpus)
    optimizer = optim.SGD(lr=0.6 / 1024 * args.batch_size, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.StepLR(30, gamma=0.1)
    weight_saver = callbacks.WeightSave("checkpoints")
    tqdm_rep = reporter.TQDMReporter(range(args.epochs), callbacks=[callbacks.AccuracyCallback()])

    trainer = Trainer(se_resnet, optimizer, F.cross_entropy, scheduler=scheduler,
                      callbacks=callbacks.CallbackList(weight_saver, tqdm_rep))
    for _ in tqdm_rep:
        trainer.train(train_loader)
        trainer.test(test_loader)
Example #23
def main():
    train_loader, test_loader = cifar10_loaders(args.batch_size)
    print(test_loader.sampler)

    if args.baseline:
        model = resnet20()
    else:
        model = se_resnet20(num_classes=10, reduction=args.reduction)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.StepLR(80, 0.1)
    tqdm_rep = reporters.TQDMReporter(range(args.epochs), callbacks.AccuracyCallback())
    _callbacks = [tqdm_rep, callbacks.AccuracyCallback()]
    with Trainer(model, optimizer, F.cross_entropy, scheduler=scheduler, callbacks=_callbacks) as trainer:
        for _ in tqdm_rep:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #24
def main(cfg):
    if cfg.distributed.enable:
        init_distributed(use_horovod=cfg.distributed.use_horovod,
                         backend=cfg.distributed.backend,
                         init_method=cfg.distributed.init_method)
    if cfg.enable_accimage:
        enable_accimage()

    model = resnet50()
    optimizer = optim.SGD(lr=1e-1 * cfg.batch_size * get_num_nodes() / 256,
                          momentum=0.9,
                          weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
    tq = reporters.TQDMReporter(range(cfg.epochs))
    c = [
        callbacks.AccuracyCallback(),
        callbacks.AccuracyCallback(k=5),
        callbacks.LossCallback(), tq,
        reporters.TensorboardReporter("."),
        reporters.IOReporter(".")
    ]
    _train_loader, _test_loader = imagenet_loaders(
        cfg.root,
        cfg.batch_size,
        distributed=cfg.distributed.enable,
        num_train_samples=cfg.batch_size * 10 if cfg.debug else None,
        num_test_samples=cfg.batch_size * 10 if cfg.debug else None)

    use_multi_gpus = not cfg.distributed.enable and torch.cuda.device_count() > 1
    with SupervisedTrainer(model,
                           optimizer,
                           F.cross_entropy,
                           callbacks=c,
                           scheduler=scheduler,
                           data_parallel=use_multi_gpus,
                           use_horovod=cfg.distributed.use_horovod) as trainer:

        for epoch in tq:
            if cfg.use_prefetcher:
                train_loader = prefetcher.DataPrefetcher(_train_loader)
                test_loader = prefetcher.DataPrefetcher(_test_loader)
            else:
                train_loader, test_loader = _train_loader, _test_loader
            # following apex's training scheme
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #25
def train_and_eval(cfg: BaseConfig):
    if cfg.path is None:
        print('cfg.path is None, so FasterAutoAugment is not used')
        policy = None
    else:
        path = Path(hydra.utils.get_original_cwd()) / cfg.path
        assert path.exists()
        policy_weight = torch.load(path, map_location='cpu')
        policy = Policy.faster_auto_augment_policy(
            num_chunks=cfg.model.num_chunks, **policy_weight['policy_kwargs'])
        policy.load_state_dict(policy_weight['policy'])
    train_loader, test_loader, num_classes = DATASET_REGISTRY(cfg.data.name)(
        batch_size=cfg.data.batch_size,
        drop_last=True,
        download=cfg.data.download,
        return_num_classes=True,
        norm=[
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ],
        num_workers=4)
    model = MODEL_REGISTRY(cfg.model.name)(num_classes)
    optimizer = optim.SGD(cfg.optim.lr,
                          momentum=cfg.optim.momentum,
                          weight_decay=cfg.optim.weight_decay,
                          nesterov=cfg.optim.nesterov)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(
        cfg.optim.epochs, cfg.optim.scheduler.mul, cfg.optim.scheduler.warmup)
    tqdm = callbacks.TQDMReporter(range(cfg.optim.epochs))
    c = [callbacks.LossCallback(), callbacks.AccuracyCallback(), tqdm]
    with EvalTrainer(model,
                     optimizer,
                     F.cross_entropy,
                     callbacks=c,
                     scheduler=scheduler,
                     policy=policy,
                     cfg=cfg.model,
                     use_cuda_nonblocking=True) as trainer:
        for _ in tqdm:
            trainer.train(train_loader)
            trainer.test(test_loader)
    print(f"Min. Error Rate: {1 - max(c[1].history['test']):.3f}")
Example #26
def main():
    Trainer = trainers.SupervisedTrainer if args.baseline else MixupTrainer
    model = MODELS[args.model](num_classes=NUMCLASSES[args.dataset])
    train_loader, test_loader = DATASETS[args.dataset](args.batch_size)
    optimizer = optim.SGD(lr=1e-1, momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR(args.steps, gamma=0.1)
    c = [callbacks.AccuracyCallback(), callbacks.LossCallback()]

    with reporters.TQDMReporter(
            range(args.epochs),
            callbacks=c) as tq, reporters.TensorboardReporter(c) as tb:
        trainer = Trainer(model,
                          optimizer,
                          naive_cross_entropy_loss,
                          callbacks=[tq, tb],
                          scheduler=scheduler,
                          alpha=args.alpha,
                          num_classes=NUMCLASSES[args.dataset])
        for _ in tq:
            trainer.train(train_loader)
            trainer.test(test_loader)
Example #27
if __name__ == "__main__":
    import miniargs
    from torch.nn import functional as F

    p = miniargs.ArgumentParser()
    p.add_int("--batch_size", default=128)
    p.add_int("--epochs", default=300)
    p.add_str("--optimizer", choices=["sgd", "adam"])
    p.add_float("--lr", default=1e-2)
    p.add_multi_str("--group", default=["conv1", "layer1", "layer2", "layer3"])
    p.add_int("--step", default=50)
    p.add_int("--num_convs", default=3)
    p.add_int("--num_fcs", default=3)
    args = p.parse()

    optimizer = {"adam": optim.Adam(lr=3e-4, weight_decay=1e-4),
                 "sgd": optim.SGD(lr=args.lr, momentum=0.9, weight_decay=1e-4)}[args.optimizer]

    train_loader, test_loader = cifar10_loaders(args.batch_size)
    resnet = module_converter(resnet56(num_classes=10), keys=["conv1", "bn1", "relu", "layer1", "layer2", "layer3"])
    aux = nn.ModuleDict(OrderedDict({k: v for k, v in {
        # 32x32
        "conv1": generate_aux(32, 16, 10, args.num_convs, args.num_fcs),
        # 32x32
        "layer1": generate_aux(32, 16, 10, args.num_convs, args.num_fcs),
        # 16x16
        "layer2": generate_aux(16, 32, 10, args.num_convs, args.num_fcs),
        # 8x8
        "layer3": generate_aux(8, 64, 10, args.num_convs, args.num_fcs),
    }.items() if k in args.group}))
    model = NaiveGreedyModule(resnet, aux=aux,
                              tail=nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(64, 10)))
Example #28
def main(cfg):
    if cfg.use_accimage:
        enable_accimage()
    data = DATASET_REGISTRY(cfg.data).setup(
        cfg.batch_size,
        num_workers=4,
        download=cfg.download,
        prefetch_factor=cfg.prefetch_factor,
        persistent_workers=cfg.persistent_workers)
    model = MODEL_REGISTRY(cfg.model)(num_classes=data.num_classes)
    optimizer = None if cfg.bn_no_wd else optim.SGD(
        lr=cfg.lr,
        momentum=0.9,
        weight_decay=cfg.weight_decay,
        multi_tensor=cfg.use_multi_tensor)
    scheduler = lr_scheduler.CosineAnnealingWithWarmup(cfg.epochs, 4, 5)

    if cfg.bn_no_wd:

        def set_optimizer(trainer):
            bn_params = []
            non_bn_parameters = []
            for name, p in trainer.model.named_parameters():
                if "norm" in name:
                    bn_params.append(p)
                else:
                    non_bn_parameters.append(p)
            optim_params = [
                {
                    "params": bn_params,
                    "weight_decay": 0
                },
                {
                    "params": non_bn_parameters,
                    "weight_decay": cfg.weight_decay
                },
            ]
            trainer.optimizer = torch.optim.SGD(optim_params,
                                                lr=1e-1,
                                                momentum=0.9)

        trainers.SupervisedTrainer.set_optimizer = set_optimizer

    with trainers.SupervisedTrainer(
            model,
            optimizer,
            F.cross_entropy,
            reporters=[reporters.TensorboardReporter('.')],
            scheduler=scheduler,
            use_amp=cfg.use_amp,
            use_channel_last=cfg.use_channel_last,
            debug=cfg.debug) as trainer:

        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(data.train_loader)
            trainer.test(data.test_loader)
            trainer.scheduler.step()

        print(
            f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}"
        )