Example No. 1
def set_scheduler(optimizer, scheduler_name, **kwargs):
    """
    Set the scheduler on learning rate for the optimizer.

    Reference:
        https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
    """

    if scheduler_name == "LambdaLR":
        scheduler = lr_scheduler.LambdaLR(optimizer, **kwargs)
    elif scheduler_name == "MultiplicativeLR":
        scheduler = lr_scheduler.MultiplicativeLR(optimizer, **kwargs)
    elif scheduler_name == "StepLR":
        scheduler = lr_scheduler.StepLR(optimizer, **kwargs)
    elif scheduler_name == "MultiStepLR":
        scheduler = lr_scheduler.MultiStepLR(optimizer, **kwargs)
    elif scheduler_name == "ExponentialLR":
        scheduler = lr_scheduler.ExponentialLR(optimizer, **kwargs)
    elif scheduler_name == "CosineAnnealingLR":
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, **kwargs)
    elif scheduler_name == "ReduceLROnPlateau":
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, **kwargs)
    elif scheduler_name == "CyclicLR":
        scheduler = lr_scheduler.CyclicLR(optimizer, **kwargs)
    elif scheduler_name == "OneCycleLR":
        scheduler = lr_scheduler.OneCycleLR(optimizer, **kwargs)
    elif scheduler_name == "CosineAnnealingWarmRestarts":
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, **kwargs)
    else:
        msg = ("Unknown scheduler name {}, should be one of"
               " {{LambdaLR, MultiplicativeLR, StepLR, MultiStepLR,"
               " ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau,"
               " CyclicLR, OneCycleLR, CosineAnnealingWarmRestarts}}.")
        raise NotImplementedError(msg.format(scheduler_name))

    return scheduler
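
A minimal usage sketch for the helper above (not part of the original snippet): the toy model, the optimizer, and the StepLR keyword arguments are illustrative assumptions, and the helper's module is assumed to import lr_scheduler as in the reference.

from torch import nn, optim

# Toy model and optimizer; any extra keyword arguments are forwarded
# unchanged to the selected scheduler class.
model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Decay the learning rate by 10x every 30 epochs.
scheduler = set_scheduler(optimizer, "StepLR", step_size=30, gamma=0.1)

for epoch in range(90):
    # ... one epoch of training would go here ...
    optimizer.step()
    scheduler.step()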
Example No. 2
def main(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: distributions.Bernoulli(probs=x).sample())
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True, transform=transform),
        batch_size=args.batch_size,
        shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, download=True, transform=transform),
        batch_size=args.batch_size)

    model = MODEL_MAP[args.model](in_channels=1)
    optimizer = optim.Adam(model.parameters())
    scheduler = lr_scheduler.MultiplicativeLR(optimizer, lambda _: 0.9984)

    criterion = nn.BCELoss(reduction='none')

    def loss_fn(x, _, preds):
        batch_size = x.shape[0]
        x, preds = x.view((batch_size, -1)), preds.view((batch_size, -1))
        return criterion(preds, x).sum(dim=1).mean()

    trainer = pg.trainer.Trainer(model,
                                 loss_fn,
                                 optimizer,
                                 train_loader,
                                 test_loader,
                                 lr_scheduler=scheduler,
                                 log_dir=args.log_dir,
                                 save_checkpoint_epochs=1)
    trainer.interleaved_train_and_eval(n_epochs=args.n_epochs)
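
The transforms.Lambda above performs dynamic binarization: ToTensor maps pixel intensities to [0, 1], and each value is then used as a Bernoulli probability, so a fresh 0/1 image is drawn every time a sample is loaded. A tiny standalone sketch of that behaviour (the tensor values are made up for illustration):

import torch
from torch import distributions

# A fake 1x2x2 "image" with intensities in [0, 1].
x = torch.tensor([[[0.0, 0.2], [0.8, 1.0]]])

# Each pixel becomes 1 with probability equal to its intensity, so the
# binarization is re-sampled on every call.
print(distributions.Bernoulli(probs=x).sample())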
Example No. 3
def reproduce(n_epochs=457,
              batch_size=256,
              log_dir="/tmp/run",
              device="cuda",
              debug_loader=None):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler

    from pytorch_generative import datasets
    from pytorch_generative import models
    from pytorch_generative import trainer

    train_loader, test_loader = debug_loader, debug_loader
    if train_loader is None:
        train_loader, test_loader = datasets.get_mnist_loaders(
            batch_size, dynamically_binarize=True)

    model = models.PixelCNN(
        in_channels=1,
        out_channels=1,
        n_residual=15,
        residual_channels=16,
        head_channels=32,
    )
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        batch_size = x.shape[0]
        x, preds = x.view((batch_size, -1)), preds.view((batch_size, -1))
        loss = F.binary_cross_entropy_with_logits(preds, x, reduction="none")
        return loss.sum(dim=1).mean()

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        log_dir=log_dir,
        device=device,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 4
def main(args):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: distributions.Bernoulli(probs=x).sample())
    ])

    ###################################

    ##### Load MNIST ####

    #train_loader = torch.utils.data.DataLoader(
    #        datasets.MNIST('./data', train=True, download=True, transform=transform),
    #        batch_size=args.batch_size, shuffle=True)
    #test_loader = torch.utils.data.DataLoader(
    #        datasets.MNIST('./data', train=False, download=True, transform=transform),
    #        batch_size=args.batch_size)

    ##### Load ImageNet ####

    path_train = "/home/dsi/eyalbetzalel/pytorch-generative-v2/pytorch-generative-v2/imagenet64/train"
    datasetTrain = datasets.ImageFolder(path_train, transform=transform)
    path_test = "/home/dsi/eyalbetzalel/pytorch-generative-v2/pytorch-generative-v2/imagenet64/test"
    datasetTest = datasets.ImageFolder(path_test, transform=transform)

    print("Loading ImageNet Dataset (Long)")
    train_loader = torch.utils.data.DataLoader(datasetTrain,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasetTest,
                                              batch_size=args.batch_size)
    print("Finish Loading ImageNet Dataset")
    ###################################

    model = MODEL_MAP[args.model](in_channels=1)
    optimizer = optim.Adam(model.parameters())
    scheduler = lr_scheduler.MultiplicativeLR(optimizer, lambda _: 0.9984)

    criterion = nn.BCELoss(reduction='none')

    def loss_fn(x, _, preds):
        batch_size = x.shape[0]
        x, preds = x.view((batch_size, -1)), preds.view((batch_size, -1))
        return criterion(preds, x).sum(dim=1).mean()

    trainer = pg.trainer.Trainer(model,
                                 loss_fn,
                                 optimizer,
                                 train_loader,
                                 test_loader,
                                 lr_scheduler=scheduler,
                                 log_dir=args.log_dir,
                                 save_checkpoint_epochs=1)
    trainer.interleaved_train_and_eval(n_epochs=args.n_epochs)
Example No. 5
    def __init__(
        self,
        lr_lambda: Union[Callable[[int], float], List[Callable[[int], float]]],
        last_epoch: int = -1,
        step_on_batch: bool = False,
    ):
        """Constructor for MultiplicativeLR."""
        super().__init__(
            lambda opt: _schedulers.MultiplicativeLR(
                opt, lr_lambda, last_epoch=last_epoch),
            step_on_batch=step_on_batch,
        )
Example No. 6
    def __init__(self,
                 lr_lambda: Union[Callable[[int], float],
                                  List[Callable[[int], float]]],
                 last_epoch: int = -1,
                 step_on_iteration: bool = False):
        from distutils.version import LooseVersion
        if LooseVersion(torch.__version__) >= LooseVersion("1.4.0"):
            super().__init__(
                lambda opt: _scheduler.MultiplicativeLR(
                    opt, lr_lambda, last_epoch=last_epoch),
                step_on_iteration=step_on_iteration)
        else:
            raise ImportError("Update torch>=1.4.0 to use 'MultiplicativeLR'")
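
Note that distutils.version.LooseVersion is deprecated and distutils itself is removed in Python 3.12. A hedged alternative for the same version gate, assuming the packaging dependency is available:

import torch
from packaging import version

# Same check as above, without distutils.
if version.parse(torch.__version__) < version.parse("1.4.0"):
    raise ImportError("Update torch>=1.4.0 to use 'MultiplicativeLR'")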
Example No. 7
    def __init__(
        self,
        lr_lambda: Union[Callable[[int], float], List[Callable[[int], float]]],
        last_epoch: int = -1,
        step_on_batch: bool = False,
    ):
        """Constructor for MultiplicativeLR.

        Args:
            lr_lambda (function or list of functions): A function which computes a
                multiplicative factor given an integer parameter epoch, or a list
                of such functions, one for each group in optimizer.param_groups.
            last_epoch (int): The index of last epoch. Default: -1.
            step_on_batch (bool): Step on each training iteration rather than each epoch.
                Defaults to False.
        """
        super().__init__(
            lambda opt: _schedulers.MultiplicativeLR(
                opt, lr_lambda, last_epoch=last_epoch),
            step_on_batch=step_on_batch,
        )
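
For context, the wrapped torch.optim.lr_scheduler.MultiplicativeLR multiplies each parameter group's current learning rate by lr_lambda(epoch) on every step(), so the factor compounds. A short sketch against plain PyTorch (the 0.95 factor, model, and optimizer are arbitrary placeholders):

from torch import nn, optim
from torch.optim import lr_scheduler

optimizer = optim.SGD(nn.Linear(4, 1).parameters(), lr=1.0)
scheduler = lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 0.95)

for _ in range(3):
    optimizer.step()
    scheduler.step()
    # Compounded LR: roughly 0.95, 0.9025, 0.8574
    print(optimizer.param_groups[0]["lr"])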
Example No. 8
def reproduce(n_epochs=457,
              batch_size=128,
              log_dir="/tmp/run",
              device="cuda",
              debug_loader=None):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler
    from torch.utils import data
    from torchvision import datasets
    from torchvision import transforms

    from pytorch_generative import trainer
    from pytorch_generative import models

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    train_loader = data.DataLoader(
        datasets.CIFAR10("tmp/data",
                         train=True,
                         download=True,
                         transform=transform),
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=2,
    )
    test_loader = data.DataLoader(
        datasets.CIFAR10("tmp/data",
                         train=False,
                         download=True,
                         transform=transform),
        batch_size=batch_size,
        pin_memory=True,
        num_workers=2,
    )

    model = models.VQVAE(
        in_channels=3,
        out_channels=3,
        hidden_channels=128,
        residual_channels=32,
        n_residual_blocks=2,
        n_embeddings=512,
        embedding_dim=64,
    )
    optimizer = optim.Adam(model.parameters(), lr=2e-4)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        preds, vq_loss = preds
        recon_loss = F.mse_loss(preds, x)
        loss = recon_loss + vq_loss

        return {
            "vq_loss": vq_loss,
            "reconstruction_loss": recon_loss,
            "loss": loss,
        }

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        log_dir=log_dir,
        device=device,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 9
def reproduce(
    n_epochs=457,
    batch_size=64,
    log_dir="/tmp/run",
    n_gpus=1,
    device_id=0,
    debug_loader=None,
):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        n_gpus: Number of GPUs to use for training the model. If 0, uses CPU.
        device_id: The device_id of the current GPU when training on multiple GPUs.
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler

    from pytorch_generative import datasets
    from pytorch_generative import models
    from pytorch_generative import trainer

    train_loader, test_loader = debug_loader, debug_loader
    if train_loader is None:
        train_loader, test_loader = datasets.get_mnist_loaders(
            batch_size, dynamically_binarize=True)

    model = models.ImageGPT(
        in_channels=1,
        out_channels=1,
        in_size=28,
        n_transformer_blocks=8,
        n_attention_heads=2,
        n_embedding_channels=64,
    )
    optimizer = optim.Adam(model.parameters(), lr=5e-3)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        batch_size = x.shape[0]
        x, preds = x.view((batch_size, -1)), preds.view((batch_size, -1))
        loss = F.binary_cross_entropy_with_logits(preds, x, reduction="none")
        return loss.sum(dim=1).mean()

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        log_dir=log_dir,
        n_gpus=n_gpus,
        device_id=device_id,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 10
                               shuffle=False,
                               num_workers=0)
print('Creating eval loader')
eval_set = ElisaDataset('elisadata/standard', 'EVALUATE')
eval_loader = data.DataLoader(dataset=eval_set,
                              batch_size=args.eval_batch_size,
                              shuffle=False,
                              num_workers=0)

elisa_net = network.ElisaNet(args.c_feat).cuda()

params = [{'params': elisa_net.parameters()}]
solver = optim.Adam(params, lr=args.lr)

lmda = lambda x: 0.5  # TODO: can change this based on bad_epochs
scheduler = LS.MultiplicativeLR(solver, lr_lambda=lmda)

es = EarlyStopping(mode=args.es_mode,
                   min_delta=args.loss_delta,
                   patience=args.patience)

epoch = 0

if args.resume_epoch != 0:
    load_weights([elisa_net], solver, args.resume_epoch, args)
    epoch = args.resume_epoch
    solver = lr_resume(solver, args.lr_resume)
    print('Loaded weights from epoch {}'.format(args.resume_epoch))

while epoch < args.epochs and not args.eval:
    epoch += 1
Example No. 11
def reproduce(n_epochs=457,
              batch_size=64,
              log_dir="/tmp/run",
              device="cuda",
              debug_loader=None):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    from torch import distributions
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler
    from torch.utils import data
    from torchvision import datasets
    from torchvision import transforms

    from pytorch_generative import trainer
    from pytorch_generative import models

    transform = transforms.Compose([
        transforms.ToTensor(),
        lambda x: distributions.Bernoulli(probs=x).sample()
    ])
    train_loader = debug_loader or data.DataLoader(
        datasets.MNIST(
            "/tmp/data", train=True, download=True, transform=transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=8,
    )
    test_loader = debug_loader or data.DataLoader(
        datasets.MNIST(
            "/tmp/data", train=False, download=True, transform=transform),
        batch_size=batch_size,
        num_workers=8,
    )

    model = models.ImageGPT(
        in_channels=1,
        out_channels=1,
        in_size=28,
        n_transformer_blocks=8,
        n_attention_heads=2,
        n_embedding_channels=64,
    )
    optimizer = optim.Adam(model.parameters(), lr=5e-3)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        batch_size = x.shape[0]
        x, preds = x.view((batch_size, -1)), preds.view((batch_size, -1))
        loss = F.binary_cross_entropy_with_logits(preds, x, reduction="none")
        return loss.sum(dim=1).mean()

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        log_dir=log_dir,
        device=device,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 12
def reproduce(n_epochs=457,
              batch_size=128,
              log_dir="/tmp/run",
              device="cuda",
              debug_loader=None):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    import torch
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler
    from torch.utils import data
    from torchvision import datasets
    from torchvision import transforms

    from pytorch_generative import trainer
    from pytorch_generative import models

    transform = transforms.ToTensor()
    train_loader = debug_loader or data.DataLoader(
        datasets.MNIST(
            "/tmp/data", train=True, download=True, transform=transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=8,
    )
    test_loader = debug_loader or data.DataLoader(
        datasets.MNIST(
            "/tmp/data", train=False, download=True, transform=transform),
        batch_size=batch_size,
        num_workers=8,
    )

    model = models.VAE(
        in_channels=1,
        out_channels=1,
        in_size=28,
        latent_dim=10,
        hidden_channels=32,
        n_residual_blocks=2,
        residual_channels=16,
    )
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        preds, vae_loss = preds
        recon_loss = F.binary_cross_entropy_with_logits(preds, x)
        loss = recon_loss * 100 + vae_loss
        return {
            "recon_loss": recon_loss,
            "vae_loss": vae_loss,
            "loss": loss,
        }

    def sample_fn(model):
        return torch.sigmoid(model.sample(n_images=64))

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        sample_epochs=5,
        sample_fn=sample_fn,
        log_dir=log_dir,
        device=device,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 13
    def __init__(
            self,
            model=None,
            device=None,
            hparams=dict(),
            name='',
    ):
        # reproducibility
        torch.manual_seed(config.seed)
        np.random.seed(config.seed)
        torch.set_default_tensor_type('torch.FloatTensor')

        context = dict()
        context['hparams'] = hparams
        context['max_epoch'] = hparams.get('max_epoch', config.max_epoch)
        context['normal_classes'] = hparams.get('normal_classes', config.normal_classes)

        # acquiring device cuda if available
        context[constants.DEVICE] = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("device:", context[constants.DEVICE])

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomGrayscale(p=0.2),
            # transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        print('loading training data')
        context['train_data'] = DatasetSelection(
            hparams.get('dataset', config.dataset),
            classes=context['normal_classes'], train=True, return_indexes=True,
            transform=transform_train)
        print('loading test data')
        context['test_data'] = DatasetSelection(hparams.get('dataset', config.dataset), train=False,
                                                transform=transform_test)

        context['input_shape'] = context['train_data'].input_shape()

        print('initializing data loaders')
        context['train_loader'] = context['train_data'].get_dataloader(
            shuffle=True, batch_size=hparams.get('train_batch_size', config.train_batch_size))
        context['test_loader'] = context['test_data'].get_dataloader(
            shuffle=False, batch_size=hparams.get('test_batch_size', config.test_batch_size))

        print('initializing models')
        context['models'] = model or DeepSVDD(
            train_data=context['train_data'],
            latent_size=hparams.get('latent_size', model_config.latent_size),
            nce_t=hparams.get('nce_t', model_config.nce_t),
            nce_k=hparams.get('nce_k', model_config.nce_k),
            nce_m=hparams.get('nce_m', model_config.nce_m),
            device=device
        ).to(context[constants.DEVICE])
        context["models"].resnet = context["models"].resnet.to(context[constants.DEVICE])
        print('initializing center - ', end='')
        context["models"].init_center(
            context[constants.DEVICE], init_zero=hparams.get('zero_centered', False))
        print(context["models"].center.mean())
        checkpoint = hparams.get('checkpoint', config.checkpoint_drmade)
        if checkpoint:
            context["models"].load(checkpoint, context[constants.DEVICE])

        print(f'models: {context["models"].name} was initialized')

        base_lr = hparams.get('base_lr', model_config.deepsvdd_sgd_base_lr)
        lr_decay = hparams.get('lr_decay', model_config.deepsvdd_sgd_lr_decay)
        lr_schedule = hparams.get('lr_schedule', model_config.deepsvdd_sgd_schedule)

        sgd_momentum = hparams.get('sgd_momentum', model_config.deepsvdd_sgd_momentum)
        sgd_weight_decay = hparams.get('sgd_weight_decay', model_config.deepsvdd_sgd_weight_decay)
        pgd_eps = hparams.get('pgd/eps', model_config.deepsvdd_pgd_eps)
        pgd_iterations = hparams.get('pgd/iterations', model_config.deepsvdd_pgd_iterations)
        pgd_alpha = hparams.get('pgd/alpha', model_config.deepsvdd_pgd_alpha)
        pgd_randomize = hparams.get('pgd/randomize', model_config.deepsvdd_pgd_randomize)

        radius_factor = hparams.get('radius_factor', model_config.radius_factor)
        nce_factor = hparams.get('nce_factor', model_config.nce_factor)

        print(f'initializing optimizer SGD - base_lr:{base_lr}')
        optimizer = SGD(
            context['models'].resnet.parameters(), lr=base_lr,
            momentum=sgd_momentum,
            weight_decay=sgd_weight_decay,
        )
        context['optimizers'] = [optimizer]
        context['optimizer/sgd'] = optimizer

        print(f'initializing learning rate scheduler - lr_decay:{lr_decay} half_schedule:{lr_schedule}')
        context['lr_multiplicative_factor_lambda'] = lambda epoch: 0.1 \
            if (epoch + 1) % lr_schedule == 0 else lr_decay
        scheduler = lr_scheduler.MultiplicativeLR(
            optimizer, lr_lambda=context['lr_multiplicative_factor_lambda'], last_epoch=-1)
        context['schedulers'] = [scheduler]
        context['scheduler/sgd'] = scheduler

        # setting up tensorboard data summarizer
        context['name'] = name or '{}{}-{}{}{}{}|SGDm{}wd{}-baselr{}-decay{}-0.1schedule{}'.format(
            hparams.get('dataset', config.dataset).__name__,
            '{}'.format(
                '' if not context['normal_classes'] else '[' + ','.join(
                    str(i) for i in hparams.get('normal_classes', config.normal_classes)) + ']'
            ),
            context['models'].name,
            f'|NCE{nce_factor}' if nce_factor else '',
            f'|Radius{radius_factor}' if radius_factor else '',
            '' if not pgd_eps else '|pgd-eps{}-iterations{}alpha{}{}'.format(
                pgd_eps, pgd_iterations, pgd_alpha, 'randomized' if pgd_randomize else '',
            ),
            sgd_momentum,
            sgd_weight_decay,
            base_lr,
            lr_decay,
            lr_schedule,
        )
        super(DeepSVDDTrainer, self).__init__(context['name'], context, )

        attacker = PGDAttackAction(
            Radius('radius'), eps=pgd_eps, iterations=pgd_iterations,
            randomize=pgd_randomize, alpha=pgd_alpha)

        train_loop = RobustNCEDeepSVDDLoop(
            name='train',
            data_loader=context['train_loader'],
            device=context[constants.DEVICE],
            optimizers=('sgd',),
            attacker=attacker,
            log_interval=hparams.get('log_interval', config.log_data_feed_loop_interval),
        )

        self.context['loops'] = [train_loop]
        print('setting up writer')
        self.setup_writer()
        print('trainer', context['name'], 'is ready!')
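
The lr_multiplicative_factor_lambda above yields a hybrid schedule: a 10x drop whenever (epoch + 1) is a multiple of lr_schedule, and a gentle lr_decay multiplication on every other epoch. A small standalone simulation of the compounded factors (the base LR, decay, and schedule values are hypothetical placeholders, not the project's defaults):

base_lr, lr_decay, lr_schedule = 0.1, 0.99, 5  # hypothetical values


def factor(epoch):
    # Mirrors the lambda above: big drop on schedule boundaries, mild decay otherwise.
    return 0.1 if (epoch + 1) % lr_schedule == 0 else lr_decay


lr = base_lr
for epoch in range(10):
    lr *= factor(epoch)
    print(f"epoch {epoch}: lr={lr:.6f}")  # the 10x drops land at epochs 4 and 9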
Example No. 14
def reproduce(n_epochs=457,
              batch_size=128,
              log_dir="/tmp/run",
              device="cuda",
              debug_loader=None):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    import torch
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler

    from pytorch_generative import datasets
    from pytorch_generative import models
    from pytorch_generative import trainer

    train_loader, test_loader = debug_loader, debug_loader
    if train_loader is None:
        train_loader, test_loader = datasets.get_mnist_loaders(batch_size)

    model = models.VAE(
        in_channels=1,
        out_channels=1,
        latent_channels=2,
        hidden_channels=128,
        residual_channels=32,
    )
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        preds, vae_loss = preds
        recon_loss = F.binary_cross_entropy_with_logits(preds,
                                                        x,
                                                        reduction="none")
        recon_loss = recon_loss.mean(dim=(1, 2, 3))
        loss = recon_loss + vae_loss
        return {
            "recon_loss": recon_loss.mean(),
            "vae_loss": vae_loss.mean(),
            "loss": loss.mean(),
        }

    def sample_fn(model):
        return torch.sigmoid(model.sample(n_samples=16))

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        sample_epochs=1,
        sample_fn=sample_fn,
        log_dir=log_dir,
        device=device,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 15
    def __init__(self, hparams: dict = None, name=None, drmade=None, device=None, checkpoint_path=None):
        if checkpoint_path:
            self.load_checkpoint(checkpoint_path)
            return
        super().__init__(hparams=hparams, name=name, drmade=drmade, device=device)

        hparams = self.get(constants.HPARAMS_DICT)

        # pgd encoder made inputs
        input_limits = self.get('drmade').decoder.output_limits
        pgd_eps = hparams.get('pgd/eps', model_config.pretrain_encoder_made_pgd_eps)
        pgd_iterations = hparams.get('pgd/iterations', model_config.pretrain_encoder_made_pgd_iterations)
        pgd_alpha = hparams.get('pgd/alpha', model_config.pretrain_encoder_made_pgd_alpha)
        pgd_randomize = hparams.get('pgd/randomize', model_config.pretrain_encoder_made_pgd_randomize)
        pgd_input = {'eps': pgd_eps, 'iterations': pgd_iterations, 'alpha': pgd_alpha, 'randomize': pgd_randomize,
                     'input_limits': input_limits}
        # pgd latent
        latent_input_limits = self.get('drmade').encoder.output_limits
        pgd_latent_eps = hparams.get('pgd_latent/eps', model_config.pretrain_made_pgd_eps)
        pgd_latent_iterations = hparams.get('pgd_latent/iterations', model_config.pretrain_made_pgd_iterations)
        pgd_latent_alpha = hparams.get('pgd_latent/alpha', model_config.pretrain_made_pgd_alpha)
        pgd_latent_randomize = hparams.get('pgd_latent/randomize', model_config.pretrain_made_pgd_randomize)
        pgd_latent = {'eps': pgd_latent_eps, 'iterations': pgd_latent_iterations, 'alpha': pgd_latent_alpha,
                      'randomize': pgd_latent_randomize, 'input_limits': latent_input_limits}

        lr_decay = hparams.get('lr_decay', model_config.lr_decay)
        lr_schedule = hparams.get('lr_schedule', model_config.lr_schedule)

        # freezing unnecessary model layers
        print('freezing decoder')
        for parameter in self.get('drmade').decoder.parameters():
            parameter.requires_grad = False

        print('unfreezing encoder')
        for parameter in self.get('drmade').encoder.parameters():
            parameter.requires_grad = True

        freeze_encoder = hparams.get('freeze_encoder', False)
        made_only, freeze_encoder_name = self.get('drmade').encoder.freeze(freeze_encoder)
        if made_only:
            freeze_encoder_name = 'freezed'
            # turning off unnecessary evaluation functions
            hparams['track_extreme_reconstructions'] = hparams.get('track_extreme_reconstructions', 0)
            hparams['embedding_interval'] = hparams.get('embedding_interval', 0)
            hparams['submit_latent_interval'] = hparams.get('submit_latent_interval', 0)
            hparams['track_jacobian_interval'] = hparams.get('track_jacobian_interval', 0)

        print('unfreezing made')
        for parameter in self.get('drmade').made.parameters():
            parameter.requires_grad = True

        # optimizers and schedulers
        def lr_multiplicative_function(epoch):
            return 0.5 if lr_schedule and ((epoch + 1) % lr_schedule) == 0 else lr_decay

        self.set('lr_multiplicative_factor_lambda', lr_multiplicative_function)

        print(f'initializing learning rate scheduler - lr_decay:{lr_decay} schedule:{lr_schedule}')
        optimizer = hparams.get('optimizer', Adam)
        optimizer_hparams = hparams.get('optimizer_hparams', {'lr': model_config.base_lr, })
        print(f'initializing optimizer {optimizer.__name__} -',
              ",".join(f"{i}:{str(j)}" for i, j in optimizer_hparams.items()))

        made_optimizer = optimizer(self.get('drmade').made.parameters(), **optimizer_hparams)

        self.add_optimizer('made', made_optimizer)
        made_scheduler = lr_scheduler.MultiplicativeLR(
            made_optimizer, lr_lambda=lr_multiplicative_function, last_epoch=-1)
        self.add_scheduler('made', made_scheduler)

        if not made_only:
            encoder_optimizer = optimizer(self.get('drmade').encoder.parameters(), **optimizer_hparams)
            self.add_optimizer('encoder', encoder_optimizer)
            encoder_scheduler = lr_scheduler.MultiplicativeLR(
                encoder_optimizer, lr_lambda=lr_multiplicative_function, last_epoch=-1)
            self.add_scheduler('encoder', encoder_scheduler)

        # iterative training
        iterative = hparams.get('iterative', False)
        assert not iterative or (iterative and not made_only), \
            'cannot perform iterative training with fixed encoder'

        self.set(constants.TRAINER_NAME, name or '{}-{}{}:{}|{}{}{}{}{}|schedule{}-decay{}'.format(
            self.get(constants.TRAINER_NAME), self.get('drmade').encoder.name,
            f'({freeze_encoder_name})' if freeze_encoder_name else '', self.get('drmade').made.name,
            '' if not pgd_eps else 'pgd-eps{}-iterations{}alpha{}{}|'.format(
                pgd_eps, pgd_iterations, pgd_alpha, 'randomized' if pgd_randomize else '', ),
            '' if not pgd_latent_eps else 'pgd-latent-eps{}-iterations{}alpha{}{}|'.format(
                pgd_latent_eps, pgd_latent_iterations, pgd_latent_alpha, 'randomized' if pgd_randomize else '', ),
            'iterative|' if iterative else '',
            optimizer.__name__, '-{}'.format('-'.join(f"{i}{j}" for i, j in optimizer_hparams.items())),
            lr_schedule, lr_decay, ), replace=True)
        print("Trainer: ", self.get(constants.TRAINER_NAME))

        self.add_loop(RobustMadeFeedLoop(
            name='train-made' if iterative else 'train',
            data_loader=self.get('train_loader'),
            device=self.get(constants.DEVICE),
            optimizers=('made',) if iterative or made_only else ('made', 'encoder'),
            pgd_input=pgd_input,
            pgd_latent=pgd_latent,
            log_interval=hparams.get('log_interval', config.log_data_feed_loop_interval)))
        if iterative:
            self.add_loop(RobustMadeFeedLoop(
                name='train-encoder',
                data_loader=self.get('train_loader'),
                device=self.get(constants.DEVICE),
                optimizers=('encoder',),
                pgd_input=pgd_input,
                pgd_latent=pgd_latent,
                log_interval=hparams.get('log_interval', config.log_data_feed_loop_interval)))
        self.add_loop(RobustMadeFeedLoop(
            name='validation',
            data_loader=self.get('validation_loader'),
            device=self.get(constants.DEVICE),
            optimizers=tuple(),
            pgd_input=pgd_input,
            pgd_latent=pgd_latent,
            interval=hparams.get('validation_interval', model_config.validation_interval),
            log_interval=hparams.get('log_interval', config.log_data_feed_loop_interval)))
Example No. 16
def multiplicative(optimizer: Optimizer) -> _LRScheduler:
    return lr_scheduler.MultiplicativeLR(
        optimizer, lr_lambda=lambda epoch: 0.5)  # type: ignore
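
Because the lambda above always returns 0.5, every step() halves the learning rate, making this particular schedule equivalent to lr_scheduler.ExponentialLR(optimizer, gamma=0.5); MultiplicativeLR is mainly useful when the factor varies by epoch. A quick sketch using the factory (the optimizer and its initial LR are placeholders):

from torch import nn, optim

optimizer = optim.SGD(nn.Linear(2, 2).parameters(), lr=0.8)
scheduler = multiplicative(optimizer)  # the factory defined above

optimizer.step()
scheduler.step()
print(optimizer.param_groups[0]["lr"])  # 0.4
scheduler.step()
print(optimizer.param_groups[0]["lr"])  # 0.2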
Example No. 17
def reproduce(n_epochs=457,
              batch_size=128,
              log_dir="/tmp/run",
              device="cuda",
              debug_loader=None):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
        debug_loader: Debug DataLoader which replaces the default training and
            evaluation loaders if not 'None'. Do not use unless you're writing unit
            tests.
    """
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler

    from pytorch_generative import datasets
    from pytorch_generative import models
    from pytorch_generative import trainer

    train_loader, test_loader = debug_loader, debug_loader
    if train_loader is None:
        train_loader, test_loader = datasets.get_cifar10_loaders(
            batch_size, normalize=True)

    model = models.VQVAE2(
        in_channels=3,
        out_channels=3,
        hidden_channels=128,
        residual_channels=64,
        n_residual_blocks=2,
        n_embeddings=512,
        embedding_dim=64,
    )
    optimizer = optim.Adam(model.parameters(), lr=2e-4)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):
        preds, vq_loss = preds
        recon_loss = F.mse_loss(preds, x)
        loss = recon_loss + 0.25 * vq_loss

        return {
            "vq_loss": vq_loss,
            "reconstruction_loss": recon_loss,
            "loss": loss,
        }

    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        log_dir=log_dir,
        device=device,
    )
    model_trainer.interleaved_train_and_eval(n_epochs)
Example No. 18

    fmnist_train = FashionMNIST(args.save_dir, train=True, transform=train_tfm, download=True)
    fmnist_test = FashionMNIST(args.save_dir, train=False, transform=test_tfm, download=True)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    clf = ResNet18(nc=1)
    clf.to(device)

    optimizer = optim.SGD(clf.parameters(), lr=args.lr, weight_decay=args.wd, momentum=args.mom)
    criterion = nn.CrossEntropyLoss()

    # Multiplies the LR by 0.1 at epochs 100 and 150, as mentioned in the paper
    lmd = lambda x: 0.1 if x in [100, 150] else 1
    scheduler = lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmd)

    trainloader = DataLoader(fmnist_train, batch_size=args.batch_size, shuffle=True)
    testloader = DataLoader(fmnist_test, batch_size=args.batch_size, shuffle=False)

    best_loss = np.inf

    for epoch in range(args.epochs):

        t_loss, t_acc = train(epoch, trainloader, clf, criterion, optimizer, scheduler=None, msda=args.msda)
        lr = optimizer.param_groups[0]['lr']
        # Step the epoch-level LR schedule once per epoch
        scheduler.step()

        print('Epoch {}/{} (train) || Loss: {:.4f} Acc: {:.4f} LR: {:.5f}'.format(
            epoch + 1, args.epochs, t_loss, t_acc, lr))

        test_loss, test_acc = test(epoch, testloader, clf, criterion)

        print('Epoch {}/{} (test) || Loss: {:.4f} Acc: {:.4f}'.format(
            epoch + 1, args.epochs, test_loss, test_acc))
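
As the comment above notes, this uses MultiplicativeLR to emulate a milestone step decay. The same schedule can be written more directly with MultiStepLR, which multiplies the LR by gamma once at each milestone; a hedged standalone sketch (the stand-in optimizer replaces the SGD optimizer built above):

from torch import nn, optim
from torch.optim import lr_scheduler

# Stand-in optimizer; in the snippet above this would be the SGD optimizer for clf.
optimizer = optim.SGD(nn.Linear(2, 2).parameters(), lr=0.1)

# Drop the LR by 10x at epochs 100 and 150, assuming one scheduler.step() per epoch.
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)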
Example No. 19
    def train_discriminator(self, config, real_train_set, gen_train_set,
                            real_val_set, gen_val_set):

        self.disc.train()

        # Training objects
        optimizer = networks.optimizers[config["disc_optimizer"]](
            self.disc.parameters(), lr=config["disc_lr"])
        mult_func = (lambda x: config["disc_lr_decay"])
        scheduler = schedulers.MultiplicativeLR(optimizer, lr_lambda=mult_func)

        real_tab_dataset = TabularDataset(real_train_set)
        gen_tab_dataset = TabularDataset(gen_train_set)
        real_loader = torch_data.DataLoader(real_tab_dataset,
                                            batch_size=config["batch_size"],
                                            shuffle=True,
                                            num_workers=constants.LOAD_WORKERS)
        gen_loader = torch_data.DataLoader(gen_tab_dataset,
                                           batch_size=config["batch_size"],
                                           shuffle=True,
                                           num_workers=constants.LOAD_WORKERS)

        # Highest divergence (on val.-set) so far
        best_divergence = None
        div_save_path = os.path.join(wandb.run.dir, "best_{}_params.pt".format(
            self.divergence))  # Path to save best params to

        for epoch_i in range(config["epochs"]):
            epoch_loss = []

            # Train one epoch
            for (x_real,
                 real_batch), (x_gen,
                               gen_batch) in zip(real_loader, gen_loader):
                optimizer.zero_grad()

                # Concat. and move all to correct device
                real_input = torch.cat((x_real, real_batch),
                                       dim=1).to(self.device)
                gen_input = torch.cat((x_gen, gen_batch),
                                      dim=1).to(self.device)

                real_logits = self.disc(real_input)
                gen_logits = self.disc(gen_input)

                loss = self.disc_loss(real_logits, gen_logits)
                loss.backward()

                if config["clip_grad"]:
                    nn.utils.clip_grad_norm_(self.disc.parameters(),
                                             config["clip_grad"])

                optimizer.step()
                epoch_loss.append(loss.item())

            wandb.log({
                "{}_epoch_i".format(self.divergence):
                epoch_i,
                "{}_train_divergence".format(self.divergence):
                (-1.) * np.mean(epoch_loss),
            })

            # Evaluate
            if (epoch_i + 1) % config["val_interval"] == 0:
                val_divergence = self.compute_divergence(
                    real_val_set, gen_val_set, config["eval_batch_size"])
                wandb.log({
                    "{}_val_divergence".format(self.divergence):
                    val_divergence,
                })

                if (best_divergence is None) or (val_divergence > best_divergence):
                    # Best divergence so far, save parameters
                    model_params = {
                        "disc": self.disc.state_dict(),
                    }
                    torch.save(model_params, div_save_path)

                    best_divergence = val_divergence

            scheduler.step()

        # Restore best parameters (early stopping)
        self.load_params_from_file(div_save_path)
Example No. 20
def train():
    global model
    validation_losses = []
    train_losses = []
    print('starting training')
    # starting up data loaders
    print("loading training data")
    dataset_train = DatasetSelection(train=True, classes=config.normal_classes)
    print('loading validation data')
    dataset_validation = DatasetSelection(train=False, classes=config.normal_classes)
    print('loading test data')
    dataset_test = DatasetSelection(train=False, classes=config.test_classes)

    train_sampler = None
    validation_sampler = None
    test_sampler = None
    if config.use_tpu:
        print('creating tpu sampler')
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            dataset_train,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=True
        )
        validation_sampler = torch.utils.data.distributed.DistributedSampler(
            dataset_validation,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=True
        )
        test_sampler = torch.utils.data.distributed.DistributedSampler(
            dataset_test,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=False
        )
        print('tpu samplers created')
    train_loader = dataset_train.get_dataloader(sampler=train_sampler, shuffle=not config.use_tpu)
    validation_loader = dataset_validation.get_dataloader(sampler=validation_sampler, shuffle=not config.use_tpu, )
    test_loader = dataset_test.get_dataloader(sampler=test_sampler, shuffle=False, )

    input_shape = dataset_validation.input_shape()
    loss_function = get_loss_function(input_shape)

    # setting up tensorboard data summarizer
    writer = SummaryWriter(log_dir=os.path.join(config.log_dir, config.model_name))

    # initializing model
    model = init_model(input_shape)

    print("initializing optimizer & scheduler")
    optimizer = Adam(model.parameters(), lr=config.lr)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=config.lr_multiplicative_factor_lambda,
                                      last_epoch=config.start_epoch - 1)

    def train_loop(data_loader, writes=0):
        if torch.cuda.is_available():
            torch.cuda.synchronize(device=config.device)
        train_loss = 0.
        last_train_loss = 0.
        new_writes = 0
        time_ = time.time()
        if config.use_tpu:
            tracker = xm.RateTracker()
        model.train()
        for batch_idx, (input, _) in enumerate(data_loader):
            input = input.to(config.device, non_blocking=True)
            if config.noising_factor is not None:
                false_input = input + config.noising_factor * config.noise_function(input.shape)
                false_input.clamp_(min=-1, max=1)
                output = model(false_input)
            else:
                output = model(input)
            loss = loss_function(input, output)
            optimizer.zero_grad()
            loss.backward()
            if config.use_tpu:
                xm.optimizer_step(optimizer)
                tracker.add(config.batch_size)
            else:
                optimizer.step()
            train_loss += loss
            if config.print_every and (batch_idx + 1) % config.print_every == 0:
                deno = config.print_every * config.batch_size * np.prod(input_shape) * np.log(2.)
                if not config.use_tpu:
                    writer.add_scalar('train/bpd', (train_loss / deno), writes + new_writes)

                print('\t{:3d}/{:3d} - loss : {:.4f}, time : {:.3f}s'.format(
                    batch_idx // config.print_every + 1,
                    len(train_loader) // config.print_every,
                    (train_loss / deno),
                    (time.time() - time_)
                ))
                last_train_loss = train_loss
                train_loss = 0.
                new_writes += 1
                time_ = time.time()
            del input, _, loss, output

        return new_writes, (last_train_loss / deno)

    def validation_loop(data_loader, writes=0):
        if torch.cuda.is_available():
            torch.cuda.synchronize(device=config.device)
        model.eval()
        test_loss = 0.
        with torch.no_grad():
            for batch_idx, (input, _) in enumerate(data_loader):
                input = input.to(config.device, non_blocking=True)
                output = model(input)
                loss = loss_function(input, output)
                test_loss += loss
                del loss, output

            deno = batch_idx * config.batch_size * np.prod(input_shape) * np.log(2.)
            writer.add_scalar('validation/bpd', (test_loss / deno), writes)
            print('\t{}epoch {:4} validation loss : {:.4f}'.format(
                '' if not config.use_tpu else xm.get_ordinal(),
                epoch,
                (test_loss / deno)
            ),
                flush=True
            )

            if config.save_interval and (epoch + 1) % config.save_interval == 0:
                torch.save(model.state_dict(), config.models_dir + '/{}_{}.pth'.format(config.model_name, epoch))
                print('\tsampling epoch {:4}'.format(
                    epoch
                ))
                sample_t = sample(model, input_shape)
                sample_t = rescaling_inv(sample_t)
                utils.save_image(sample_t, config.samples_dir + '/{}_{}.png'.format(config.model_name, epoch),
                                 nrow=5, padding=0)
            return test_loss / deno

    try:
        writes = 0
        for epoch in range(config.start_epoch, config.max_epochs):
            print('epoch {:4} - lr: {}'.format(epoch, optimizer.param_groups[0]["lr"]))
            if config.use_tpu:
                para_loader = pl.ParallelLoader(train_loader, [config.device])
                train_loop(para_loader.per_device_loader(config.device), writes)
                xm.master_print("\tFinished training epoch {}".format(epoch))
            else:
                new_writes, train_loss = train_loop(train_loader, writes)
                train_losses.append(train_loss)
                writes += new_writes

            # learning rate schedule
            scheduler.step()

            if config.use_tpu:
                para_loader = pl.ParallelLoader(validation_loader, [config.device])
                validation_loop(para_loader.per_device_loader(config.device), writes)
            else:
                validation_loss = validation_loop(validation_loader, writes)
                validation_losses.append(validation_loss)
                model_name = f'{"DCNNpp" if config.noising_factor is not None else "PCNNpp"}-E{epoch}'
                # evaluation and loss tracking
                if config.plot_every and (epoch + 1) % config.plot_every == 0:
                    plot_loss(
                        train_losses,
                        validation_losses,
                        model_name=f'{"DCNNpp" if config.noising_factor is not None else "PCNNpp"}-{optimizer.param_groups[0]["lr"]:.7f}'
                        , save_path=config.losses_dir + f'/Losses{model_name}.png',
                    )

                if config.evaluate_every and (epoch + 1) % config.evaluate_every == 0:
                    eval_data = evaluate(model, dataset_test, test_loader)
                    plot_evaluation(
                        eval_data,
                        model_name=f'{"DCNNpp" if config.noising_factor is not None else "PCNNpp"}-E{epoch}',
                        save_path=config.evaluation_dir + f'/EvalPlot{model_name}.png'
                    )
                    show_extreme_cases(
                        eval_data,
                        model_name=model_name,
                        save_dir=config.extreme_cases_dir
                    )

            writes += 1
    except KeyboardInterrupt:
        pass
    return model, train_losses, validation_losses
Example No. 21
def reproduce(n_epochs=457,
              batch_size=128,
              log_dir="/tmp/run",
              device="cuda",
              n_channels=1,
              n_pixel_snail_blocks=1,
              n_residual_blocks=1,
              attention_value_channels=1,
              attention_key_channels=1,
              evalFlag=False,
              evaldir="/tmp/run",
              sampling_part=1):
    """Training script with defaults to reproduce results.

    The code inside this function is self contained and can be used as a top level
    training script, e.g. by copy/pasting it into a Jupyter notebook.

    Args:
        n_epochs: Number of epochs to train for.
        batch_size: Batch size to use for training and evaluation.
        log_dir: Directory where to log trainer state and TensorBoard summaries.
        device: Device to train on (either 'cuda' or 'cpu').
    """
    import torch
    from torch import nn
    from torch import optim
    from torch.nn import functional as F
    from torch.optim import lr_scheduler
    from torch.utils import data
    from torchvision import datasets
    from torchvision import transforms

    from pytorch_generative import trainer
    from pytorch_generative import models

    ####################################################################################################################
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~EB~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Load ImageGPT Data :

    import gmpm

    train = gmpm.train
    test = gmpm.test

    train_loader = data.DataLoader(
        data.TensorDataset(torch.Tensor(train), torch.rand(len(train))),
        batch_size=batch_size,
        shuffle=True,
        num_workers=8,
    )
    test_loader = data.DataLoader(
        data.TensorDataset(torch.Tensor(test), torch.rand(len(test))),
        batch_size=batch_size,
        num_workers=8,
    )

    model = models.PixelSNAIL(
        ####################################################################################################################
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~EB~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Change Input / Output size :

        # 3 channels - image after clusters mapping function as input to NN :
        in_channels=3,

        # 512 channels - each pixel get probability to get value from 0 to 511
        out_channels=512,

        ####################################################################################################################
        n_channels=n_channels,
        n_pixel_snail_blocks=n_pixel_snail_blocks,
        n_residual_blocks=n_residual_blocks,
        attention_value_channels=attention_value_channels,
        attention_key_channels=attention_key_channels,
    )
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                              lr_lambda=lambda _: 0.999977)

    def loss_fn(x, _, preds):

        ####################################################################################################################
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~EB~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update loss function to CrossEntropyLoss :

        x = x.long()
        criterion = nn.CrossEntropyLoss()
        B, C, D = preds.size()
        preds_2d = preds.view(B, C, D, -1)
        x_2d = x.view(B, D, -1)
        loss = criterion(preds_2d, x_2d)

        ####################################################################################################################

        return loss

    _model = model.to(device)
    model_trainer = trainer.Trainer(
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        train_loader=train_loader,
        eval_loader=test_loader,
        lr_scheduler=scheduler,
        log_dir=log_dir,
        device=device,
        sample_epochs=5,
        sample_fn=None,
        n_channels=n_channels,
        n_pixel_snail_blocks=n_pixel_snail_blocks,
        n_residual_blocks=n_residual_blocks,
        attention_value_channels=attention_value_channels,
        attention_key_channels=attention_key_channels,
        # evalFlag, evaldir, and sampling_part are settings specific to this
        # modified Trainer and must be defined by the caller; they are not
        # parameters of reproduce().
        evalFlag=evalFlag,
        evaldir=evaldir,
        sampling_part=sampling_part)

    trainer.interleaved_train_and_eval(n_epochs)
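
With a constant factor of 0.999977, every call to scheduler.step() multiplies the current learning rate by that factor, so after N steps the rate is 1e-4 * 0.999977**N. A minimal sketch of that arithmetic (plain Python, values taken from the snippet above):

base_lr, factor = 1e-4, 0.999977
for n_steps in (1, 100, 457):
    print(n_steps, base_lr * factor ** n_steps)
# After 457 steps the rate is still ~9.9e-5, i.e. it has decayed by only
# about 1%, so this schedule keeps the learning rate nearly constant.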
Exemplo n.º 22
0
    def train(self, train_set, config, val_func=None):
        tab_dataset = TabularDataset(train_set)
        n_samples = len(tab_dataset)
        train_loader = torch_data.DataLoader(tab_dataset,
                batch_size=config["batch_size"], shuffle=self.shuffle,
                num_workers=constants.LOAD_WORKERS)

        # Set network to train mode
        self.net.train()

        # Keep track of best so far
        best_ll = None  # best validation log-likelihood
        best_mae = None
        best_epoch_i = None
        best_save_path = os.path.join(
            wandb.run.dir,
            constants.BEST_PARAMS_FILE)  # Path to save best params to

        # Optimizer
        opt = networks.optimizers[config["optimizer"]](
                self.net.parameters(), lr=config["lr"], weight_decay=config["l2_reg"])

        mult_func = (lambda x: config["lr_decay"])
        scheduler = schedulers.MultiplicativeLR(opt, lr_lambda=mult_func)

        for epoch_i in range(config["epochs"]):
            epoch_loss = []

            for batch_i, (x_batch, y_batch) in enumerate(train_loader):
                batch_size = x_batch.shape[0]

                # Send to correct device
                x_batch = x_batch.to(config["device"])
                y_batch = y_batch.to(config["device"])

                opt.zero_grad()

                # Train network
                net_inputs = self.process_net_input(x_batch, y_batch=y_batch)
                logits = self.net(net_inputs)
                loss = self.loss(logits, y_batch, x_batch=x_batch, batch_i=batch_i)

                loss.backward()
                opt.step()

                # Store loss
                epoch_loss.append(loss.item())

            # Log epoch stats
            wandb.log({
                "epoch": epoch_i,
                "loss": np.mean(epoch_loss)
            })

            if val_func and ((epoch_i+1) % config["val_interval"] == 0):
                evaluation_vals = val_func(self, epoch_i)

                if (best_epoch_i is None) or (evaluation_vals["ll"] > best_ll):
                    best_ll = evaluation_vals["ll"]
                    best_mae = evaluation_vals["mae"]
                    best_epoch_i = epoch_i

                    # Save model parameters for best epoch only
                    model_params = self.net.state_dict()
                    torch.save(model_params, best_save_path)

            scheduler.step()

        # Perform possible additional training
        self.post_training(train_set, config)

        wandb.run.summary["best_epoch"] = best_epoch_i # Save best epoch index to wandb
        wandb.run.summary["log_likelihood"] = best_ll
        wandb.run.summary["mae"] = best_mae

        # Restore best parameters to model (for future testing etc.)
        self.load_params_from_file(best_save_path)
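
Because mult_func always returns the same config["lr_decay"] value, the schedule above is plain exponential decay, equivalent to ExponentialLR with gamma equal to that value. A minimal sketch of the equivalence (the 0.95 factor and the tiny Linear model are illustrative, not taken from this project's config):

from torch import nn, optim
from torch.optim import lr_scheduler

net = nn.Linear(4, 1)
opt = optim.Adam(net.parameters(), lr=1e-3)

# A constant multiplicative factor gives the same schedule as
# lr_scheduler.ExponentialLR(opt, gamma=0.95).
sched = lr_scheduler.MultiplicativeLR(opt, lr_lambda=lambda epoch: 0.95)

for epoch in range(3):
    opt.step()    # optimizer step first, then scheduler step
    sched.step()
    print(epoch, sched.get_last_lr())  # ~[0.00095], ~[0.0009025], ~[0.000857]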
Exemplo n.º 23
0
    def test_MultiplicativeLR(self, debug=True):
        """
    Usage:
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/pypi/torch1_7_0 -d /cache/pypi -t copytree
        for filename in /cache/pypi/*.whl; do
            pip install $filename
        done
        proj_root=moco-exp
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/codes/$proj_root -d /cache/$proj_root -t copytree -b /cache/$proj_root/code.zip
        cd /cache/$proj_root
        pip install -r requirements.txt

        export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
        export TIME_STR=1
        export PYTHONPATH=./exp:./stylegan2-pytorch:./
        python 	-c "from exp.tests.test_styleganv2 import Testing_stylegan2;\
          Testing_stylegan2().test_train_ffhq_128()"

    :return:
    """
        if 'CUDA_VISIBLE_DEVICES' not in os.environ:
            os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        if 'TIME_STR' not in os.environ:
            os.environ['TIME_STR'] = '0' if utils.is_debugging() else '0'
        from template_lib.v2.config_cfgnode.argparser import \
          (get_command_and_outdir, setup_outdir_and_yaml, get_append_cmd_str, start_cmd_run)

        tl_opts = ' '.join(sys.argv[sys.argv.index('--tl_opts') +
                                    1:]) if '--tl_opts' in sys.argv else ''
        print(f'tl_opts:\n {tl_opts}')

        command, outdir = get_command_and_outdir(
            self, func_name=sys._getframe().f_code.co_name, file=__file__)
        argv_str = f"""
                --tl_config_file none
                --tl_command none
                --tl_outdir {outdir}
                """
        args = setup_outdir_and_yaml(argv_str, return_cfg=True)

        import torch.nn as nn
        from torch.optim import SGD, lr_scheduler
        from matplotlib import pyplot as plt

        def plot_lr(scheduler, title='', labels=['base'], nrof_epoch=100):
            lr_li = [[] for _ in range(len(labels))]
            epoch_li = list(range(nrof_epoch))
            for epoch in epoch_li:
                scheduler.step()  # update the lr of the optimizer's param groups for the current epoch
                lr = scheduler.get_last_lr()  # learning rate for the current epoch
                for i in range(len(labels)):
                    lr_li[i].append(lr[i])
            for lr, label in zip(lr_li, labels):
                plt.plot(epoch_li, lr, label=label)
            plt.grid()
            plt.xlabel('epoch')
            plt.ylabel('lr')
            plt.title(title)
            plt.legend()
            plt.show()

        ## MultiplicativeLR: visualize the learning rate

        base = nn.Linear(3, 32)
        fc = nn.Linear(32, 10)
        optimizer = SGD(
            [
                {
                    'params': base.parameters()
                },
                {
                    'params': fc.parameters(),
                    'lr': 0.05
                }  # use a different learning rate for the fc parameters
            ],
            lr=0.1,
            momentum=0.9)
        lambda_base = lambda epoch: 0.5 if epoch % 10 == 0 else 1
        lambda_fc = lambda epoch: 0.8 if epoch % 10 == 0 else 1
        scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                                  [lambda_base, lambda_fc])
        plot_lr(scheduler, title='MultiplicativeLR', labels=['base', 'fc'])
        pass
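
With the two lambdas above, each parameter group is scaled by its own factor whenever epoch % 10 == 0, so the plotted curves drop in steps at epochs 10, 20, and so on. The first drop works out as follows (plain arithmetic, using the initial rates 0.1 for base and 0.05 for fc from the optimizer above):

base_lr, fc_lr = 0.1, 0.05
base_lr *= 0.5   # lambda_base fires at epoch 10 -> 0.05
fc_lr *= 0.8     # lambda_fc fires at epoch 10   -> ~0.04
print(base_lr, fc_lr)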
Exemplo n.º 24
0
N_EPOCHS = 427
IN_CHANNELS = 1
N_CHANNELS = 64
N_PIXEL_SNAIL_BLOCKS = 8
N_RESIDUAL_BLOCKS = 2
ATTENTION_VALUE_CHANNELS = N_CHANNELS // 2
ATTENTION_KEY_CHANNELS = ATTENTION_VALUE_CHANNELS // 8

torch.cuda.empty_cache()
model = PixelSNAIL(IN_CHANNELS, N_CHANNELS, N_PIXEL_SNAIL_BLOCKS,
                   N_RESIDUAL_BLOCKS, ATTENTION_KEY_CHANNELS,
                   ATTENTION_VALUE_CHANNELS).to(torch.device("cuda"))
optimizer = optim.Adam(model.parameters(), lr=1e-3)

scheduler = lr_scheduler.MultiplicativeLR(optimizer,
                                          lr_lambda=lambda _: .999977)
bce_loss_fn = nn.BCELoss(reduction='none')


def loss_fn(x, _, preds):
    batch_size = x.shape[0]
    x, preds = x.view((batch_size, -1)), preds.view((batch_size, -1))
    return bce_loss_fn(preds, x).sum(dim=1).mean()


trainer = pg.trainer.Trainer(model=model,
                             loss_fn=loss_fn,
                             optimizer=optimizer,
                             train_loader=train_loader,
                             eval_loader=test_loader,
                             lr_scheduler=scheduler,
Exemplo n.º 25
0
    def __init__(
        self,
        hparams=None,
        name=None,
        model=None,
        device=None,
    ):
        super(RobustAutoEncoderPreTrainer, self).__init__(
            hparams,
            name,
            model,
            device,
        )

        hparams = self.get(constants.HPARAMS_DICT)

        # pgd encoder decoder inputs
        input_limits = self.get('drmade').decoder.output_limits
        pgd_eps = hparams.get('pgd/eps', model_config.pretrain_ae_pgd_eps)
        pgd_iterations = hparams.get('pgd/iterations',
                                     model_config.pretrain_ae_pgd_iterations)
        pgd_alpha = hparams.get('pgd/alpha',
                                model_config.pretrain_ae_pgd_alpha)
        pgd_randomize = hparams.get('pgd/randomize',
                                    model_config.pretrain_ae_pgd_randomize)
        pgd_input = {
            'eps': pgd_eps,
            'iterations': pgd_iterations,
            'alpha': pgd_alpha,
            'randomize': pgd_randomize,
            'input_limits': input_limits
        }

        # pgd decoder inputs
        latent_input_limits = self.get('drmade').encoder.output_limits
        pgd_latent_eps = hparams.get('pgd_latent/eps',
                                     model_config.pretrain_ae_latent_pgd_eps)
        pgd_latent_iterations = hparams.get(
            'pgd_latent/iterations',
            model_config.pretrain_ae_latent_pgd_iterations)
        pgd_latent_alpha = hparams.get(
            'pgd_latent/alpha', model_config.pretrain_ae_latent_pgd_alpha)
        pgd_latent_randomize = hparams.get(
            'pgd_latent/randomize',
            model_config.pretrain_ae_latent_pgd_randomize)
        pgd_latent = {
            'eps': pgd_latent_eps,
            'iterations': pgd_latent_iterations,
            'alpha': pgd_latent_alpha,
            'randomize': pgd_latent_randomize,
            'input_limits': latent_input_limits
        }
        base_lr = hparams.get('base_lr', model_config.base_lr)
        lr_decay = hparams.get('lr_decay', model_config.lr_decay)
        lr_schedule = hparams.get('lr_schedule', model_config.lr_schedule)

        print('freezing made')
        for parameter in self.get('drmade').made.parameters():
            parameter.requires_grad = False

        print('unfreezing encoder & decoder')
        for parameter in self.get('drmade').encoder.parameters():
            parameter.requires_grad = True
        for parameter in self.get('drmade').decoder.parameters():
            parameter.requires_grad = True

        print(f'initializing optimizer Adam - base_lr:{base_lr}')
        optimizer = Adam([{
            'params': self.get('drmade').encoder.parameters()
        }, {
            'params': self.get('drmade').decoder.parameters()
        }],
                         lr=base_lr)

        self.add_optimizer('ae', optimizer)

        print(
            f'initializing learning rate scheduler - lr_decay:{lr_decay} schedule:{lr_schedule}'
        )
        self.set(
            'lr_multiplicative_factor_lambda', lambda epoch: 0.5
            if (epoch + 1) % lr_schedule == 0 else lr_decay)
        self.add_scheduler(
            'ae',
            lr_scheduler.MultiplicativeLR(
                optimizer,
                lr_lambda=self.get('lr_multiplicative_factor_lambda'),
                last_epoch=-1))

        self.set(
            constants.TRAINER_NAME,
            name
            or 'PreTrain-{}-{}:{}|{}{}Adam-lr{}-schedule{}-decay{}'.format(
                self.get(constants.TRAINER_NAME),
                self.get('drmade').encoder.name,
                self.get('drmade').decoder.name, ''
                if not pgd_eps else 'pgd-eps{}-iterations{}alpha{}{}|'.format(
                    pgd_eps,
                    pgd_iterations,
                    pgd_alpha,
                    'randomized' if pgd_randomize else '',
                ), '' if not pgd_latent_eps else
                'pgd-latent-eps{}-iterations{}alpha{}{}|'.format(
                    pgd_latent_eps,
                    pgd_latent_iterations,
                    pgd_latent_alpha,
                    'randomized' if pgd_latent_randomize else '',
                ), base_lr, lr_schedule, lr_decay),
            replace=True)
        print("Pre Trainer: ", self.get(constants.TRAINER_NAME))

        self.add_loop(
            RobustAEFeedLoop(name='train',
                             data_loader=self.context['train_loader'],
                             device=self.context[constants.DEVICE],
                             optimizers=('ae', ),
                             pgd_input=pgd_input,
                             pgd_latent=pgd_latent,
                             log_interval=hparams.get(
                                 'log_interval',
                                 config.log_data_feed_loop_interval)))

        self.add_loop(
            RobustAEFeedLoop(
                name='validation',
                data_loader=self.context['validation_loader'],
                device=self.context[constants.DEVICE],
                optimizers=tuple(),
                pgd_input=pgd_input,
                pgd_latent=pgd_latent,
                interval=hparams.get('validation_interval',
                                     model_config.validation_interval),
                log_interval=hparams.get('log_interval',
                                         config.log_data_feed_loop_interval)))

        self.setup_writer()
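
The lr_multiplicative_factor_lambda above shrinks the learning rate by lr_decay on most epochs and halves it whenever (epoch + 1) is a multiple of lr_schedule. A minimal sketch of the resulting trajectory (base_lr=1e-3, lr_decay=0.99, lr_schedule=5 are illustrative values, not the model_config defaults):

base_lr, lr_decay, lr_schedule = 1e-3, 0.99, 5  # illustrative values only
lr = base_lr
for epoch in range(1, 11):  # mirrors the scheduler's internal epoch counter
    lr *= 0.5 if (epoch + 1) % lr_schedule == 0 else lr_decay
    print(epoch, lr)
# Most epochs multiply by 0.99; epochs 4 and 9 ((epoch + 1) % 5 == 0)
# halve the rate.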
Exemplo n.º 26
0
    def train(self, train_set, config, val_func=None):
        tab_dataset = TabularDataset(train_set)
        n_samples = len(tab_dataset)
        train_loader = torch_data.DataLoader(
            tab_dataset,
            batch_size=config["batch_size"],
            shuffle=True,
            num_workers=constants.LOAD_WORKERS)

        # Set models to train mode
        self.gen.train()
        self.disc.train()

        # Keep track of best so far
        best_ll = None  # best validation log-likelihood
        best_mae = None  # MAE where LL is best (not necessarily best MAE)
        best_epoch_i = None
        best_save_path = os.path.join(
            wandb.run.dir,
            constants.BEST_PARAMS_FILE)  # Path to save best params to

        # Optimizers (see GAN-hacks)
        gen_opt = networks.optimizers[config["gen_optimizer"]](
            self.gen.parameters(), lr=config["gen_lr"])
        disc_opt = networks.optimizers[config["disc_optimizer"]](
            self.disc.parameters(), lr=config["disc_lr"])

        # returns multiplicative factor, not new learning rate
        gen_mult_func = (lambda x: config["gen_lr_decay"])
        disc_mult_func = (lambda x: config["disc_lr_decay"])
        gen_scheduler = schedulers.MultiplicativeLR(gen_opt,
                                                    lr_lambda=gen_mult_func)
        disc_scheduler = schedulers.MultiplicativeLR(disc_opt,
                                                     lr_lambda=disc_mult_func)

        for epoch_i in range(config["epochs"]):
            epoch_disc_loss = []
            epoch_gen_loss = []
            epoch_fooling = []

            for batch_i, (x_batch, data_batch) in enumerate(train_loader):
                batch_size = data_batch.shape[0]

                # Send to correct device
                x_batch = x_batch.to(config["device"])
                data_batch = data_batch.to(config["device"])

                disc_opt.zero_grad()

                # Sample noise
                noise_batch = self.noise_dist.sample(
                    [batch_size]).to(config["device"])

                # Sample from generator
                gen_input = torch.cat((x_batch, noise_batch), dim=1)
                gen_batch = self.gen(gen_input)

                # Train discriminator
                data_logits = self.disc(torch.cat((x_batch, data_batch),
                                                  dim=1))
                gen_logits = self.disc(torch.cat((x_batch, gen_batch), dim=1))
                disc_loss = self.disc_loss(data_logits, gen_logits)

                disc_loss.backward()
                if config["clip_grad"]:
                    nn.utils.clip_grad_norm_(self.disc.parameters(),
                                             config["clip_grad"])
                disc_opt.step()

                gen_opt.zero_grad()

                # Train generator ("new_" here just means part of G training steps)
                n_gen_samples = batch_size * config["gen_samples"]
                new_noise_batch = self.noise_dist.sample([n_gen_samples]).to(
                    config["device"])

                if config["gen_samples"] > 1:
                    # Repeat each x sample an amount of times
                    # to get multiple generator samples for it
                    x_batch_repeated = torch.repeat_interleave(
                        x_batch, config["gen_samples"], dim=0)
                else:
                    x_batch_repeated = x_batch

                new_gen_input = torch.cat((x_batch_repeated, new_noise_batch),
                                          dim=1)
                new_gen_batch = self.gen(new_gen_input)
                new_gen_logits = self.disc(
                    torch.cat((x_batch_repeated, new_gen_batch), dim=1))

                gen_loss = self.gen_loss(new_gen_logits)

                gen_loss.backward()
                if config["clip_grad"]:
                    nn.utils.clip_grad_norm_(self.gen.parameters(),
                                             config["clip_grad"])
                gen_opt.step()

                # Store loss
                batch_fooling = torch.mean(torch.sigmoid(new_gen_logits))
                epoch_fooling.append(batch_fooling.item())
                epoch_disc_loss.append(disc_loss.item())
                epoch_gen_loss.append(gen_loss.item())

            # Log stats for epoch
            wandb.log({
                "epoch": epoch_i,
                "discriminator_loss": np.mean(epoch_disc_loss),
                "generator_loss": np.mean(epoch_gen_loss),
                "fooling": np.mean(epoch_fooling),
            })
            if val_func and ((epoch_i + 1) % config["val_interval"] == 0):
                # Validate
                evaluation_vals = val_func(self, epoch_i)

                if (best_epoch_i is None) or (evaluation_vals["ll"] > best_ll):
                    best_ll = evaluation_vals["ll"]
                    best_mae = evaluation_vals["mae"]
                    best_epoch_i = epoch_i

                    # Save model parameters for best epoch only
                    model_params = {
                        "gen": self.gen.state_dict(),
                        "disc": self.disc.state_dict(),
                    }
                    torch.save(model_params, best_save_path)

            gen_scheduler.step()
            disc_scheduler.step()

        # Save best epoch index to wandb
        wandb.run.summary["best_epoch"] = best_epoch_i
        wandb.run.summary["log_likelihood"] = best_ll
        wandb.run.summary["mae"] = best_mae

        # Restore best parameters to model (for future testing etc.)
        self.load_params_from_file(best_save_path)
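
As the comment in this example notes, lr_lambda must return a multiplicative factor rather than a new learning rate, and each network gets its own optimizer and scheduler. A minimal self-contained sketch of that pattern (the 0.99 factors and tiny Linear modules are illustrative stand-ins for gen_lr_decay / disc_lr_decay and the real networks):

from torch import nn, optim
from torch.optim import lr_scheduler

gen, disc = nn.Linear(8, 8), nn.Linear(8, 1)
gen_opt = optim.Adam(gen.parameters(), lr=2e-4)
disc_opt = optim.Adam(disc.parameters(), lr=2e-4)

# One scheduler per optimizer; each lambda returns a *factor*, not a new lr.
gen_sched = lr_scheduler.MultiplicativeLR(gen_opt, lr_lambda=lambda _: 0.99)
disc_sched = lr_scheduler.MultiplicativeLR(disc_opt, lr_lambda=lambda _: 0.99)

for epoch in range(3):
    # ... one epoch of discriminator and generator updates would go here ...
    gen_opt.step()
    disc_opt.step()
    gen_sched.step()
    disc_sched.step()
    print(epoch, gen_sched.get_last_lr(), disc_sched.get_last_lr())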