示例#1
0
def gen_pic(ckpt, epoch):
    device = torch.device('cuda')
    model = AutoEncoder().to(device)
    model.load_state_dict(torch.load(ckpt))
    model.eval()
    sample = torch.randn(64, 20).to(device)
    sample = model.decoder(sample).cpu()
    save_image(sample.view(64, 1, 28, 28),
               'results/sample_e{:02}.png'.format(epoch))
示例#2
0
def main(args):
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.cpu else 'cpu')
    print('Using %s device.' % device)

    world_size = int(
        os.environ[args.env_size]) if args.env_size in os.environ else 1
    local_rank = int(
        os.environ[args.env_rank]) if args.env_rank in os.environ else 0

    if local_rank == 0:
        print(vars(args))

    if world_size > 1:
        print('rank: {}/{}'.format(local_rank + 1, world_size))
        torch.distributed.init_process_group(backend='gloo',
                                             init_method='file://%s' %
                                             args.tmpname,
                                             rank=local_rank,
                                             world_size=world_size)

    train_dataloader, test_dataloader = load_dataset(args, device, world_size)

    net = AutoEncoder(input_dim=1900, nlayers=args.nlayers,
                      latent=100).to(device)

    if world_size > 1:
        net = torch.nn.parallel.DistributedDataParallel(net)

    if args.modelfile:
        net.load_state_dict(torch.load(args.modelfile))

    # define our optimizer and loss function
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    loss_func = nn.MSELoss(reduction='mean')

    test_losses = []

    for epoch in range(args.epochs):
        epoch_start = timeit.default_timer()

        train(train_dataloader, net, optimizer, loss_func, epoch)
        test_loss = test(test_dataloader, net, loss_func)

        print(' %5.2f sec' % (timeit.default_timer() - epoch_start))

        test_losses.append(test_loss)

        if test_loss <= min(test_losses):
            torch.save(net.state_dict(), 'model/%5.3f.pth' % min(test_losses))
示例#3
0
 def __init__(self, args):
     self.device = torch.device(
         'cuda' if torch.cuda.is_available() else 'cpu')
     self.id = args.id
     self.dataset_path = args.dataset
     model = AutoEncoder(args.E, args.D)
     if args.load:
         model.load_state_dict(torch.load(args.load))
     self.model = model.to(self.device)
     self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)
     self.metric = nn.MSELoss()
     self.epoch_num = args.epoch
     self.batch_size = args.bs
     self.cluster_num = args.cluster_num
     self.save = args.save
     self.testcase = args.testcase
     self.csv = args.csv
     self.record_file = None
     self.best = {'epoch': 0, 'loss': 999}
     self.test_images = get_test_image(args.dataset)
示例#4
0
def main(args):
    # ensures that weight initializations are all the same
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    logging = utils.Logger(args.global_rank, args.save)
    writer = utils.Writer(args.global_rank, args.save)

    # Get data loaders.
    train_queue, valid_queue, num_classes, _ = datasets.get_loaders(args)
    args.num_total_iter = len(train_queue) * args.epochs
    warmup_iters = len(train_queue) * args.warmup_epochs
    swa_start = len(train_queue) * (args.epochs - 1)

    arch_instance = utils.get_arch_cells(args.arch_instance)

    model = AutoEncoder(args, writer, arch_instance)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))
    logging.info('groups per scale: %s, total_groups: %d',
                 model.groups_per_scale, sum(model.groups_per_scale))

    if args.fast_adamax:
        # Fast adamax has the same functionality as torch.optim.Adamax, except it is faster.
        cnn_optimizer = Adamax(model.parameters(),
                               args.learning_rate,
                               weight_decay=args.weight_decay,
                               eps=1e-3)
    else:
        cnn_optimizer = torch.optim.Adamax(model.parameters(),
                                           args.learning_rate,
                                           weight_decay=args.weight_decay,
                                           eps=1e-3)

    cnn_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        cnn_optimizer,
        float(args.epochs - args.warmup_epochs - 1),
        eta_min=args.learning_rate_min)
    grad_scalar = GradScaler(2**10)

    num_output = utils.num_output(args.dataset, args)
    bpd_coeff = 1. / np.log(2.) / num_output

    # if load
    checkpoint_file = os.path.join(args.save, 'checkpoint.pt')
    if args.cont_training:
        logging.info('loading the model.')
        checkpoint = torch.load(checkpoint_file, map_location='cpu')
        init_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        model = model.cuda()
        cnn_optimizer.load_state_dict(checkpoint['optimizer'])
        grad_scalar.load_state_dict(checkpoint['grad_scalar'])
        cnn_scheduler.load_state_dict(checkpoint['scheduler'])
        global_step = checkpoint['global_step']
    else:
        global_step, init_epoch = 0, 0

    for epoch in range(init_epoch, args.epochs):
        # update lrs.
        if args.distributed:
            train_queue.sampler.set_epoch(global_step + args.seed)
            valid_queue.sampler.set_epoch(0)

        if epoch > args.warmup_epochs:
            cnn_scheduler.step()

        # Logging.
        logging.info('epoch %d', epoch)

        # Training.
        train_nelbo, global_step = train(train_queue, model, cnn_optimizer,
                                         grad_scalar, global_step,
                                         warmup_iters, writer, logging)
        logging.info('train_nelbo %f', train_nelbo)
        writer.add_scalar('train/nelbo', train_nelbo, global_step)

        model.eval()
        # generate samples less frequently
        eval_freq = 1 if args.epochs <= 50 else 20
        if epoch % eval_freq == 0 or epoch == (args.epochs - 1):
            with torch.no_grad():
                num_samples = 16
                n = int(np.floor(np.sqrt(num_samples)))
                for t in [0.7, 0.8, 0.9, 1.0]:
                    logits = model.sample(num_samples, t)
                    output = model.decoder_output(logits)
                    output_img = output.mean if isinstance(
                        output, torch.distributions.bernoulli.Bernoulli
                    ) else output.sample(t)
                    output_tiled = utils.tile_image(output_img, n)
                    writer.add_image('generated_%0.1f' % t, output_tiled,
                                     global_step)

            valid_neg_log_p, valid_nelbo = test(valid_queue,
                                                model,
                                                num_samples=10,
                                                args=args,
                                                logging=logging)
            logging.info('valid_nelbo %f', valid_nelbo)
            logging.info('valid neg log p %f', valid_neg_log_p)
            logging.info('valid bpd elbo %f', valid_nelbo * bpd_coeff)
            logging.info('valid bpd log p %f', valid_neg_log_p * bpd_coeff)
            writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch)
            writer.add_scalar('val/nelbo', valid_nelbo, epoch)
            writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff,
                              epoch)
            writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch)

        save_freq = int(np.ceil(args.epochs / 100))
        if epoch % save_freq == 0 or epoch == (args.epochs - 1):
            if args.global_rank == 0:
                logging.info('saving the model.')
                torch.save(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': cnn_optimizer.state_dict(),
                        'global_step': global_step,
                        'args': args,
                        'arch_instance': arch_instance,
                        'scheduler': cnn_scheduler.state_dict(),
                        'grad_scalar': grad_scalar.state_dict()
                    }, checkpoint_file)

    # Final validation
    valid_neg_log_p, valid_nelbo = test(valid_queue,
                                        model,
                                        num_samples=1000,
                                        args=args,
                                        logging=logging)
    logging.info('final valid nelbo %f', valid_nelbo)
    logging.info('final valid neg log p %f', valid_neg_log_p)
    writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch + 1)
    writer.add_scalar('val/nelbo', valid_nelbo, epoch + 1)
    writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff, epoch + 1)
    writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch + 1)
    writer.close()
示例#5
0
                    default='b',
                    help='Which way to decode the image, decoder a or b')
parser.add_argument('-out_name',
                    type=str,
                    default='video_swaped',
                    help='The name of the output movie')

args = parser.parse_args()

cap = cv2.VideoCapture(args.original_video)
fps = cap.get(cv2.CAP_PROP_FPS)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

model = AutoEncoder(image_channels=3).to(device)
model.load_state_dict(torch.load(args.model_location))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_tracked = cv2.VideoWriter('{}.mp4'.format(args.out_name), fourcc, fps,
                                (width, height))
i = 0

while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        try:
            print('\rTracking frame: {}'.format(i + 1), end='')
            i += 1
            # Retrive face from frame, align it, resize it in cv2 to fit into model
            img1_face = extract_face(frame)
            img1_face = np.array(img1_face)
            img1_face = cv2.resize(img1_face, (128, 128))
示例#6
0
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device(
    'cpu')

cd_loss = ChamferDistance()

test_dataset = ShapeNet(partial_path=args.partial_root,
                        gt_path=args.gt_root,
                        split='test')
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_workers)

network = AutoEncoder()
network.load_state_dict(torch.load('log/lowest_loss.pth'))
network.to(DEVICE)

# testing: evaluate the mean cd loss
network.eval()
with torch.no_grad():
    total_loss, iter_count = 0, 0
    for i, data in enumerate(test_dataloader, 1):
        partial_input, coarse_gt, dense_gt = data

        partial_input = partial_input.to(DEVICE)
        coarse_gt = coarse_gt.to(DEVICE)
        dense_gt = dense_gt.to(DEVICE)
        partial_input = partial_input.permute(0, 2, 1)

        v, y_coarse, y_detail = network(partial_input)
示例#7
0
        x = self.fc2(x)
        return F.log_softmax(x)


    def train(self):
        super(Net, self).train()
        self.vae.train()

    def eval(self):
        super(Net, self).eval()
        self.vae.eval()


vae_model = AutoEncoder()

vae_model.load_state_dict(torch.load("./unsup_weights/best_train.pth"))

model = Net(vae_model, 10)


if torch.cuda.is_available():
    print("cuda available. \n")
    model = model.cuda()
else:
    print("cuda not available. \n")

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
criterion = nn.CrossEntropyLoss()

train_loss = []
val_loss = []
示例#8
0
val_dataset = ShapeNet(partial_path=args.partial_root,
                       gt_path=args.gt_root,
                       split='val')
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)
val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)

network = AutoEncoder()
if args.model is not None:
    print('Loaded trained model from {}.'.format(args.model))
    network.load_state_dict(torch.load(args.model))
else:
    print('Begin training new model.')
network.to(DEVICE)
optimizer = optim.Adam(network.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.7)

max_iter = int(len(train_dataset) / args.batch_size + 0.5)
minimum_loss = 1e4
best_epoch = 0

for epoch in range(1, args.epochs + 1):
    # training
    network.train()
示例#9
0
def main(eval_args):
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']

    if not hasattr(args, 'ada_groups'):
        logging.info('old model, no ada groups was found.')
        args.ada_groups = False

    if not hasattr(args, 'min_groups_per_scale'):
        logging.info('old model, no min_groups_per_scale was found.')
        args.min_groups_per_scale = 1

    if not hasattr(args, 'num_mixture_dec'):
        logging.info('old model, no num_mixture_dec was found.')
        args.num_mixture_dec = 10

    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    # Loading is not strict because of self.weight_normalized in Conv2D class in neural_operations. This variable
    # is only used for computing the spectral normalization and it is safe not to load it. Some of our earlier models
    # did not have this variable.
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue
        args.data = eval_args.data
        train_queue, valid_queue, num_classes = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue

        # get number of bits
        num_output = utils.num_output(args.dataset)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(
            valid_queue,
            model,
            num_samples=eval_args.num_iw_samples,
            args=args,
            logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f',
                     valid_neg_log_p * bpd_coeff)

    else:
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))
            set_bn(model,
                   bn_eval_mode,
                   num_samples=36,
                   t=eval_args.temp,
                   iter=500)
            for ind in range(10):  # sampling is repeated.
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)
                output = model.decoder_output(logits)
                output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                    else output.sample()
                torch.cuda.synchronize()
                end = time()

                # save images to 'results/eval-x/images/epochn' where x is exp id and n is epoch muber
                # print("tensor shape: {}".format(output_img.shape))
                # try saving the images one my one
                path_to_images = '/content/gdrive/MyDrive/pipeline_results/NVAE/results/eval-1/images'
                if not os.path.exists(path_to_images):
                    os.makedirs(path_to_images)
                for i in range(output_img.size(0)):
                    vutils.save_image(output_img[i, :, :, :],
                                      '%s/sample_batch%03d_img%03d.png' %
                                      (path_to_images, ind + 1, i + 1),
                                      normalize=True)
示例#10
0
    def test(self, config):
        """Testing routine"""
        # Initialize Dataset for testing.
        test_data = torchvision.datasets.ImageFolder(
            root=os.path.join(config.data_dir, "test"),
            transform=torchvision.transforms.ToTensor())

        # Create data loader for the test dataset with two number of workers and no
        # shuffling.
        te_data_loader = torch.utils.data.DataLoader(
            dataset=test_data,
            batch_size=config.batch_size,
            num_workers=config.numWorker,
            shuffle=False)

        # Create model
        model = AutoEncoder()

        # Move to GPU if you have one.
        if torch.cuda.is_available():
            model = model.cuda()

        # Create loss objects
        data_loss = nn.MSELoss()

        # Fix gpu -> cpu bug
        compute_device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Load our best model and set model for testing
        load_res = torch.load(os.path.join(config.save_dir, "best_model.pth"),
                              map_location=compute_device)

        model.load_state_dict(load_res["model"])

        model.eval()

        # Implement The Test loop
        prefix = "Testing: "
        te_loss = []
        te_acc = []
        for data in tqdm(te_data_loader, desc=prefix):
            # Split the data
            x, y = data

            # Send data to GPU if we have one
            if torch.cuda.is_available():
                x = x.cuda()
                y = y.cuda()

            # Don't invoke gradient computation
            with torch.no_grad():
                # Compute logits
                logits = model.forward(x)
                # Compute loss and store as numpy
                loss = data_loss(logits, x.float())
                te_loss += [loss.cpu().numpy()]
                # Compute accuracy and store as numpy
                pred = torch.argmax(logits, dim=1)
                acc = torch.mean(torch.eq(pred.vewi(x.size()),
                                          x).float()) * 100.0
                te_acc += [acc.cpu().numpy()]

        # Report Test loss and accuracy
        print("Test Loss = {}".format(np.mean(te_loss)))  # TODO proper logging
        print("Test Accuracy = {}%".format(
            np.mean(te_acc)))  # TODO proper logging
示例#11
0
def main():
    parser = argparse.ArgumentParser(description='AvatarNet by Pytorch')
    parser.add_argument('--batch_size',
                        '-b',
                        type=int,
                        default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=2,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--patch_size',
                        '-p',
                        type=int,
                        default=5,
                        help='Size of extracted patches from style features')
    parser.add_argument('--alpha',
                        '-a',
                        type=float,
                        default=0.8,
                        help='alpha control the fusion degree')
    parser.add_argument('--lam1',
                        type=float,
                        default=0.01,
                        help='lambda1 for perceptual loss')
    parser.add_argument('--lam2',
                        type=float,
                        default=0.01,
                        help='lambda2 for tv loss')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID(nagative value indicate CPU)')
    parser.add_argument('--learning_rate',
                        '-lr',
                        type=float,
                        default=1e-4,
                        help='learning rate for Adam')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=10,
                        help='Interval of snapshot to generate image')
    parser.add_argument('--train_content_dir',
                        type=str,
                        default='/data/chen/content',
                        help='content images directory for train')
    parser.add_argument('--train_style_dir',
                        type=str,
                        default='/data/chen/style',
                        help='style images directory for train')
    parser.add_argument('--test_content_dir',
                        type=str,
                        default='/data/chen/content',
                        help='content images directory for test')
    parser.add_argument('--test_style_dir',
                        type=str,
                        default='/data/chen/style',
                        help='style images directory for test')
    parser.add_argument('--save_dir',
                        type=str,
                        default='result',
                        help='save directory for result and loss')
    parser.add_argument('--reuse',
                        default=None,
                        help='model state path to load for reuse')

    args = parser.parse_args()

    # create directory to save
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    loss_dir = f'{args.save_dir}/loss'
    model_state_dir = f'{args.save_dir}/model_state'
    image_dir = f'{args.save_dir}/image'

    if not os.path.exists(loss_dir):
        os.mkdir(loss_dir)
        os.mkdir(model_state_dir)
        os.mkdir(image_dir)

    # set device on GPU if available, else CPU
    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device(f'cuda:{args.gpu}')
        print(f'# CUDA available: {torch.cuda.get_device_name(0)}')
    else:
        device = 'cpu'

    print(f'# Minibatch-size: {args.batch_size}')
    print(f'# epoch: {args.epoch}')
    print('')

    # prepare dataset and dataLoader
    train_dataset = PreprocessDataset(args.train_content_dir,
                                      args.train_style_dir)
    test_dataset = PreprocessDataset(args.test_content_dir,
                                     args.test_style_dir)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False)
    test_iter = iter(test_loader)

    # set model and optimizer
    model = AutoEncoder().to(device)
    if args.reuse is not None:
        model.load_state_dict(torch.load(args.reuse))
    optimizer = Adam(model.parameters(), lr=args.learning_rate)

    # start training
    loss_list = []
    for e in range(1, args.epoch + 1):
        print(f'Start {e} epoch')
        for i, (content, style) in tqdm(enumerate(train_loader, 1)):
            content = content.to(device)
            style = style.to(device)
            loss = model(content, style, args.patch_size, args.alpha,
                         args.lam1, args.lam2)
            loss_list.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(
                f'[{e}/total {args.epoch} epoch],[{i} /'
                f'total {round(iters/args.batch_size)} iteration]: {loss.item()}'
            )

            if i % args.snapshot_interval == 0:
                content, style = next(test_iter)
                content = content.to(device)
                style = style.to(device)
                with torch.no_grad():
                    out = model.generate(content, style, args.patch_size,
                                         args.alpha)
                content = denorm(content, device)
                style = denorm(style, device)
                out = denorm(out, device)
                res = torch.cat([content, style, out], dim=0)
                res = res.to('cpu')
                save_image(res,
                           f'{image_dir}/{e}_epoch_{i}_iteration.png',
                           nrow=args.batch_size)
        torch.save(model.state_dict(), f'{model_state_dir}/{e}_epoch.pth')
    plt.plot(range(len(loss_list)), loss_list)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('train loss')
    plt.savefig(f'{loss_dir}/train_loss.png')
    with open(f'{loss_dir}/loss_log.txt', 'w') as f:
        for l in loss_list:
            f.write(f'{l}\n')
    print(f'Loss saved in {loss_dir}')
示例#12
0
class Trainer(object):
    def __init__(self, train_loader, test_loader, config):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.config = config
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        self.num_epochs = config.num_epochs
        self.lr = config.lr

        self.in_channel = config.in_channel
        self.image_size = config.image_size
        self.hidden_dim = config.hidden_dim
        self.output_dim = config.output_dim

        self.log_interval = config.log_interval
        self.sample_interval = config.sample_interval
        self.ckpt_interval = config.ckpt_interval

        self.sample_folder = config.sample_folder
        self.ckpt_folder = config.ckpt_folder

        self.build_net()
        self.vis = Visualizer()

    def build_net(self):
        # define network
        self.net = AutoEncoder(self.in_channel, self.image_size,
                               self.hidden_dim, self.output_dim)

        if self.config.mode == 'test' and self.config.training_path == '':
            print("[*] Enter model path!")
            exit()

        # if training model exists
        if self.config.training_path != '':
            self.net.load_state_dict(
                torch.load(self.config.training_path,
                           map_location=lambda storage, loc: storage))
            print("[*] Load weight from {}!".format(self.config.training_path))

        self.net.to(self.device)

    # add noise to image
    def add_noise(self, imgs):
        noise = torch.randn(imgs.size()) * 0.4
        noisy_imgs = noise + imgs
        return noisy_imgs

    def train(self):
        # define loss function
        bce_criterion = nn.BCELoss().to(self.device)
        mse_criterion = nn.MSELoss().to(self.device)

        # define optimizer
        optimizer = Adam(self.net.parameters(), self.lr)

        step = 0
        print("[*] Learning started!")

        # get fixed sample
        temp_iter = iter(self.train_loader)
        fixed_imgs, _ = next(temp_iter)
        fixed_imgs = fixed_imgs.to(self.device)

        # save fixed sample image
        x_path = os.path.join(self.sample_folder, 'fixed_input.png')
        save_image(fixed_imgs, x_path, normalize=True)
        print("[*] Save fixed input image!")

        # make fixed noisy sample and save
        fixed_noisy_imgs = self.add_noise(fixed_imgs)
        noisy_x_path = os.path.join(self.sample_folder,
                                    'fixed_noisy_input.png')
        save_image(fixed_noisy_imgs, noisy_x_path, normalize=True)
        print("[*] Save fixed noisy input image!")

        # flatten data tensors
        fixed_imgs = fixed_imgs.view(fixed_imgs.size(0), -1)
        fixed_noisy_imgs = fixed_noisy_imgs.view(fixed_imgs.size(0), -1)

        for epoch in range(self.num_epochs):
            for i, (imgs, _) in enumerate(self.train_loader):
                self.net.train()

                imgs = imgs.view(imgs.size(0), -1)  # original images
                noisy_imgs = self.add_noise(imgs)  # add noise
                noisy_imgs = noisy_imgs.to(self.device)

                # forwarding
                outputs = self.net(noisy_imgs)  # use noisy image as input
                bce_loss = bce_criterion(outputs, imgs)
                mse_loss = mse_criterion(outputs, imgs)

                # backwarding
                optimizer.zero_grad()
                bce_loss.backward()  # backward BCE loss
                optimizer.step()

                # do logging
                if (step + 1) % self.log_interval == 0:
                    print("[{}/{}] [{}/{}] BCE loss: {:3f}, MSE loss:{:3f}".
                          format(epoch + 1, self.num_epochs, i + 1,
                                 len(self.train_loader),
                                 bce_loss.item() / len(imgs),
                                 mse_loss.item() / len(imgs)))
                    self.vis.plot("BCE Loss plot", bce_loss.item() / len(imgs))
                    self.vis.plot("MSE Loss plot", mse_loss.item() / len(imgs))

                # do sampling
                if (step + 1) % self.sample_interval == 0:
                    outputs = self.net(fixed_noisy_imgs)
                    x_hat = outputs.cpu().data.view(outputs.size(0), -1,
                                                    self.image_size,
                                                    self.image_size)
                    x_hat_path = os.path.join(
                        self.sample_folder,
                        'output_epoch{}.png'.format(epoch + 1))
                    save_image(x_hat, x_hat_path, normalize=True)

                    print("[*] Save sample images!")

                step += 1

            if (epoch + 1) % self.ckpt_interval == 0:
                ckpt_path = os.path.join(self.ckpt_folder,
                                         'ckpt_epoch{}.pth'.format(epoch + 1))
                torch.save(self.net.state_dict(), ckpt_path)
                print("[*] Checkpoint saved!")

        print("[*] Learning finished!")
        ckpt_path = os.path.join(self.ckpt_folder, 'final_model.pth')
        torch.save(self.net.state_dict(), ckpt_path)
        print("[*] Final weight saved!")
示例#13
0
        d_loss = dis_loss(real_dis, validity)
        optimizer_b.zero_grad()
        d_loss.backward(retain_graph=True)
        optimizer_b.step()
    return _loss


print('training for {} steps'.format(args.n_steps))

for epoch in range(args.n_steps):
    # for idx, (images, _) in enumerate(dataloader):
    a = next(itera)
    b = next(iterb)
    images_a = torch.tensor(a, device=device).float()
    images_a = images_a.to(device)

    images_b = torch.tensor(b, device=device).float()
    images_b = images_b.to(device)
    loss_a = train_step(images_a, version='a')
    loss_b = train_step(images_b, version='b')
    to_print = "Epoch[{}/{}] Loss A:{}, Loss B:{}".format(epoch+1, args.n_steps, loss_a.data, loss_b.data)
    if epoch % 1000 == 0:
        print(to_print)
        model_state_dict = model.state_dict()
        torch.save(model_state_dict, '{}/{}.pt'.format(args.saved_dir, args.model_name))
if save:
    model_state_dict = model.state_dict()
    torch.save(model_state_dict, '{}/model.pt'.format(args.saved_dir))
else:
    model.load_state_dict(torch.load('{}/model.pt'.format(args.saved_dir)))
示例#14
0
文件: evaluate.py 项目: AleZonta/NVAE
def main(eval_args):
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']

    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue
        args.data = eval_args.data
        train_queue, valid_queue, num_classes, test_queue = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue
        if eval_args.eval_on_test:
            logging.info('Using the test data for eval.')
            valid_queue = test_queue

        # get number of bits
        num_output = utils.num_output(args.dataset, args)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(valid_queue, model, num_samples=eval_args.num_iw_samples, args=args, logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f', valid_neg_log_p * bpd_coeff)

    else:
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))
            set_bn(model, bn_eval_mode, num_samples=36, t=eval_args.temp, iter=500)
            for ind in range(eval_args.repetition):     # sampling is repeated.
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)
                output = model.decoder_output(logits)
                output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                    else output.sample()
                torch.cuda.synchronize()
                end = time()

                # save to file
                total_name = "{}/data_to_save_{}_{}.pickle".format(eval_args.save, eval_args.name_to_save, ind)
                with open(total_name, 'wb') as handle:
                    pickle.dump(output_img.deatach().numpy(), handle, protocol=pickle.HIGHEST_PROTOCOL)

                output_tiled = utils.tile_image(output_img, n).cpu().numpy().transpose(1, 2, 0)
                logging.info('sampling time per batch: %0.3f sec', (end - start))
                output_tiled = np.asarray(output_tiled * 255, dtype=np.uint8)
                output_tiled = np.squeeze(output_tiled)

                plt.imshow(output_tiled)
                plt.savefig("{}/generation_{}_{}".format(eval_args.save, eval_args.name_to_save, ind))
示例#15
0
from torch.utils.data import TensorDataset, DataLoader

from model import AutoEncoder
import torch.nn as nn
import torch
import numpy as np
from tqdm import tqdm

model = AutoEncoder().cuda()
checkpoint = torch.load("autoencoder.pth")
criterion = nn.L1Loss()
model.load_state_dict(checkpoint)
model.eval()

with open("../agent_code/rule_based_agent_auto_encode/states.npy", "rb") as f:
    data = np.load(f, allow_pickle=False)

data = data[:2_500_000]
data_t = torch.Tensor(data)
data_t = data_t.cuda()
dataset = TensorDataset(data_t)

train_len = int(0.7 * len(dataset))
valid_len = (len(dataset) - train_len) // 2
test_len = len(dataset) - train_len - valid_len

train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_len, valid_len, test_len],
    generator=torch.Generator().manual_seed(42))

batch_size = 32
示例#16
0
    save_image(compare_x.data.cpu(), '{}/sample_image_b_to_a.png'.format(dir_name))

    compare_x = transfer(model, image_a, 'b')
    save_image(compare_x.data.cpu(), '{}/sample_image_a_to_b.png'.format(dir_name))


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-saved_dir', type=str, default='saved_images', help='where to save the images')
    parser.add_argument('-model_name', type=str, default='model', help='model name')
    parser.add_argument('-images_a', type=str, default='a.npy', help='dataset of images a')
    parser.add_argument('-images_b', type=str, default='b.npy', help='dataset of images b')

    args = parser.parse_args()

    model = AutoEncoder(image_channels=3).to('cuda')
    model.load_state_dict(torch.load('saved_models/{}.pt'.format(args.model_name)))

    train_dataset_array_a = np.load(args.images_a)
    train_dataset_array_b = np.load(args.images_b)

    x_a = train_dataset_array_a[randint(1, 10)]
    x_b = train_dataset_array_b[randint(1, 10)]

    a_max = len(train_dataset_array_a)
    b_max = len(train_dataset_array_b)

    write_images(model, x_a, x_b, args.saved_dir)

示例#17
0
def main(eval_args):
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']

    if not hasattr(args, 'ada_groups'):
        logging.info('old model, no ada groups was found.')
        args.ada_groups = False

    if not hasattr(args, 'min_groups_per_scale'):
        logging.info('old model, no min_groups_per_scale was found.')
        args.min_groups_per_scale = 1

    if not hasattr(args, 'num_mixture_dec'):
        logging.info('old model, no num_mixture_dec was found.')
        args.num_mixture_dec = 10

    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    # Loading is not strict because of self.weight_normalized in Conv2D class in neural_operations. This variable
    # is only used for computing the spectral normalization and it is safe not to load it. Some of our earlier models
    # did not have this variable.
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue
        args.data = eval_args.data
        train_queue, valid_queue, num_classes = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue

        # get number of bits
        num_output = utils.num_output(args.dataset)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(
            valid_queue,
            model,
            num_samples=eval_args.num_iw_samples,
            args=args,
            logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f',
                     valid_neg_log_p * bpd_coeff)

    else:
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))
            set_bn(model,
                   bn_eval_mode,
                   num_samples=36,
                   t=eval_args.temp,
                   iter=500)
            for ind in range(10):  # sampling is repeated.
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)
                output = model.decoder_output(logits)
                output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                    else output.sample()
                torch.cuda.synchronize()
                end = time()

                output_tiled = utils.tile_image(output_img,
                                                n).cpu().numpy().transpose(
                                                    1, 2, 0)
                logging.info('sampling time per batch: %0.3f sec',
                             (end - start))
                output_tiled = np.asarray(output_tiled * 255, dtype=np.uint8)
                output_tiled = np.squeeze(output_tiled)

                plt.imshow(output_tiled)
                plt.show()
示例#18
0
    def train(self, config):
        """Training routine"""
        # Initialize datasets for both training and validation
        train_data = torchvision.datasets.ImageFolder(
            root=os.path.join(config.data_dir, "train"),
            transform=torchvision.transforms.ToTensor())
        valid_data = torchvision.datasets.ImageFolder(
            root=os.path.join(config.data_dir, "valid"),
            transform=torchvision.transforms.ToTensor())

        # Create data loader for training and validation.
        tr_data_loader = torch.utils.data.DataLoader(
            dataset=train_data,
            batch_size=config.batch_size,
            num_workers=config.numWorker,
            shuffle=True)
        va_data_loader = torch.utils.data.DataLoader(
            dataset=valid_data,
            batch_size=config.batch_size,
            num_workers=config.numWorker,
            shuffle=False)

        # Create model instance.
        #model = Model()
        model = AutoEncoder()

        # Move model to gpu if cuda is available
        if torch.cuda.is_available():
            model = model.cuda()
        # Make sure that the model is set for training
        model.train()

        # Create loss objects
        data_loss = nn.MSELoss()

        # Create optimizier
        optimizer = optim.Adam(model.parameters(), lr=config.learn_rate)
        # No need to move the optimizer (as of PyTorch 1.0), it lies in the same
        # space as the model

        # Create summary writer
        tr_writer = SummaryWriter(
            log_dir=os.path.join(config.log_dir, "train"))
        va_writer = SummaryWriter(
            log_dir=os.path.join(config.log_dir, "valid"))

        # Create log directory and save directory if it does not exist
        if not os.path.exists(config.log_dir):
            os.makedirs(config.log_dir)
        if not os.path.exists(config.save_dir):
            os.makedirs(config.save_dir)

        # Initialize training
        iter_idx = -1  # make counter start at zero
        best_va_acc = 0  # to check if best validation accuracy
        # Prepare checkpoint file and model file to save and load from
        checkpoint_file = os.path.join(config.save_dir, "checkpoint.pth")
        bestmodel_file = os.path.join(config.save_dir, "best_model.pth")

        # Check for existing training results. If it existst, and the configuration
        # is set to resume `config.resume==True`, resume from previous training. If
        # not, delete existing checkpoint.
        if os.path.exists(checkpoint_file):
            if config.resume:
                # Use `torch.load` to load the checkpoint file and the load the
                # things that are required to continue training. For the model and
                # the optimizer, use `load_state_dict`. It's actually a good idea
                # to code the saving part first and then code this part.
                print("Checkpoint found! Resuming")  # TODO proper logging
                # Read checkpoint file.

                # Fix gpu -> cpu bug
                compute_device = 'cuda' if torch.cuda.is_available() else 'cpu'
                load_res = torch.load(checkpoint_file,
                                      map_location=compute_device)

                # Resume iterations
                iter_idx = load_res["iter_idx"]
                # Resume best va result
                best_va_acc = load_res["best_va_acc"]
                # Resume model
                model.load_state_dict(load_res["model"])

                # Resume optimizer
                optimizer.load_state_dict(load_res["optimizer"])
                # Note that we do not resume the epoch, since we will never be able
                # to properly recover the shuffling, unless we remember the random
                # seed, for example. For simplicity, we will simply ignore this,
                # and run `config.num_epoch` epochs regardless of resuming.
            else:
                os.remove(checkpoint_file)

        # Training loop
        for epoch in range(config.num_epoch):
            # For each iteration
            prefix = "Training Epoch {:3d}: ".format(epoch)

            for data in tqdm(tr_data_loader, desc=prefix):
                # Counter
                iter_idx += 1

                # Split the data
                # x is img, y is label
                x, y = data
                #print(x)
                # Send data to GPU if we have one
                if torch.cuda.is_available():
                    x = x.cuda()
                    y = y.cuda()

                # Apply the model to obtain scores (forward pass)
                logits = model.forward(x)
                # Compute the loss
                loss = data_loss(logits, x.float())
                # Compute gradients
                loss.backward()
                # Update parameters
                optimizer.step()
                # Zero the parameter gradients in the optimizer
                optimizer.zero_grad()

                # Monitor results every report interval
                if iter_idx % config.rep_intv == 0:
                    # Compute accuracy (No gradients required). We'll wrapp this
                    # part so that we prevent torch from computing gradients.
                    with torch.no_grad():
                        pred = torch.argmax(logits, dim=1)
                        acc = torch.mean(
                            torch.eq(pred.view(x.size()), x).float()) * 100.0
                    # Write loss and accuracy to tensorboard, using keywords `loss`
                    # and `accuracy`.
                    tr_writer.add_scalar("loss", loss, global_step=iter_idx)
                    tr_writer.add_scalar("accuracy", acc, global_step=iter_idx)

                    # Save
                    torch.save(
                        {
                            "iter_idx": iter_idx,
                            "best_va_acc": best_va_acc,
                            "model": model.state_dict(),
                            "optimizer": optimizer.state_dict(),
                            "loss": loss,
                            "epoch": epoch,
                            "acc": acc
                        }, checkpoint_file)

                # Validate results every validation interval
                if iter_idx % config.val_intv == 0:
                    # List to contain all losses and accuracies for all the
                    # training batches
                    va_loss = []
                    va_acc = []
                    # Set model for evaluation
                    model = model.eval()
                    for data in va_data_loader:

                        # Split the data
                        x, y = data

                        # Send data to GPU if we have one
                        if torch.cuda.is_available():
                            x = x.cuda()
                            y = y.cuda()

                        # Apply forward pass to compute the losses
                        # and accuracies for each of the validation batches
                        with torch.no_grad():
                            # Compute logits
                            logits = model.forward(x)
                            # Compute loss and store as numpy
                            loss = data_loss(logits, x.float())
                            va_loss += [loss.cpu().numpy()]
                            # Compute accuracy and store as numpy
                            pred = torch.argmax(logits, dim=1)
                            acc = torch.mean(
                                torch.eq(pred.view(x.size()),
                                         x).float()) * 100.0
                            va_acc += [acc.cpu().numpy()]
                    # Set model back for training
                    model = model.train()
                    # Take average
                    va_loss = np.mean(va_loss)
                    va_acc = np.mean(va_acc)

                    # Write to tensorboard using `va_writer`
                    va_writer.add_scalar("loss", va_loss, global_step=iter_idx)
                    va_writer.add_scalar("accuracy",
                                         va_acc,
                                         global_step=iter_idx)
                    # Check if best accuracy
                    if va_acc > best_va_acc:
                        best_va_acc = va_acc
                        # Save best model using torch.save. Similar to previous
                        # save but at location defined by `bestmodel_file`
                        torch.save(
                            {
                                "iter_idx": iter_idx,
                                "best_va_acc": best_va_acc,
                                "model": model.state_dict(),
                                "optimizer": optimizer.state_dict(),
                                "loss": loss,
                                "acc": acc
                            }, bestmodel_file)