Example #1
def __init__(self, checkpoint_dir, model, optimizer, cfg):
    self.module_dict_params = {
        f"{cfg['method']}_model": model,
        "optimizer": optimizer,
        f"{cfg['method']}_config": cfg['model'],
    }
    self.checkpoint_dir = checkpoint_dir
    utils.cond_mkdir(checkpoint_dir)
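
Every snippet on this page calls utils.cond_mkdir, which is not shown here; a minimal sketch consistent with how it is used (create the directory only if it is missing) might look like this:

import os

def cond_mkdir(path):
    # Hypothetical helper: make `path` (and parents) only when it does not already exist.
    if not os.path.exists(path):
        os.makedirs(path)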
def train(validation=True):
    root_path = os.path.join(opt.logging_root, opt.experiment_name)
    utils.cond_mkdir(root_path)

    fn = dataio.polynomial_1
    integral_fn = dataio.polynomial_1_integral
    train_dataset = dataio.Implicit1DWrapper(range=(-1, 2),
                                             fn=fn,
                                             integral_fn=integral_fn,
                                             sampling_density=1000,
                                             train_every=250)

    train_dataloader = DataLoader(train_dataset,
                                  shuffle=True,
                                  batch_size=opt.batch_size,
                                  pin_memory=True,
                                  num_workers=0)

    if opt.activation != 'sine':
        num_pe_functions = 4  # cos + sin
    else:
        num_pe_functions = 0

    model = modules.CoordinateNet(nl=opt.activation,
                                  in_features=1,
                                  out_features=1,
                                  hidden_features=opt.hidden_features,
                                  num_hidden_layers=opt.hidden_layers,
                                  w0=opt.w0,
                                  use_grad=True,
                                  num_pe_fns=num_pe_functions,
                                  input_processing_fn=lambda x: x,
                                  grad_var='coords')
    model.cuda()

    # Define the loss
    loss_fn = loss_functions.function_mse
    summary_fn = partial(utils.write_simple_1D_function_summary, train_dataset)

    # Save command-line parameters to log directory.
    p.write_config_file(opt, [os.path.join(root_path, 'config.ini')])
    with open(os.path.join(root_path, "params.txt"), "w") as out_file:
        out_file.write('\n'.join(
            ["%s: %s" % (key, value) for key, value in vars(opt).items()]))

    # Save text summary of model into log directory.
    with open(os.path.join(root_path, "model.txt"), "w") as out_file:
        out_file.write(str(model))

    training.train(model=model,
                   train_dataloader=train_dataloader,
                   epochs=opt.num_epochs,
                   lr=opt.lr,
                   steps_til_summary=opt.steps_til_summary,
                   epochs_til_checkpoint=opt.epochs_til_ckpt,
                   model_dir=root_path,
                   loss_fn=loss_fn,
                   summary_fn=summary_fn)
Example #3
def getTestMSE(dataloader, subdir):
    MSEs = []
    total_steps = 0
    utils.cond_mkdir(os.path.join(root_path, subdir))
    utils.cond_mkdir(os.path.join(root_path, 'ground_truth'))

    with tqdm(total=len(dataloader)) as pbar:
        for step, (model_input, gt) in enumerate(dataloader):
            model_input['idx'] = torch.Tensor([model_input['idx']]).long()
            model_input = {
                key: value.cuda()
                for key, value in model_input.items()
            }
            gt = {key: value.cuda() for key, value in gt.items()}

            with torch.no_grad():
                model_output = model(model_input)

            out_img = dataio.lin2img(model_output['model_out'],
                                     image_resolution).squeeze().permute(
                                         1, 2, 0).detach().cpu().numpy()
            out_img += 1
            out_img /= 2.
            out_img = np.clip(out_img, 0., 1.)
            gt_img = dataio.lin2img(gt['img'],
                                    image_resolution).squeeze().permute(
                                        1, 2, 0).detach().cpu().numpy()
            gt_img += 1
            gt_img /= 2.
            gt_img = np.clip(gt_img, 0., 1.)

            sparse_img = model_input['img_sparse'].squeeze().detach().cpu(
            ).permute(1, 2, 0).numpy()
            mask = np.sum((sparse_img == 0), axis=2) == 3
            sparse_img += 1
            sparse_img /= 2.
            sparse_img = np.clip(sparse_img, 0., 1.)
            sparse_img[mask, ...] = 1.

            imageio.imwrite(
                os.path.join(root_path, subdir,
                             str(total_steps) + '_sparse.png'),
                to_uint8(sparse_img))
            imageio.imwrite(
                os.path.join(root_path, subdir,
                             str(total_steps) + '.png'), to_uint8(out_img))
            imageio.imwrite(
                os.path.join(root_path, 'ground_truth',
                             str(total_steps) + '.png'), to_uint8(gt_img))

            MSE = np.mean((out_img - gt_img)**2)
            MSEs.append(MSE)

            pbar.update(1)
            total_steps += 1

    return MSEs
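
getTestMSE also relies on a to_uint8 helper and on module-level root_path, image_resolution, and model variables defined elsewhere. A plausible sketch of to_uint8, assuming float images in [0, 1], would be:

import numpy as np

def to_uint8(img):
    # Hypothetical helper: map a float image in [0, 1] to 8-bit for imageio.imwrite.
    return (255. * np.clip(img, 0., 1.)).astype(np.uint8)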
Example #4
def main(cfg, num_workers):
    # Shortened
    out_dir = cfg['training']['out_dir']
    batch_size = cfg['training']['batch_size']
    utils.save_config(os.path.join(out_dir, 'config.yml'), cfg)

    model_selection_metric = cfg['training']['model_selection_metric']
    model_selection_sign = 1 if cfg['training'][
        'model_selection_mode'] == 'maximize' else -1

    # Output directory
    utils.cond_mkdir(out_dir)

    # Dataset
    test_dataset = config.get_dataset('test', cfg)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              num_workers=num_workers,
                                              shuffle=False)

    # Model
    model = config.get_model(cfg)
    trainer = config.get_trainer(model, None, cfg)

    # Print model
    print(model)
    logger = logging.getLogger(__name__)
    logger.info(
        f'Total number of parameters: {sum(p.numel() for p in model.parameters())}'
    )

    ckp = checkpoints.CheckpointIO(out_dir, model, None, cfg)
    try:
        load_dict = ckp.load('model_best.pt')
        logger.info('Model loaded')
    except FileExistsError:
        logger.info('Model NOT loaded')
        load_dict = dict()

    metric_val_best = load_dict.get('loss_val_best',
                                    -model_selection_sign * np.inf)

    logger.info(
        f'Current best validation metric ({model_selection_metric}): {metric_val_best:.6f}'
    )

    eval_dict = trainer.evaluate(test_loader)
    metric_val = eval_dict[model_selection_metric]
    logger.info(
        f'Validation metric ({model_selection_metric}): {metric_val:.8f}')

    eval_dict_path = os.path.join(out_dir, 'eval_dict.yml')
    with open(eval_dict_path, 'w') as f:
        yaml.dump(eval_dict, f)

    print(f'Results saved in {eval_dict_path}')
Example #5
def plot(stats_list):
    plt.figure()
    avg_reward = np.array([stats[0] for stats in stats_list])
    std_reward = np.array([stats[1] for stats in stats_list])
    num_wins = np.array([stats[2] for stats in stats_list])
    num_loss = np.array([stats[3] for stats in stats_list])

    episode = np.arange(1, len(avg_reward)+1)

    plt.plot(episode, avg_reward)
    reward_upper = avg_reward + std_reward
    reward_lower = avg_reward - std_reward
    plt.fill_between(episode, reward_lower, reward_upper, color='grey', alpha=.2,
                     label=r'$\pm$ 1 std. dev.')
    utils.cond_mkdir('./plots/')
    plt.savefig('./plots/plot')
featExNets = models.featExtractionNets()
upSamplingNets = models.upSamplingNets()
refineNets = models.refineNets()

if torch.cuda.is_available():
    featExNets = featExNets.cuda()
    upSamplingNets = upSamplingNets.cuda()
    refineNets = refineNets.cuda()

# Create Optimizer
opt_feature = torch.optim.Adam(featExNets.parameters(), lr=lr)
opt_upSampling = torch.optim.Adam(upSamplingNets.parameters(), lr=lr)
opt_refine = torch.optim.Adam(refineNets.parameters(), lr=lr)

# Create Logging dir
utils.cond_mkdir(opt.logging_root + '/kmeans')
utils.cond_mkdir(opt.logging_root + '/models')
# Save command-line parameters to log directory.
with open(opt.logging_root + '/params.txt', "w") as out_file:
    out_file.write('\n'.join(
        ["%s: %s" % (key, value) for key, value in vars(opt).items()]))

# Start Training
ori_psnr = 0

for epoch in range(max_epochs):
    utils.adjust_learning_rate(opt_feature, epoch, lr)
    utils.adjust_learning_rate(opt_upSampling, epoch, lr)
    utils.adjust_learning_rate(opt_refine, epoch, lr)

    avg_err, avg_psnr = 0, 0
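
utils.adjust_learning_rate is likewise not defined in this snippet; a common sketch consistent with the call signature (the decay factor and schedule below are assumptions) is a step decay applied to every parameter group:

def adjust_learning_rate(optimizer, epoch, base_lr, decay=0.5, step=20):
    # Hypothetical step decay: scale the base learning rate by `decay` every `step` epochs.
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr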
Example #7
def main(cfg, num_workers):
    # Shortened
    out_dir = cfg['training']['out_dir']
    batch_size = cfg['training']['batch_size']
    backup_every = cfg['training']['backup_every']
    utils.save_config(os.path.join(out_dir, 'config.yml'), cfg)

    model_selection_metric = cfg['training']['model_selection_metric']
    model_selection_sign = 1 if cfg['training'][
        'model_selection_mode'] == 'maximize' else -1

    # Output directory
    utils.cond_mkdir(out_dir)

    # Dataset
    train_dataset = config.get_dataset('train', cfg)
    val_dataset = config.get_dataset('val', cfg)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             num_workers=num_workers,
                                             shuffle=False)

    # Model
    model = config.get_model(cfg)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    trainer = config.get_trainer(model, optimizer, cfg)

    # Print model
    print(model)
    logger = logging.getLogger(__name__)
    logger.info(
        f'Total number of parameters: {sum(p.numel() for p in model.parameters())}'
    )

    # load pretrained model
    tb_logger = tensorboardX.SummaryWriter(os.path.join(out_dir, 'logs'))
    ckp = checkpoints.CheckpointIO(out_dir, model, optimizer, cfg)
    try:
        load_dict = ckp.load('model_best.pt')
        logger.info('Model loaded')
    except FileExistsError:
        logger.info('Model NOT loaded')
        load_dict = dict()

    epoch_it = load_dict.get('epoch_it', -1)
    it = load_dict.get('it', -1)
    metric_val_best = load_dict.get('loss_val_best',
                                    -model_selection_sign * np.inf)

    logger.info(
        f'Current best validation metric ({model_selection_metric}): {metric_val_best:.6f}'
    )

    # Shortened
    print_every = cfg['training']['print_every']
    validate_every = cfg['training']['validate_every']
    max_iterations = cfg['training']['max_iterations']
    max_epochs = cfg['training']['max_epochs']

    while True:
        epoch_it += 1

        for batch in train_loader:
            it += 1
            loss_dict = trainer.train_step(batch)
            loss = loss_dict['total_loss']
            for k, v in loss_dict.items():
                tb_logger.add_scalar(f'train/{k}', v, it)

            # Print output
            if print_every > 0 and (it % print_every) == 0:
                logger.info(
                    f'[Epoch {epoch_it:02d}] it={it:03d}, loss={loss:.8f}')

            # Backup if necessary
            if backup_every > 0 and (it % backup_every) == 0:
                logger.info('Backup checkpoint')
                ckp.save(f'model_{it:d}.pt',
                         epoch_it=epoch_it,
                         it=it,
                         loss_val_best=metric_val_best)

            # Run validation
            if validate_every > 0 and (it % validate_every) == 0:
                eval_dict = trainer.evaluate(val_loader)
                print('eval_dict=\n', eval_dict)
                metric_val = eval_dict[model_selection_metric]
                logger.info(
                    f'Validation metric ({model_selection_metric}): {metric_val:.8f}'
                )

                for k, v in eval_dict.items():
                    tb_logger.add_scalar(f'val/{k}', v, it)

                if model_selection_sign * (metric_val - metric_val_best) > 0:
                    metric_val_best = metric_val
                    logger.info(f'New best model (loss {metric_val_best:.8f})')
                    ckp.save('model_best.pt',
                             epoch_it=epoch_it,
                             it=it,
                             loss_val_best=metric_val_best)

            if (0 < max_iterations <= it) or (0 < max_epochs <= epoch_it):
                logger.info(
                    f'Maximum iterations/epochs ({it}/{epoch_it}) reached. Exiting.'
                )
                ckp.save(f'model_{it:d}.pt',
                         epoch_it=epoch_it,
                         it=it,
                         loss_val_best=metric_val_best)
                exit(3)
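
The checkpoints.CheckpointIO object used here exposes save and load methods that are not shown. A minimal sketch consistent with the __init__ from Example #1 and with the calls above is given below; the serialization format, and raising FileExistsError for a missing file so that the except clause above triggers, are assumptions:

import os
import torch

class CheckpointIO:
    def __init__(self, checkpoint_dir, model, optimizer, cfg):
        self.module_dict_params = {
            f"{cfg['method']}_model": model,
            "optimizer": optimizer,
            f"{cfg['method']}_config": cfg['model'],
        }
        self.checkpoint_dir = checkpoint_dir

    def save(self, filename, **kwargs):
        # Store state dicts of the registered modules plus extra scalars (epoch_it, it, ...).
        out = dict(kwargs)
        for name, module in self.module_dict_params.items():
            if hasattr(module, 'state_dict'):
                out[name] = module.state_dict()
        torch.save(out, os.path.join(self.checkpoint_dir, filename))

    def load(self, filename):
        path = os.path.join(self.checkpoint_dir, filename)
        if not os.path.exists(path):
            raise FileExistsError(f'checkpoint {path} does not exist')
        state = torch.load(path)
        for name, module in self.module_dict_params.items():
            if hasattr(module, 'load_state_dict') and name in state:
                module.load_state_dict(state.pop(name))
        return state  # leftover scalars: epoch_it, it, loss_val_best, ...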
Example #8
def train_wchunks(models,
                  train_dataloader,
                  epochs,
                  lr,
                  steps_til_summary,
                  epochs_til_checkpoint,
                  model_dir,
                  loss_fn,
                  summary_fn,
                  chunk_lists_from_batch_fn,
                  val_dataloader=None,
                  double_precision=False,
                  clip_grad=False,
                  loss_schedules=None,
                  num_cuts=128,
                  weight_decay=0.0,
                  max_chunk_size=4096,
                  loss_start={},
                  resume_checkpoint={}):

    optims = {
        key: torch.optim.Adam(lr=lr, params=model.parameters())
        for key, model in models.items()
    }
    schedulers = {
        key: torch.optim.lr_scheduler.StepLR(optim, step_size=8000, gamma=0.2)
        for key, optim in optims.items()
    }

    # load optimizer if supplied
    for key in models.keys():
        if key in resume_checkpoint:
            optims[key].load_state_dict(resume_checkpoint[key])
            schedulers = {
                key: torch.optim.lr_scheduler.StepLR(optim,
                                                     step_size=8000,
                                                     gamma=0.2)
                for key, optim in optims.items()
            }

    if os.path.exists(os.path.join(model_dir, 'summaries')):
        val = input("The model directory %s exists. Overwrite? (y/n)" %
                    model_dir)
        if val == 'y':
            if os.path.exists(os.path.join(model_dir, 'summaries')):
                shutil.rmtree(os.path.join(model_dir, 'summaries'))
            if os.path.exists(os.path.join(model_dir, 'checkpoints')):
                shutil.rmtree(os.path.join(model_dir, 'checkpoints'))

    os.makedirs(model_dir, exist_ok=True)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    if 'total_steps' in resume_checkpoint:
        total_steps = resume_checkpoint['total_steps']

    start_epoch = 0
    if 'epoch' in resume_checkpoint:
        start_epoch = resume_checkpoint['epoch']
        for scheduler in schedulers.values():
            for i in range(start_epoch):
                scheduler.step()

    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        pbar.update(total_steps)
        train_losses = []
        for epoch in range(start_epoch, epochs):
            if not epoch % epochs_til_checkpoint and epoch:
                for key, model in models.items():
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            checkpoints_dir,
                            'model_' + key + '_epoch_%04d.pth' % epoch))
                    np.savetxt(
                        os.path.join(checkpoints_dir,
                                     'train_losses_epoch_%04d.txt' % epoch),
                        np.array(train_losses))
                for key, optim in optims.items():
                    torch.save(
                        {
                            'epoch': epoch,
                            'total_steps': total_steps,
                            'optimizer_state_dict': optim.state_dict()
                        },
                        os.path.join(
                            checkpoints_dir,
                            'optim_' + key + '_epoch_%04d.pth' % epoch))

            for step, (model_input, meta, gt,
                       misc) in enumerate(train_dataloader):
                start_time = time.time()

                for optim in optims.values():
                    optim.zero_grad()

                list_chunked_model_input, list_chunked_meta, list_chunked_gt = \
                    chunk_lists_from_batch_fn(model_input, meta, gt, max_chunk_size)

                num_chunks = len(list_chunked_gt)
                batch_avged_losses = {}
                batch_avged_tot_loss = 0.
                for chunk_idx, (chunked_model_input, chunked_meta, chunked_gt) \
                        in enumerate(zip(list_chunked_model_input, list_chunked_meta, list_chunked_gt)):
                    chunked_model_input = dict2cuda(chunked_model_input)
                    chunked_meta = dict2cuda(chunked_meta)
                    chunked_gt = dict2cuda(chunked_gt)

                    # forward pass through model
                    chunk_model_outputs = {
                        key: model(chunked_model_input)
                        for key, model in models.items()
                    }
                    losses = loss_fn(chunk_model_outputs,
                                     chunked_gt,
                                     dataloader=train_dataloader)

                    # loss from forward pass
                    train_loss = 0.
                    for loss_name, loss in losses.items():

                        # slowly apply loss if less than start iter
                        if loss_name in loss_start:
                            if total_steps < loss_start[loss_name]:
                                loss = (total_steps /
                                        loss_start[loss_name])**2 * loss

                        single_loss = loss.mean()
                        train_loss += single_loss / num_chunks

                        batch_avged_tot_loss += float(single_loss / num_chunks)
                        if loss_name in batch_avged_losses:
                            batch_avged_losses[
                                loss_name] += single_loss / num_chunks
                        else:
                            batch_avged_losses.update(
                                {loss_name: single_loss / num_chunks})

                    if weight_decay > 0:
                        for model in models.values():
                            train_loss += weight_decay * weight_decay_loss(
                                model)
                    train_loss.backward()

                for loss_name, loss in batch_avged_losses.items():
                    writer.add_scalar(loss_name, loss, total_steps)
                train_losses.append(batch_avged_tot_loss)
                writer.add_scalar("total_train_loss", batch_avged_tot_loss,
                                  total_steps)

                if clip_grad:
                    for model in models.values():
                        if isinstance(clip_grad, bool):
                            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           max_norm=0.1)
                        else:
                            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           max_norm=clip_grad)

                for optim in optims.values():
                    optim.step()

                if not total_steps % steps_til_summary:
                    for key, model in models.items():
                        torch.save(
                            model.state_dict(),
                            os.path.join(checkpoints_dir,
                                         'model_' + key + '_current.pth'))
                    for key, optim in optims.items():
                        torch.save(
                            {
                                'epoch': epoch,
                                'total_steps': total_steps,
                                'optimizer_state_dict': optim.state_dict()
                            },
                            os.path.join(checkpoints_dir,
                                         'optim_' + key + '_current.pth'))
                    summary_fn(models, train_dataloader, val_dataloader,
                               loss_fn, optims, meta, gt, misc, writer,
                               total_steps)

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write(
                        "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                        (epoch, train_loss, time.time() - start_time))

                total_steps += 1

            for scheduler in schedulers.values():
                scheduler.step()

        for key, model in models.items():
            torch.save(
                model.state_dict(),
                os.path.join(checkpoints_dir, 'model_' + key + '_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
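
train_wchunks depends on two helpers that are not defined in the snippet, dict2cuda and weight_decay_loss. Plausible sketches consistent with how they are called (both are assumptions) are:

import torch

def dict2cuda(a_dict):
    # Move every tensor value of a (possibly nested) dict onto the GPU, leaving other values untouched.
    out = {}
    for key, value in a_dict.items():
        if isinstance(value, torch.Tensor):
            out[key] = value.cuda()
        elif isinstance(value, dict):
            out[key] = dict2cuda(value)
        else:
            out[key] = value
    return out

def weight_decay_loss(model):
    # Simple L2 penalty over all model parameters.
    return sum(p.pow(2).sum() for p in model.parameters())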
Example #9
def train(model, train_dataloader, epochs, lr, steps_til_summary, epochs_til_checkpoint, model_dir,
          loss_fn, pruning_fn, summary_fn, double_precision=False, clip_grad=False,
          loss_schedules=None, resume_checkpoint={}, objs_to_save={}, epochs_til_pruning=4):
    optim = torch.optim.Adam(lr=lr, params=model.parameters())

    # load optimizer if supplied
    if 'optimizer_state_dict' in resume_checkpoint:
        optim.load_state_dict(resume_checkpoint['optimizer_state_dict'])

    for g in optim.param_groups:
        g['lr'] = lr

    if os.path.exists(os.path.join(model_dir, 'summaries')):
        val = input("The model directory %s exists. Overwrite? (y/n)" % model_dir)
        if val == 'y':
            if os.path.exists(os.path.join(model_dir, 'summaries')):
                shutil.rmtree(os.path.join(model_dir, 'summaries'))
            if os.path.exists(os.path.join(model_dir, 'checkpoints')):
                shutil.rmtree(os.path.join(model_dir, 'checkpoints'))

    os.makedirs(model_dir, exist_ok=True)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)
    total_steps = 0
    if 'total_steps' in resume_checkpoint:
        total_steps = resume_checkpoint['total_steps']

    start_epoch = 0
    if 'epoch' in resume_checkpoint:
        start_epoch = resume_checkpoint['epoch']

    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        pbar.update(total_steps)
        train_losses = []
        for epoch in range(start_epoch, epochs):
            if not epoch % epochs_til_checkpoint and epoch:
                torch.save(model.state_dict(),
                           os.path.join(checkpoints_dir, 'model_%06d.pth' % total_steps))
                np.savetxt(os.path.join(checkpoints_dir, 'train_losses_%06d.txt' % total_steps),
                           np.array(train_losses))
                save_dict = {'epoch': epoch,
                             'total_steps': total_steps,
                             'optimizer_state_dict': optim.state_dict()}
                save_dict.update(objs_to_save)
                torch.save(save_dict, os.path.join(checkpoints_dir, 'optim_%06d.pth' % total_steps))

            # prune
            if not epoch % epochs_til_pruning and epoch:
                pruning_fn(model, train_dataloader.dataset)

            if not (epoch + 1) % epochs_til_pruning:
                retile = False
            else:
                retile = True

            for step, (model_input, gt) in enumerate(train_dataloader):
                start_time = time.time()

                tmp = {}
                for key, value in model_input.items():
                    if isinstance(value, torch.Tensor):
                        tmp.update({key: value.cuda()})
                    else:
                        tmp.update({key: value})
                model_input = tmp

                tmp = {}
                for key, value in gt.items():
                    if isinstance(value, torch.Tensor):
                        tmp.update({key: value.cuda()})
                    else:
                        tmp.update({key: value})
                gt = tmp

                if double_precision:
                    model_input = {key: value.double() for key, value in model_input.items()}
                    gt = {key: value.double() for key, value in gt.items()}

                model_output = model(model_input)
                losses = loss_fn(model_output, gt, total_steps, retile=retile)

                train_loss = 0.
                for loss_name, loss in losses.items():
                    single_loss = loss.mean()

                    if loss_schedules is not None and loss_name in loss_schedules:
                        writer.add_scalar(loss_name + "_weight", loss_schedules[loss_name](total_steps), total_steps)
                        single_loss *= loss_schedules[loss_name](total_steps)

                    writer.add_scalar(loss_name, single_loss, total_steps)
                    train_loss += single_loss

                train_losses.append(train_loss.item())
                writer.add_scalar("total_train_loss", train_loss, total_steps)

                optim.zero_grad()
                train_loss.backward()

                if clip_grad:
                    if isinstance(clip_grad, bool):
                        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_grad)

                optim.step()

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write("Epoch %d, Total loss %0.6f, iteration time %0.6f" % (epoch, train_loss, time.time() - start_time))
                    summary_fn(model, model_input, gt, model_output, writer, total_steps)

                total_steps += 1

            # after epoch
            tqdm.write("Epoch %d, Total loss %0.6f, iteration time %0.6f" % (epoch, train_loss, time.time() - start_time))

        # save model at the end of training
        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final_%06d.pth' % total_steps))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final_%06d.txt' % total_steps),
                   np.array(train_losses))
        save_dict = {'epoch': epoch,
                     'total_steps': total_steps,
                     'optimizer_state_dict': optim.state_dict()}
        save_dict.update(objs_to_save)
        torch.save(save_dict, os.path.join(checkpoints_dir, 'optim_final_%06d.pth' % total_steps))
Example #10
def main():
    if opt.dataset == 'camera':
        img_dataset = dataio.Camera()
    elif opt.dataset == 'pluto':
        pluto_url = "https://upload.wikimedia.org/wikipedia/commons/e/ef/Pluto_in_True_Color_-_High-Res.jpg"
        img_dataset = dataio.ImageFile('../data/pluto.jpg', url=pluto_url, grayscale=opt.grayscale)
    elif opt.dataset == 'tokyo':
        img_dataset = dataio.ImageFile('../data/tokyo.tif', grayscale=opt.grayscale)
    elif opt.dataset == 'mars':
        img_dataset = dataio.ImageFile('../data/mars.tif', grayscale=opt.grayscale)

    if len(opt.patch_size) == 1:
        opt.patch_size = 3*opt.patch_size

    # set up dataset
    coord_dataset = dataio.Patch2DWrapperMultiscaleAdaptive(img_dataset,
                                                            sidelength=opt.res,
                                                            patch_size=opt.patch_size[1:], jitter=True,
                                                            num_workers=opt.num_workers, length=opt.steps_til_tiling,
                                                            scale_init=opt.scale_init, max_patches=opt.max_patches)

    opt.num_epochs = opt.num_iters // coord_dataset.__len__()

    image_resolution = (opt.res, opt.res)

    dataloader = DataLoader(coord_dataset, shuffle=False, batch_size=1, pin_memory=True,
                            num_workers=opt.num_workers)

    if opt.resume is not None:
        path, iter = opt.resume
        iter = int(iter)
        assert(os.path.isdir(path))
        assert opt.config is not None, 'Specify config file'

    # Define the model.
    if opt.grayscale:
        out_features = 1
    else:
        out_features = 3

    if opt.model_type == 'multiscale':
        model = modules.ImplicitAdaptivePatchNet(in_features=3, out_features=out_features,
                                                 num_hidden_layers=opt.hidden_layers,
                                                 hidden_features=opt.hidden_features,
                                                 feature_grid_size=(opt.patch_size[0], opt.patch_size[1], opt.patch_size[2]),
                                                 sidelength=opt.res,
                                                 num_encoding_functions=10,
                                                 patch_size=opt.patch_size[1:])

    elif opt.model_type == 'siren':
        model = modules.ImplicitNet(opt.res, in_features=2,
                                    out_features=out_features,
                                    num_hidden_layers=4,
                                    hidden_features=1536,
                                    mode='siren', w0=opt.w0)
    elif opt.model_type == 'pe':
        model = modules.ImplicitNet(opt.res, in_features=2,
                                    out_features=out_features,
                                    num_hidden_layers=4,
                                    hidden_features=1536,
                                    mode='pe')
    else:
        raise NotImplementedError('Only model types multiscale, siren, and pe are implemented')

    model.cuda()

    # print number of model parameters
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print(f'Num. Parameters: {params}')

    # Define the loss
    loss_fn = partial(loss_functions.image_mse,
                      tiling_every=opt.steps_til_tiling,
                      dataset=coord_dataset,
                      model_type=opt.model_type)
    summary_fn = partial(utils.write_image_patch_multiscale_summary, image_resolution, opt.patch_size[1:], coord_dataset, model_type=opt.model_type, skip=opt.skip_logging)

    # Define the pruning function
    pruning_fn = partial(pruning_functions.no_pruning,
                         pruning_every=1)

    # if we are resuming from a saved checkpoint
    if opt.resume is not None:
        print('Loading checkpoints')
        model_dict = torch.load(path + '/checkpoints/' + f'model_{iter:06d}.pth')
        model.load_state_dict(model_dict)

        # load optimizers
        try:
            resume_checkpoint = {}
            optim_dict = torch.load(path + '/checkpoints/' + f'optim_{iter:06d}.pth')
            for g in optim_dict['optimizer_state_dict']['param_groups']:
                g['lr'] = opt.lr
            resume_checkpoint['optimizer_state_dict'] = optim_dict['optimizer_state_dict']
            resume_checkpoint['total_steps'] = optim_dict['total_steps']
            resume_checkpoint['epoch'] = optim_dict['epoch']

            # initialize model state_dict
            print('Initializing models')
            coord_dataset.quadtree.__load__(optim_dict['quadtree'])
            coord_dataset.synchronize()

        except FileNotFoundError:
            print('Unable to load optimizer checkpoints')
    else:
        resume_checkpoint = {}

    if opt.eval:
        run_eval(model, coord_dataset)
    else:
        # Save command-line parameters to log directory.
        root_path = os.path.join(opt.logging_root, opt.experiment_name)
        utils.cond_mkdir(root_path)
        p.write_config_file(opt, [os.path.join(root_path, 'config.ini')])

        # Save text summary of model into log directory.
        with open(os.path.join(root_path, "model.txt"), "w") as out_file:
            out_file.write(str(model))

        objs_to_save = {'quadtree': coord_dataset.quadtree}

        training.train(model=model, train_dataloader=dataloader, epochs=opt.num_epochs, lr=opt.lr,
                       steps_til_summary=opt.steps_til_summary, epochs_til_checkpoint=opt.epochs_til_ckpt,
                       model_dir=root_path, loss_fn=loss_fn, pruning_fn=pruning_fn, summary_fn=summary_fn, objs_to_save=objs_to_save,
                       resume_checkpoint=resume_checkpoint)
Example #11
def run_eval(model, coord_dataset):
    # get checkpoint directory
    checkpoint_dir = os.path.join(os.path.dirname(opt.config), 'checkpoints')

    # make eval directory
    eval_dir = os.path.join(os.path.dirname(opt.config), 'eval')
    utils.cond_mkdir(eval_dir)

    # get model & optim files
    model_files = sorted([f for f in os.listdir(checkpoint_dir) if re.search(r'model_[0-9]+.pth', f)], reverse=True)
    optim_files = sorted([f for f in os.listdir(checkpoint_dir) if re.search(r'optim_[0-9]+.pth', f)], reverse=True)

    # extract iterations
    iters = [int(re.search(r'[0-9]+', f)[0]) for f in model_files]

    # append beginning of path
    model_files = [os.path.join(checkpoint_dir, f) for f in model_files]
    optim_files = [os.path.join(checkpoint_dir, f) for f in optim_files]

    # iterate through model and optim files
    metrics = {}
    saved_gt = False
    for curr_iter, model_path, optim_path in zip(tqdm(iters), model_files, optim_files):

        # load model and optimizer files
        print('Loading models')
        model_dict = torch.load(model_path)
        optim_dict = torch.load(optim_path)

        # initialize model state_dict
        print('Initializing models')
        model.load_state_dict(model_dict)
        coord_dataset.quadtree.__load__(optim_dict['quadtree'])
        coord_dataset.synchronize()

        # save image and calculate psnr
        coord_dataset.toggle_eval()
        model_input, gt = coord_dataset[0]
        coord_dataset.toggle_eval()

        # convert to cuda and add batch dimension
        tmp = {}
        for key, value in model_input.items():
            if isinstance(value, torch.Tensor):
                tmp.update({key: value[None, ...].cpu()})
            else:
                tmp.update({key: value})
        model_input = tmp

        tmp = {}
        for key, value in gt.items():
            if isinstance(value, torch.Tensor):
                tmp.update({key: value[None, ...].cpu()})
            else:
                tmp.update({key: value})
        gt = tmp

        # run the model on uniform samples
        print('Running forward pass')
        n_channels = gt['img'].shape[-1]
        start = time()
        with torch.no_grad():
            pred_img = utils.process_batch_in_chunks(model_input, model, max_chunk_size=512)['model_out']['output']
        torch.cuda.synchronize()
        print(f'Model: {time() - start:.02f}')

        # get pixel idx for each coordinate
        start = time()
        coords = model_input['fine_abs_coords'].detach().cpu().numpy()
        pixel_idx = np.zeros_like(coords).astype(np.int32)
        pixel_idx[..., 0] = np.round((coords[..., 0] + 1.)/2. * (coord_dataset.sidelength[0]-1)).astype(np.int32)
        pixel_idx[..., 1] = np.round((coords[..., 1] + 1.)/2. * (coord_dataset.sidelength[1]-1)).astype(np.int32)
        pixel_idx = pixel_idx.reshape(-1, 2)

        # assign predicted image values into a new array
        # need to use numpy since it supports index assignment
        pred_img = pred_img.detach().cpu().numpy().reshape(-1, n_channels)
        display_pred = np.zeros((*coord_dataset.sidelength, n_channels))
        display_pred[[pixel_idx[:, 0]], [pixel_idx[:, 1]]] = pred_img
        display_pred = torch.tensor(display_pred)[None, ...]
        display_pred = display_pred.permute(0, 3, 1, 2)

        if not saved_gt:
            gt_img = gt['img'].detach().cpu().numpy().reshape(-1, n_channels)
            display_gt = np.zeros((*coord_dataset.sidelength, n_channels))
            display_gt[[pixel_idx[:, 0]], [pixel_idx[:, 1]]] = gt_img
            display_gt = torch.tensor(display_gt)[None, ...]
            display_gt = display_gt.permute(0, 3, 1, 2)
        print(f'Reshape: {time() - start:.02f}')

        # record metrics
        start = time()
        psnr, ssim = get_metrics(display_pred, display_gt)
        metrics.update({curr_iter: {'psnr': psnr, 'ssim': ssim}})
        print(f'Metrics: {time() - start:.02f}')
        print(f'Iter: {curr_iter}, PSNR: {psnr:.02f}')

        # save images
        pred_out = np.clip((display_pred.squeeze().numpy()/2.) + 0.5, a_min=0., a_max=1.).transpose(1, 2, 0)*255
        pred_out = pred_out.astype(np.uint8)
        pred_fname = os.path.join(eval_dir, f'pred_{curr_iter:06d}.png')
        print('Saving image')
        cv2.imwrite(pred_fname, cv2.cvtColor(pred_out, cv2.COLOR_RGB2BGR))

        if not saved_gt:
            print('Saving gt')
            gt_out = np.clip((display_gt.squeeze().numpy()/2.) + 0.5, a_min=0., a_max=1.).transpose(1, 2, 0)*255
            gt_out = gt_out.astype(np.uint8)
            gt_fname = os.path.join(eval_dir, 'gt.png')
            cv2.imwrite(gt_fname, cv2.cvtColor(gt_out, cv2.COLOR_RGB2BGR))
            saved_gt = True

        # save tiling
        tiling_fname = os.path.join(eval_dir, f'tiling_{curr_iter:06d}.pdf')
        coord_dataset.quadtree.draw()
        plt.savefig(tiling_fname)

        # save metrics
        metric_fname = os.path.join(eval_dir, f'metrics_{curr_iter:06d}.npy')
        np.save(metric_fname, metrics)
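
run_eval also uses a get_metrics helper that is not included. A sketch under the assumption that display_pred and display_gt are (1, C, H, W) tensors in [-1, 1] (consistent with how the images are rescaled before saving above) and that scikit-image >= 0.19 is available:

from skimage.metrics import peak_signal_noise_ratio, structural_similarity

def get_metrics(pred, gt):
    # Hypothetical helper: PSNR/SSIM between predicted and ground-truth images.
    pred = pred.squeeze().permute(1, 2, 0).numpy() / 2. + 0.5
    gt = gt.squeeze().permute(1, 2, 0).numpy() / 2. + 0.5
    psnr = peak_signal_noise_ratio(gt, pred, data_range=1.)
    ssim = structural_similarity(gt, pred, data_range=1., channel_axis=-1)
    return psnr, ssim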
Example #12
def train(model,
          train_dataloader,
          epochs,
          lr,
          steps_til_summary,
          epochs_til_checkpoint,
          model_dir,
          loss_fn,
          summary_fn,
          val_dataloader=None,
          double_precision=False,
          clip_grad=False,
          use_lbfgs=False,
          loss_schedules=None):
    optim = torch.optim.Adam(lr=lr, params=model.parameters())

    if os.path.exists(model_dir):
        val = input("The model directory %s exists. Overwrite? (y/n)" %
                    model_dir)
        #val = 'y'
        if val == 'y':
            shutil.rmtree(model_dir)

    os.makedirs(model_dir)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(epochs):
            if not epoch % epochs_til_checkpoint and epoch:
                torch.save(
                    model.state_dict(),
                    os.path.join(checkpoints_dir,
                                 'model_epoch_%04d.pth' % epoch))
                np.savetxt(
                    os.path.join(checkpoints_dir,
                                 'train_losses_epoch_%04d.txt' % epoch),
                    np.array(train_losses))

            for step, batch in enumerate(train_dataloader):
                start_time = time.time()

                model_input, gt = convert_metadata(batch['train'])
                test_model_input, test_gt = convert_metadata(batch['test'])

                _, train_targets = batch['train']
                _, test_targets = batch['test']

                num_tasks = test_targets.size(0)
                num_adaptation_steps = 10
                step_size = 0.001
                results = {
                    'num_tasks': num_tasks,
                    'inner_losses': np.zeros((num_adaptation_steps, num_tasks),
                                             dtype=np.float32),
                    'outer_losses': np.zeros((num_tasks,), dtype=np.float32),
                    'mean_outer_loss': 0.
                }
                mean_outer_loss = torch.tensor(0.).cuda()

                for task_num in range(train_targets.shape[0]):
                    params, adaptation_results = adapt(
                        model,
                        loss_fn,
                        model_input,
                        gt,
                        num_adaptation_steps=num_adaptation_steps,
                        step_size=step_size,
                        writer=writer,
                        summary_fn=summary_fn)
                    results['inner_losses'][:, task_num] = adaptation_results[
                        'inner_losses']

                    # do the same processing with the test dataset
                    test_model_output = model(test_model_input,
                                              test=True,
                                              params=params)
                    outer_loss = loss_fn(test_model_output, test_gt)
                    img_loss = outer_loss['img_loss']
                    results['outer_losses'][task_num] = img_loss
                    mean_outer_loss += img_loss
                mean_outer_loss.div_(num_tasks)

                results['mean_outer_loss'] = mean_outer_loss.item()
                writer.add_scalar('mean_outer_loss',
                                  results['mean_outer_loss'], step)

                mean_outer_loss.backward()
                optim.step()

                ###################
                train_loss = 0.0
                for loss_name, loss in outer_loss.items():

                    single_loss = loss.mean()
                    writer.add_scalar(loss_name, single_loss, total_steps)
                    train_loss += single_loss

                train_losses.append(train_loss.item())

                if not total_steps % steps_til_summary:
                    torch.save(
                        model.state_dict(),
                        os.path.join(checkpoints_dir, 'model_current.pth'))
                    summary_fn(model,
                               test_model_input,
                               test_gt,
                               test_model_output,
                               writer,
                               total_steps,
                               inner=False)

                # if not use_lbfgs:
                #     optim.zero_grad()
                #     train_loss.backward()
                #
                #     optim.step()

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write(
                        "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                        (epoch, train_loss, time.time() - start_time))

                total_steps += 1

        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
Example #13
def getTestMSE(dataloader, subdir):
    MSEs = []
    PSNRs = []
    total_steps = 0
    utils.cond_mkdir(os.path.join(root_path, subdir))
    utils.cond_mkdir(os.path.join(root_path, 'ground_truth'))

    with tqdm(total=len(dataloader)) as pbar:
        for step, (model_input, gt) in enumerate(dataloader):
            model_input['idx'] = torch.Tensor([model_input['idx']]).long()
            model_input = {
                key: value.cuda()
                for key, value in model_input.items()
            }
            gt = {key: value.cuda() for key, value in gt.items()}

            with torch.no_grad():
                model_output = model(model_input)

            out_img = dataio.lin2img(model_output['model_out'],
                                     image_resolution).squeeze().permute(
                                         1, 2, 0).detach().cpu().numpy()
            out_img += 1
            out_img /= 2.
            out_img = np.clip(out_img, 0., 1.)
            gt_img = dataio.lin2img(gt['img'],
                                    image_resolution).squeeze().permute(
                                        1, 2, 0).detach().cpu().numpy()
            gt_img += 1
            gt_img /= 2.
            gt_img = np.clip(gt_img, 0., 1.)

            sparse_img = np.ones((image_resolution[0], image_resolution[1], 3))
            coords_sub = model_input['coords_sub'].squeeze().detach().cpu(
            ).numpy()
            rgb_sub = model_input['img_sub'].squeeze().detach().cpu().numpy()
            for index in range(0, coords_sub.shape[0]):
                r = int(round((coords_sub[index][0] + 1) / 2 * 31))
                c = int(round((coords_sub[index][1] + 1) / 2 * 31))
                sparse_img[r, c, :] = np.clip((rgb_sub[index, :] + 1) / 2, 0.,
                                              1.)

            imageio.imwrite(
                os.path.join(root_path, subdir,
                             str(total_steps) + '_sparse.png'),
                to_uint8(sparse_img))
            imageio.imwrite(
                os.path.join(root_path, subdir,
                             str(total_steps) + '.png'), to_uint8(out_img))
            imageio.imwrite(
                os.path.join(root_path, 'ground_truth',
                             str(total_steps) + '.png'), to_uint8(gt_img))

            MSE = np.mean((out_img - gt_img)**2)
            MSEs.append(MSE)

            PSNR = skimage.measure.compare_psnr(out_img, gt_img, data_range=1)
            PSNRs.append(PSNR)

            pbar.update(1)
            total_steps += 1

    return MSEs, PSNRs
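
Note that skimage.measure.compare_psnr, used above, was removed in scikit-image 0.18; on current versions the equivalent call is:

from skimage.metrics import peak_signal_noise_ratio

PSNR = peak_signal_noise_ratio(out_img, gt_img, data_range=1)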
Example #14
def train_with_signed_distance(model,
                               train_dataloader,
                               val_dataloader,
                               epochs,
                               lr,
                               steps_til_summary,
                               epochs_til_checkpoint,
                               model_dir,
                               supervision='dense'):
    
    assert (supervision in ['levelset', 'dense'])
    
    optim = torch.optim.Adam(lr=lr, params=model.parameters())

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(epochs):
            for step, (model_input, gt) in enumerate(train_dataloader):
                start_time = time.time()
            
                model_input = {key: value.cuda() for key, value in model_input.items()}
                gt = {key: value.cuda() for key, value in gt.items()}
                
                if supervision=='levelset': # Use level_set only
                    model_input_level_set = model_input.copy()
                    model_input_level_set['coords'] = model_input_level_set['level_set']
                    model_input_level_set = {key: value.cuda() for key, value in model_input_level_set.items()}
                    pred_sd, z = model.legacy_forward(**model_input_level_set)
                    loss = modules.sdf_loss(pred_sd, gt['ls_sds']) + torch.mean(z ** 2)
                elif supervision=='dense': # Use standard coords
                    pred_sd, z = model.legacy_forward(**model_input)
                    loss = modules.sdf_loss(pred_sd, gt['sds']) + torch.mean(z ** 2)

                train_losses.append(loss.item())
                writer.add_scalar("train_loss", loss, total_steps)

                optim.zero_grad()
                loss.backward()
                optim.step()
                pbar.update(1)

                if not total_steps % steps_til_summary:
                    corrected_loss = utils.evaluate_model(model, train_dataloader)
                    writer.add_scalar('corrected_loss', corrected_loss, total_steps)
                    pred_sd, z = model.legacy_forward(**model_input)
                    tqdm.write("Epoch %d, Total loss %0.6f, iteration time %0.6f" % (epoch, corrected_loss, time.time() - start_time))
                    utils.write_summaries(pred_sd, model_input, gt, writer, total_steps, 'train_')
                    
                    if val_dataloader is not None:
                        print("Running validation set...")
                        model.eval()
                        with torch.no_grad():
                            val_losses = []
                            for meta_batch in val_dataloader:
                                pred_sd = model(meta_batch)
                                val_loss = modules.sdf_loss(pred_sd, meta_batch['test'][1].cuda())
                                val_losses.append(val_loss.item())

                            writer.add_scalar("val_loss", np.mean(val_losses), total_steps)
                            utils.write_meta_summaries(pred_sd, meta_batch, writer, total_steps, 'val_')
                        model.train()

                total_steps += 1

            if not epoch % epochs_til_checkpoint and epoch:
                
                torch.save(model.state_dict(),
                           os.path.join(checkpoints_dir, 'model_final.pth'))
                np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                           np.array(train_losses))

        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
Example #15
def train_with_signed_distance_meta(model,
                                    train_dataloader,
                                    val_dataloader,
                                    epochs,
                                    lr,
                                    steps_til_summary,
                                    epochs_til_checkpoint,
                                    model_dir):
    optim = torch.optim.Adam(lr=lr, params=model.parameters())

    if os.path.exists(model_dir):
        val = input("The model directory %s exists. Overwrite? (y/n)"%model_dir)
        if val == 'y':
            shutil.rmtree(model_dir)

    os.makedirs(model_dir)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)
    
    num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print (f'\n\nTraining model with {num_parameters} parameters\n\n')

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(epochs):
            for step, meta_batch in enumerate(train_dataloader):
                start_time = time.time()
                pred_sd, _ = model(meta_batch)
                loss = modules.sdf_loss(pred_sd, meta_batch['test'][1].cuda())

                train_losses.append(loss.item())
                writer.add_scalar("train_loss", loss, total_steps)

                optim.zero_grad()
                loss.backward()
                optim.step()
                pbar.update(1)

                tqdm.write(
                    "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                    (epoch, loss, time.time() - start_time))

                if not total_steps % steps_til_summary:
                    utils.write_meta_summaries(pred_sd, meta_batch, writer, total_steps, 'train_')

                    print("Running validation set...")
                    model.eval()
                    with torch.no_grad():
                        val_losses = []
                        for val_idx, meta_batch in enumerate(val_dataloader):
                            pred_sd, _ = model(meta_batch)
                            val_loss = modules.sdf_loss(pred_sd, meta_batch['test'][1].cuda())
                            val_losses.append(val_loss.cpu().numpy())

                            if not val_idx:
                                utils.write_meta_summaries(pred_sd, meta_batch, writer, total_steps, 'val_')

                        writer.add_scalar("val_loss", np.mean(val_losses), total_steps)
                        tqdm.write("Validation loss %0.6e" % loss)
                    model.train()

                total_steps += 1

            if not epoch % epochs_til_checkpoint:
                torch.save(model.state_dict(),
                           os.path.join(checkpoints_dir, 'epoch_%03d.pth'%epoch))
                np.savetxt(os.path.join(checkpoints_dir, 'train_losses_epoch_%03d.txt'%epoch),
                           np.array(train_losses))

        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
Example #16
def train(model,
          train_dataloader,
          epochs,
          lr,
          steps_til_summary,
          epochs_til_checkpoint,
          model_dir,
          loss_fn,
          summary_fn,
          val_dataloader=None,
          double_precision=False,
          clip_grad=False,
          use_lbfgs=False,
          loss_schedules=None):

    optim = torch.optim.Adam(lr=lr, params=model.parameters())

    # copy settings from Raissi et al. (2019) and here
    # https://github.com/maziarraissi/PINNs
    if use_lbfgs:
        optim = torch.optim.LBFGS(lr=lr,
                                  params=model.parameters(),
                                  max_iter=50000,
                                  max_eval=50000,
                                  history_size=50,
                                  line_search_fn='strong_wolfe')

    if os.path.exists(model_dir):
        #val = input("The model directory %s exists. Overwrite? (y/n)"%model_dir)
        val = 'y'
        if val == 'y':
            shutil.rmtree(model_dir)

    os.makedirs(model_dir)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(epochs):
            if not epoch % epochs_til_checkpoint and epoch:
                torch.save(
                    model.state_dict(),
                    os.path.join(checkpoints_dir,
                                 'model_epoch_%04d.pth' % epoch))
                np.savetxt(
                    os.path.join(checkpoints_dir,
                                 'train_losses_epoch_%04d.txt' % epoch),
                    np.array(train_losses))

            for step, (model_input, gt) in enumerate(train_dataloader):
                start_time = time.time()

                model_input = {
                    key: value.cuda()
                    for key, value in model_input.items()
                }
                gt = {key: value.cuda() for key, value in gt.items()}

                model_output = model(model_input)
                losses = loss_fn(model_output, gt)

                train_loss = 0.

                for loss_name, loss in losses.items():

                    single_loss = loss.mean()

                    if loss_schedules is not None and loss_name in loss_schedules:
                        writer.add_scalar(
                            loss_name + "_weight",
                            loss_schedules[loss_name](total_steps),
                            total_steps)
                        single_loss *= loss_schedules[loss_name](total_steps)

                    writer.add_scalar(loss_name, single_loss, total_steps)
                    train_loss += single_loss

                train_losses.append(train_loss.item())
                writer.add_scalar("total_train_loss", train_loss, total_steps)

                if not total_steps % steps_til_summary:
                    torch.save(
                        model.state_dict(),
                        os.path.join(checkpoints_dir, 'model_current.pth'))
                    summary_fn(model, model_input, gt, model_output, writer,
                               total_steps)

                if not use_lbfgs:
                    optim.zero_grad()
                    train_loss.backward()

                    # Optionally clip gradients: True clips to max_norm=1,
                    # a float clips to that value.
                    if clip_grad:
                        max_norm = 1. if isinstance(clip_grad, bool) else clip_grad
                        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       max_norm=max_norm)

                    optim.step()

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write(
                        "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                        (epoch, train_loss, time.time() - start_time))

                    if val_dataloader is not None:
                        print("Running validation set...")
                        model.eval()
                        with torch.no_grad():
                            val_losses = []
                            for (model_input, gt) in val_dataloader:
                                # Move the validation batch to the GPU, as in
                                # the training loop.
                                model_input = {k: v.cuda() for k, v in model_input.items()}
                                gt = {k: v.cuda() for k, v in gt.items()}
                                model_output = model(model_input)
                                # loss_fn returns a dict of named losses;
                                # reduce it to a scalar before averaging.
                                val_loss = sum(l.mean() for l in loss_fn(model_output, gt).values())
                                val_losses.append(val_loss.item())

                            writer.add_scalar("val_loss", np.mean(val_losses),
                                              total_steps)
                        model.train()

                total_steps += 1

        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
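The train() above only assumes a few interfaces: the dataloader yields (model_input, gt) pairs of tensor dicts, the model maps one dict to another, loss_fn returns a dict of named losses whose means are summed, and summary_fn is called every steps_til_summary steps. The wiring below is a hypothetical, self-contained sketch of those interfaces; ToyDataset, ToyModel and toy_loss are placeholders invented for illustration, not part of the original repository, and a CUDA device is required because train() moves every batch to the GPU.

import torch
from torch.utils.data import Dataset, DataLoader


class ToyDataset(Dataset):
    # 256 samples of a toy 1D regression problem, packed as tensor dicts.
    def __len__(self):
        return 256

    def __getitem__(self, idx):
        x = 2.0 * torch.rand(1) - 1.0
        return {'coords': x}, {'func': torch.sin(3.0 * x)}


class ToyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(1, 64), torch.nn.ReLU(), torch.nn.Linear(64, 1))

    def forward(self, model_input):
        return {'model_out': self.net(model_input['coords'])}


def toy_loss(model_output, gt):
    # train() takes the mean of every entry in this dict and sums them.
    return {'func_loss': (model_output['model_out'] - gt['func']) ** 2}


model = ToyModel().cuda()
train(model=model,
      train_dataloader=DataLoader(ToyDataset(), batch_size=32, shuffle=True),
      epochs=10,
      lr=1e-4,
      steps_til_summary=50,
      epochs_til_checkpoint=5,
      model_dir='./logs/toy_experiment',
      loss_fn=toy_loss,
      summary_fn=lambda model, model_input, gt, model_output, writer, step: None)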
Exemplo n.º 17
0
def train(model,
          train_dataloader,
          epochs,
          lr,
          steps_til_summary,
          epochs_til_checkpoint,
          model_dir,
          loss_fn,
          summary_fn=None,
          val_dataloader=None,
          double_precision=False,
          clip_grad=False,
          use_lbfgs=False,
          loss_schedules=None,
          validation_fn=None,
          start_epoch=0):

    optim = torch.optim.Adam(lr=lr, params=model.parameters())

    # copy settings from Raissi et al. (2019) and here
    # https://github.com/maziarraissi/PINNs
    if use_lbfgs:
        optim = torch.optim.LBFGS(lr=lr,
                                  params=model.parameters(),
                                  max_iter=50000,
                                  max_eval=50000,
                                  history_size=50,
                                  line_search_fn='strong_wolfe')

    # Load the checkpoint if required
    if start_epoch > 0:
        # Load the model and start training from that point onwards
        model_path = os.path.join(model_dir, 'checkpoints',
                                  'model_epoch_%04d.pth' % start_epoch)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['model'])
        model.train()
        optim.load_state_dict(checkpoint['optimizer'])
        optim.param_groups[0]['lr'] = lr
        assert (start_epoch == checkpoint['epoch'])
    else:
        # Start training from scratch
        if os.path.exists(model_dir):
            val = input("The model directory %s exists. Overwrite? (y/n)" %
                        model_dir)
            if val == 'y':
                shutil.rmtree(model_dir)
        # exist_ok avoids a crash when the user chooses not to overwrite.
        os.makedirs(model_dir, exist_ok=True)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(start_epoch, epochs):
            if not epoch % epochs_til_checkpoint and epoch:
                # Saving the optimizer state is important to produce consistent results
                checkpoint = {
                    'epoch': epoch,
                    'model': model.state_dict(),
                    'optimizer': optim.state_dict()
                }
                torch.save(
                    checkpoint,
                    os.path.join(checkpoints_dir,
                                 'model_epoch_%04d.pth' % epoch))
                np.savetxt(
                    os.path.join(checkpoints_dir,
                                 'train_losses_epoch_%04d.txt' % epoch),
                    np.array(train_losses))
                if validation_fn is not None:
                    validation_fn(model, checkpoints_dir, epoch)

            for step, (model_input, gt) in enumerate(train_dataloader):
                start_time = time.time()

                if torch.cuda.is_available():
                    model_input = {
                        key: value.cuda()
                        for key, value in model_input.items()
                    }
                    gt = {key: value.cuda() for key, value in gt.items()}
                else:
                    model_input = {
                        key: value.cpu()
                        for key, value in model_input.items()
                    }
                    gt = {key: value.cpu() for key, value in gt.items()}

                if double_precision:
                    model_input = {
                        key: value.double()
                        for key, value in model_input.items()
                    }
                    gt = {key: value.double() for key, value in gt.items()}

                if use_lbfgs:

                    def closure():
                        optim.zero_grad()
                        model_output = model(model_input)
                        losses = loss_fn(model_output, gt)
                        train_loss = 0.
                        for loss_name, loss in losses.items():
                            train_loss += loss.mean()
                        train_loss.backward()
                        return train_loss

                    optim.step(closure)

                model_output = model(model_input)
                losses = loss_fn(model_output, gt)

                train_loss = 0.
                for loss_name, loss in losses.items():
                    single_loss = loss.mean()

                    if loss_schedules is not None and loss_name in loss_schedules:
                        writer.add_scalar(
                            loss_name + "_weight",
                            loss_schedules[loss_name](total_steps),
                            total_steps)
                        single_loss *= loss_schedules[loss_name](total_steps)

                    writer.add_scalar(loss_name, single_loss, total_steps)
                    train_loss += single_loss

                train_losses.append(train_loss.item())
                writer.add_scalar("total_train_loss", train_loss, total_steps)

                if not total_steps % steps_til_summary:
                    torch.save(
                        model.state_dict(),
                        os.path.join(checkpoints_dir, 'model_current.pth'))
                    if summary_fn is not None:
                        summary_fn(model, model_input, gt, model_output,
                                   writer, total_steps)

                if not use_lbfgs:
                    optim.zero_grad()
                    train_loss.backward()

                    if clip_grad:
                        if isinstance(clip_grad, bool):
                            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           max_norm=1.)
                        else:
                            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           max_norm=clip_grad)

                    optim.step()

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write(
                        "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                        (epoch, train_loss, time.time() - start_time))

                    if val_dataloader is not None:
                        print("Running validation set...")
                        model.eval()
                        with torch.no_grad():
                            val_losses = []
                            for (model_input, gt) in val_dataloader:
                                # Match the device handling of the training loop.
                                if torch.cuda.is_available():
                                    model_input = {k: v.cuda() for k, v in model_input.items()}
                                    gt = {k: v.cuda() for k, v in gt.items()}
                                model_output = model(model_input)
                                # loss_fn returns a dict of named losses;
                                # reduce it to a scalar before averaging.
                                val_loss = sum(l.mean() for l in loss_fn(model_output, gt).values())
                                val_losses.append(val_loss.item())

                            writer.add_scalar("val_loss", np.mean(val_losses),
                                              total_steps)
                        model.train()

                total_steps += 1

        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
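Compared with the previous variant, this train() adds two hooks: checkpoints are dicts of the form {'epoch', 'model', 'optimizer'} so training can resume via start_epoch, and an optional validation_fn is invoked as validation_fn(model, checkpoints_dir, epoch) at every checkpoint epoch. Because only those three arguments are passed, any data or loss the callback needs must be captured in a closure; the factory below is a hypothetical sketch of such a callback, and the file it writes and the metric it reports are illustrative rather than part of the original repository.

import os
import torch


def make_validation_fn(val_dataloader, loss_fn):
    def validation_fn(model, checkpoints_dir, epoch):
        model.eval()
        losses = []
        with torch.no_grad():
            for model_input, gt in val_dataloader:
                if torch.cuda.is_available():
                    model_input = {k: v.cuda() for k, v in model_input.items()}
                    gt = {k: v.cuda() for k, v in gt.items()}
                model_output = model(model_input)
                # loss_fn returns a dict of named losses, as in the training loop.
                losses.append(
                    sum(l.mean() for l in loss_fn(model_output, gt).values()).item())
        model.train()
        with open(os.path.join(checkpoints_dir, 'val_epoch_%04d.txt' % epoch), 'w') as f:
            f.write('%0.6e\n' % (sum(losses) / max(len(losses), 1)))
    return validation_fn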
Exemplo n.º 18
0
def plot(stats_list):
    avg_reward_1 = np.array([stats[0] for stats in stats_list])
    std_reward_1 = np.array([stats[1] for stats in stats_list])
    avg_reward_2 = np.array([stats[2] for stats in stats_list])
    std_reward_2 = np.array([stats[3] for stats in stats_list])
    avg_reward_team = np.array([stats[4] for stats in stats_list])
    std_reward_team = np.array([stats[5] for stats in stats_list])

    num_wins_1 = np.array([stats[6] for stats in stats_list])
    num_wins_2 = np.array([stats[7] for stats in stats_list])
    num_wins_team = np.array([stats[8] for stats in stats_list])

    episode = np.arange(1, len(avg_reward_1) + 1)

    utils.cond_mkdir('./plots_2_agent/')

    plt.figure()
    plt.plot(episode, avg_reward_1)
    reward_upper = avg_reward_1 + std_reward_1
    reward_lower = avg_reward_1 - std_reward_1
    plt.fill_between(episode,
                     reward_lower,
                     reward_upper,
                     color='grey',
                     alpha=.2,
                     label=r'$\pm$ 1 std. dev.')
    plt.xlabel('Evaluation #')
    plt.ylabel('Reward')
    plt.title('Average Reward of Player 1')
    plt.legend()  # display the '± 1 std. dev.' label set on fill_between
    plt.savefig('./plots_2_agent/reward_1')

    plt.figure()
    plt.plot(episode, avg_reward_2)
    reward_upper = avg_reward_2 + std_reward_2
    reward_lower = avg_reward_2 - std_reward_2
    plt.fill_between(episode,
                     reward_lower,
                     reward_upper,
                     color='grey',
                     alpha=.2,
                     label=r'$\pm$ 1 std. dev.')
    plt.xlabel('Evaluation #')
    plt.ylabel('Reward')
    plt.title('Average Reward of Player 2')
    plt.legend()  # display the '± 1 std. dev.' label set on fill_between
    plt.savefig('./plots_2_agent/reward_2')

    plt.figure()
    plt.plot(episode, avg_reward_team)
    reward_upper = avg_reward_team + std_reward_team
    reward_lower = avg_reward_team - std_reward_team
    plt.fill_between(episode,
                     reward_lower,
                     reward_upper,
                     color='grey',
                     alpha=.2,
                     label=r'$\pm$ 1 std. dev.')
    plt.xlabel('Evaluation #')
    plt.ylabel('Reward')
    plt.title('Average Reward of Team')
    plt.legend()  # display the '± 1 std. dev.' label set on fill_between
    plt.savefig('./plots_2_agent/reward_team')

    plt.figure()
    plt.plot(episode, 100 * num_wins_1 / env.config.EVAL_EPISODE)
    plt.xlabel('Evaluation #')
    plt.ylabel('Win (%)')
    plt.title('Win % of Player 1')
    plt.savefig('./plots_2_agent/wins_1')

    plt.figure()
    plt.plot(episode, 100 * num_wins_2 / env.config.EVAL_EPISODE)
    plt.xlabel('Evaluation #')
    plt.ylabel('Win (%)')
    plt.title('Win % of Player 2')
    plt.savefig('./plots_2_agent/wins_2')

    plt.figure()
    plt.plot(episode, 100 * num_wins_team / env.config.EVAL_EPISODE)
    plt.xlabel('Evaluation #')
    plt.ylabel('Win (%)')
    plt.title('Win % of Team')
    plt.savefig('./plots_2_agent/wins_team')

    # Close all figures so repeated evaluation calls do not accumulate them.
    plt.close('all')
Exemplo n.º 19
0
            memory.push(
                torch.tensor(state, device=device).unsqueeze(0),
                torch.tensor(action, device=device),
                torch.tensor(next_state, device=device).unsqueeze(0),
                torch.tensor(reward, device=device))

            # Perform one step of the optimization (on the policy network).
            optimize_model(input_stack, env)
            if done:
                break
            env.render()
        # Update the target network, copying all weights and biases in Tron_DQN
        if e % env.config.TARGET_UPDATE_FREQUENCY == 0:
            target_net.load_state_dict(policy_net.state_dict())

        if e % env.config.MODEL_EVAL_FREQUENCY == 0:
            stats_list.append(evaluate(policy_net))
            plot(stats_list)

        if e % env.config.MODEL_SAVE_FREQUENCY == 0:
            print('Saving model')
            utils.cond_mkdir('./models/')
            torch.save(policy_net,
                       os.path.join('./models/', 'episode_%d.pth' % (e)))

    print('Complete')
    env.render()
    plot(stats_list)
    # env.close()
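The loop above delegates the actual learning step to optimize_model(), which is not shown here. A generic DQN update of the kind usually paired with this structure, sampling a batch of the (state, action, next_state, reward) tuples pushed into memory, computing the Bellman target with the frozen target_net and regressing the policy network's Q-values, is sketched below. The names policy_net, target_net and memory, and the existence of memory.sample() and len(memory), are assumptions taken from the surrounding code; terminal-state masking is omitted for brevity.

import torch
import torch.nn.functional as F


def optimize_model_sketch(policy_net, target_net, memory, optimizer,
                          batch_size=128, gamma=0.99, device='cpu'):
    if len(memory) < batch_size:
        return
    # Each transition was pushed as (state, action, next_state, reward) tensors.
    transitions = memory.sample(batch_size)
    states, actions, next_states, rewards = zip(*transitions)
    states = torch.cat(states).to(device)
    actions = torch.stack(actions).to(device).long().view(-1, 1)
    next_states = torch.cat(next_states).to(device)
    rewards = torch.stack(rewards).to(device).float()

    # Q(s, a) from the policy network for the actions that were actually taken.
    q_values = policy_net(states).gather(1, actions).squeeze(1)

    # Bellman target r + gamma * max_a' Q_target(s', a'), detached from the graph.
    with torch.no_grad():
        next_q = target_net(next_states).max(1)[0]
    target = rewards + gamma * next_q

    loss = F.smooth_l1_loss(q_values, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()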
Exemplo n.º 20
0
def main():
    root_path = os.path.join(opt.logging_root, opt.experiment_name)
    utils.cond_mkdir(root_path)

    point_cloud_dataset = dataio.OccupancyDataset(opt.pc_filepath)

    coord_dataset = dataio.Block3DWrapperMultiscaleAdaptive(
        point_cloud_dataset,
        sidelength=opt.res,
        octant_size=opt.octant_size,
        jitter=True,
        max_octants=opt.max_octants,
        num_workers=opt.num_workers,
        length=opt.steps_til_tiling,
        scale_init=opt.scale_init)

    model = modules.ImplicitAdaptiveOctantNet(
        in_features=3 + 1,
        out_features=1,
        num_hidden_layers=opt.hidden_layers,
        hidden_features=opt.hidden_features,
        feature_grid_size=feature_grid_size,
        octant_size=opt.octant_size)
    model.cuda()

    resume_checkpoint = {}
    if opt.load is not None:
        resume_checkpoint = load_from_checkpoint(opt.load, model,
                                                 coord_dataset)

    if opt.export:
        assert opt.load is not None, 'Need to specify which model to export with --load'

        export_mesh(model, coord_dataset, opt.upsample, opt.mc_threshold)
        return

    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\n\nTrainable Parameters: {num_params}\n\n")

    dataloader = DataLoader(coord_dataset,
                            shuffle=False,
                            batch_size=1,
                            pin_memory=True,
                            num_workers=opt.num_workers)

    # Define the loss
    loss_fn = partial(loss_functions.occupancy_bce,
                      tiling_every=opt.steps_til_tiling,
                      dataset=coord_dataset)

    summary_fn = partial(utils.write_occupancy_multiscale_summary,
                         (opt.res, opt.res, opt.res),
                         coord_dataset,
                         output_mrc=f'{opt.experiment_name}.mrc',
                         skip=opt.skip_logging)

    # Define the pruning
    pruning_fn = partial(pruning_functions.pruning_occupancy,
                         threshold=opt.pruning_threshold)

    # Save command-line parameters log directory.
    p.write_config_file(opt, [os.path.join(root_path, 'config.ini')])

    # Save text summary of model into log directory.
    with open(os.path.join(root_path, "model.txt"), "w") as out_file:
        out_file.write(str(model))

    objs_to_save = {'octtree': coord_dataset.octtree}

    training.train(model=model,
                   train_dataloader=dataloader,
                   epochs=opt.num_epochs,
                   lr=opt.lr,
                   steps_til_summary=opt.steps_til_summary,
                   epochs_til_checkpoint=opt.epochs_til_ckpt,
                   model_dir=root_path,
                   loss_fn=loss_fn,
                   summary_fn=summary_fn,
                   objs_to_save=objs_to_save,
                   pruning_fn=pruning_fn,
                   epochs_til_pruning=opt.epochs_til_pruning,
                   resume_checkpoint=resume_checkpoint)
Exemplo n.º 21
0
def train(model,
          train_dataloader,
          epochs,
          lr,
          steps_til_summary,
          epochs_til_checkpoint,
          model_dir,
          loss_fn,
          summary_fn,
          prefix_model_dir='',
          val_dataloader=None,
          double_precision=False,
          clip_grad=False,
          use_lbfgs=False,
          loss_schedules=None,
          params=None):

    if params is None:
        optim = torch.optim.Adam(lr=lr,
                                 params=model.parameters(),
                                 amsgrad=True)
    else:
        optim = torch.optim.Adam(lr=lr, params=params, amsgrad=True)

    if use_lbfgs:
        optim = torch.optim.LBFGS(lr=lr,
                                  params=model.parameters(),
                                  max_iter=50000,
                                  max_eval=50000,
                                  history_size=50,
                                  line_search_fn='strong_wolfe')

    os.makedirs(model_dir, exist_ok=True)

    model_dir_postfixed = os.path.join(model_dir, prefix_model_dir)

    summaries_dir = os.path.join(model_dir_postfixed, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir_postfixed, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(epochs):
            if not epoch % epochs_til_checkpoint and epoch:
                torch.save(
                    model.state_dict(),
                    os.path.join(checkpoints_dir,
                                 'model_epoch_%04d.pth' % epoch))
                np.savetxt(
                    os.path.join(checkpoints_dir,
                                 'train_losses_epoch_%04d.txt' % epoch),
                    np.array(train_losses))

            for step, (model_input, gt) in enumerate(train_dataloader):
                start_time = time.time()

                tmp = {}
                for key, value in model_input.items():
                    if isinstance(value, torch.Tensor):
                        tmp.update({key: value.cuda()})
                    else:
                        tmp.update({key: value})
                model_input = tmp

                gt = {key: value.cuda() for key, value in gt.items()}

                if double_precision:
                    model_input = {
                        key: value.double()
                        for key, value in model_input.items()
                    }
                    gt = {key: value.double() for key, value in gt.items()}

                if use_lbfgs:

                    def closure():
                        optim.zero_grad()
                        model_output = model(model_input)
                        losses = loss_fn(model_output, gt)
                        train_loss = 0.
                        for loss_name, loss in losses.items():
                            train_loss += loss.mean()
                        train_loss.backward()
                        return train_loss

                    optim.step(closure)

                model_output = model(model_input)
                losses = loss_fn(model_output, gt)

                train_loss = 0.
                for loss_name, loss in losses.items():
                    single_loss = loss.mean()

                    if loss_schedules is not None and loss_name in loss_schedules:
                        writer.add_scalar(
                            loss_name + "_weight",
                            loss_schedules[loss_name](total_steps),
                            total_steps)
                        single_loss *= loss_schedules[loss_name](total_steps)

                    writer.add_scalar(loss_name, single_loss, total_steps)
                    train_loss += single_loss

                train_losses.append(train_loss.item())
                writer.add_scalar("total_train_loss", train_loss, total_steps)

                if not total_steps % steps_til_summary:
                    torch.save(
                        model.state_dict(),
                        os.path.join(checkpoints_dir, 'model_current.pth'))
                    summary_fn(model, model_input, gt, model_output, writer,
                               total_steps)

                if not use_lbfgs:
                    optim.zero_grad()
                    train_loss.backward()

                    if clip_grad:
                        if isinstance(clip_grad, bool):
                            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           max_norm=1.)
                        else:
                            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                           max_norm=clip_grad)

                    optim.step()

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write(
                        "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                        (epoch, train_loss, time.time() - start_time))

                    if val_dataloader is not None:
                        print("Running validation set...")
                        model.eval()
                        with torch.no_grad():
                            val_losses = []
                            for (model_input, gt) in val_dataloader:
                                # Move tensors to the GPU, as in the training loop.
                                model_input = {
                                    k: (v.cuda() if isinstance(v, torch.Tensor) else v)
                                    for k, v in model_input.items()
                                }
                                gt = {k: v.cuda() for k, v in gt.items()}
                                model_output = model(model_input)
                                # loss_fn returns a dict of named losses;
                                # reduce it to a scalar before averaging.
                                val_loss = sum(l.mean() for l in loss_fn(model_output, gt).values())
                                val_losses.append(val_loss.item())

                            writer.add_scalar("val_loss", np.mean(val_losses),
                                              total_steps)
                        model.train()

                total_steps += 1

        torch.save(model.state_dict(),
                   os.path.join(checkpoints_dir, 'model_final.pth'))
        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))
Exemplo n.º 22
0
def train(validation=True):
    root_path = os.path.join(opt.logging_root, opt.experiment_name)
    utils.cond_mkdir(root_path)
    ''' Training dataset '''
    if opt.dataset == 'deepvoxels':
        dataset = dataio.DeepVoxelDataset(opt.dv_dataset_path,
                                          mode='train',
                                          resize_to=2 * (opt.img_size, ))
        use_ndc = False
    elif opt.dataset == 'llff':
        dataset = dataio.LLFFDataset(opt.llff_dataset_path, mode='train')
        use_ndc = True
    elif opt.dataset == 'blender':
        dataset = dataio.NerfBlenderDataset(
            opt.nerf_dataset_path,
            splits=['train'],  # which split to load: either 'train', 'val', 'test'
            mode='train',  # which split to train on (must be in splits)
            resize_to=2 * (opt.img_size, ),
            ref_rot=None,
            d_rot=None)
        use_ndc = False
    else:
        raise NotImplementedError('dataset not implemented')

    coords_dataset = dataio.Implicit6DMultiviewDataWrapper(
        dataset,
        dataset.get_img_shape(),
        dataset.get_camera_params(),
        samples_per_ray=opt.samples_per_ray,
        samples_per_view=opt.samples_per_view,
        use_ndc=use_ndc)
    ''' Validation dataset '''
    if validation:
        if opt.dataset == 'deepvoxels':
            val_dataset = dataio.DeepVoxelDataset(opt.dv_dataset_path,
                                                  mode='val',
                                                  idcs=dataset.val_idcs,
                                                  resize_to=2 *
                                                  (opt.img_size, ))
        elif opt.dataset == 'llff':
            val_dataset = dataio.LLFFDataset(opt.llff_dataset_path, mode='val')
        elif opt.dataset == 'blender':
            val_dataset = dataio.NerfBlenderDataset(
                opt.nerf_dataset_path,
                splits=['val'],  # which split to load: either 'train', 'val', 'test'
                mode='val',  # which split to use (must be in splits)
                resize_to=2 * (opt.img_size, ),
                ref_rot=None,
                d_rot=None)

        val_coords_dataset = dataio.Implicit6DMultiviewDataWrapper(
            val_dataset,
            val_dataset.get_img_shape(),
            val_dataset.get_camera_params(),
            samples_per_ray=opt.samples_per_ray,
            samples_per_view=np.prod(val_dataset.get_img_shape()[:2]),
            num_workers=opt.num_workers,
            sobol_ray_sampling=opt.use_sobol_ray_sampling,
            use_ndc=use_ndc)
    ''' Dataloaders'''
    dataloader = DataLoader(
        coords_dataset,
        shuffle=True,
        batch_size=opt.batch_size,  # num of views in a batch
        pin_memory=True,
        num_workers=opt.num_workers)

    if validation:
        val_dataloader = DataLoader(val_coords_dataset,
                                    shuffle=True,
                                    batch_size=1,
                                    pin_memory=True,
                                    num_workers=opt.num_workers)
    else:
        val_dataloader = None

    # get model paths
    if opt.resume is not None:
        path, epoch = opt.resume
        epoch = int(epoch)
        assert (os.path.isdir(path))
        assert opt.config is not None, 'Specify config file'

    if opt.use_sampler:
        cam_params = dataset.get_camera_params()
        sampler = modules.SamplingNet(Nt=opt.samples_per_ray,
                                      ncuts=opt.num_cuts,
                                      sampling_interval=(cam_params['near'],
                                                         cam_params['far']))
    else:
        sampler = None

    add_pe_ray_samples = 10  # 10 cos + sin
    add_pe_orientations = 4  # 4 cos + sin
    nl_types = opt.activation

    model_sigma = modules.RadianceNet(
        out_features=1,
        hidden_layers=opt.hidden_layers,
        hidden_features=opt.hidden_features,
        nl=nl_types,
        use_grad=opt.use_grad,
        input_name=['ray_samples', 'ray_orientations'],
        input_processing_fn=modules.input_processing_fn,
        input_pe_params={
            'ray_samples': add_pe_ray_samples,
            'ray_orientations': add_pe_orientations
        },
        sampler=sampler,
        normalize_pe=opt.normalize_pe)
    model_sigma.cuda()

    model_rgb = modules.RadianceNet(
        out_features=3,
        hidden_layers=opt.hidden_layers,
        hidden_features=opt.hidden_features,
        nl=nl_types,
        use_grad=opt.use_grad,
        input_name=['ray_samples', 'ray_orientations'],
        input_processing_fn=modules.input_processing_fn,
        input_pe_params={
            'ray_samples': add_pe_ray_samples,
            'ray_orientations': add_pe_orientations
        },
        sampler=sampler,
        normalize_pe=opt.normalize_pe)
    model_rgb.cuda()

    if opt.resume is not None:
        if (epoch > 0):
            model_path_sigma = path + '/checkpoints/' + f'model_sigma_epoch_{epoch:04d}.pth'
            model_path_rgb = path + '/checkpoints/' + f'model_rgb_epoch_{epoch:04d}.pth'
        else:
            model_path_sigma = path + '/checkpoints/' + 'model_sigma_current.pth'
            model_path_rgb = path + '/checkpoints/' + 'model_rgb_current.pth'
        print('Loading checkpoints')
        ckpt_dict = torch.load(model_path_sigma)
        state_dict = translate_saved_weights(ckpt_dict, model_sigma)
        model_sigma.load_state_dict(state_dict, strict=True)
        ckpt_dict = torch.load(model_path_rgb)
        state_dict = translate_saved_weights(ckpt_dict, model_rgb)
        model_rgb.load_state_dict(state_dict, strict=True)

        # load optimizers
        try:
            if (epoch > 0):
                optim_path_sigma = path + '/checkpoints/' + f'optim_sigma_epoch_{epoch:04d}.pth'
                optim_path_rgb = path + '/checkpoints/' + f'optim_rgb_epoch_{epoch:04d}.pth'
            else:
                optim_path_sigma = path + '/checkpoints/' + 'optim_sigma_current.pth'
                optim_path_rgb = path + '/checkpoints/' + 'optim_rgb_current.pth'
            resume_checkpoint = {}
            sigma_ckpt = torch.load(optim_path_sigma)
            for g in sigma_ckpt['optimizer_state_dict']['param_groups']:
                g['lr'] = opt.lr
            resume_checkpoint['sigma'] = sigma_ckpt['optimizer_state_dict']
            rgb_ckpt = torch.load(optim_path_rgb)
            for g in rgb_ckpt['optimizer_state_dict']['param_groups']:
                g['lr'] = opt.lr
            resume_checkpoint['rgb'] = rgb_ckpt['optimizer_state_dict']
            resume_checkpoint['total_steps'] = rgb_ckpt['total_steps']
            resume_checkpoint['epoch'] = rgb_ckpt['epoch']
        except FileNotFoundError:
            print('Unable to load optimizer checkpoints')
    else:
        resume_checkpoint = {}
    models = {'sigma': model_sigma, 'rgb': model_rgb}

    # Define the loss
    loss_fn = partial(loss_functions.tomo_radiance_sigma_rgb_loss,
                      use_piecewise_model=opt.use_piecewise_model,
                      num_cuts=opt.num_cuts)
    summary_fn = partial(utils.write_tomo_radiance_summary,
                         chunk_size_eval=opt.chunk_size_eval,
                         num_views_to_disp_at_training=1,
                         use_piecewise_model=opt.use_piecewise_model,
                         num_cuts=opt.num_cuts,
                         use_coarse_fine=False)
    chunk_lists_from_batch_fn = dataio.chunk_lists_from_batch_reduce_to_raysamples_fn

    # Save command-line parameters log directory.
    p.write_config_file(opt, [os.path.join(root_path, 'config.ini')])
    with open(os.path.join(root_path, "params.txt"), "w") as out_file:
        out_file.write('\n'.join(
            ["%s: %s" % (key, value) for key, value in vars(opt).items()]))

    # Save text summary of model into log directory.
    with open(os.path.join(root_path, "model.txt"), "w") as out_file:
        for model_name, model in models.items():
            out_file.write(model_name)
            out_file.write(str(model))

    training.train_wchunks(models=models,
                           train_dataloader=dataloader,
                           epochs=opt.num_epochs,
                           lr=opt.lr,
                           steps_til_summary=opt.steps_til_summary,
                           epochs_til_checkpoint=opt.epochs_til_ckpt,
                           model_dir=root_path,
                           loss_fn=loss_fn,
                           summary_fn=summary_fn,
                           val_dataloader=val_dataloader,
                           chunk_lists_from_batch_fn=chunk_lists_from_batch_fn,
                           max_chunk_size=opt.chunk_size_train,
                           num_cuts=opt.num_cuts,
                           clip_grad=opt.clip_grad,
                           resume_checkpoint=resume_checkpoint)
Exemplo n.º 23
0
               help='Options are "mlp" or "nerf"')
p.add_argument('--resolution', type=int, default=1600)

opt = p.parse_args()


class SDFDecoder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Define the model.
        if opt.mode == 'mlp':
            self.model = modules.SingleBVPNet(type=opt.model_type, final_layer_factor=1, in_features=3)
        elif opt.mode == 'nerf':
            self.model = modules.SingleBVPNet(type='relu', mode='nerf', final_layer_factor=1, in_features=3)
        self.model.load_state_dict(torch.load(opt.checkpoint_path))
        self.model.cuda()

    def forward(self, coords):
        model_in = {'coords': coords}
        return self.model(model_in)['model_out']


sdf_decoder = SDFDecoder()

root_path = os.path.join(opt.logging_root, opt.experiment_name)
utils.cond_mkdir(root_path)

sdf_meshing.create_mesh(sdf_decoder, os.path.join(root_path, 'test'), N=opt.resolution)
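sdf_meshing.create_mesh drives the marching-cubes export, but the decoder can also be queried directly. The snippet below is a hypothetical sanity check; the (1, N, 3) input layout and single-channel output are assumptions about modules.SingleBVPNet rather than something guaranteed by this script.

import torch

with torch.no_grad():
    # Random query points in [-1, 1]^3, batched as (1, N, 3).
    query = 2.0 * torch.rand(1, 1024, 3).cuda() - 1.0
    sdf_values = sdf_decoder(query)
print('SDF value range:', sdf_values.min().item(), sdf_values.max().item())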
Exemplo n.º 24
0
def train(model,
          train_dataloader,
          epochs,
          lr,
          steps_til_summary,
          epochs_til_checkpoint,
          model_dir,
          loss_schedules=None,
          is_train=True,
          **kwargs):

    print('Training Info:')
    print('data_path:\t\t', kwargs['point_cloud_path'])
    print('num_instances:\t\t', kwargs['num_instances'])
    print('batch_size:\t\t', kwargs['batch_size'])
    print('epochs:\t\t\t', epochs)
    print('learning rate:\t\t', lr)
    for key in kwargs:
        if 'loss' in key:
            print(key + ':\t', kwargs[key])

    if is_train:
        optim = torch.optim.Adam(lr=lr, params=model.parameters())
    else:
        # Initialization for the evaluation stage.
        embedding = model.latent_codes(
            torch.zeros(1).long().cuda()).clone().detach()
        embedding.requires_grad = True
        optim = torch.optim.Adam(lr=lr, params=[embedding])

    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    summaries_dir = os.path.join(model_dir, 'summaries')
    utils.cond_mkdir(summaries_dir)

    checkpoints_dir = os.path.join(model_dir, 'checkpoints')
    utils.cond_mkdir(checkpoints_dir)

    writer = SummaryWriter(summaries_dir)

    total_steps = 0
    with tqdm(total=len(train_dataloader) * epochs) as pbar:
        train_losses = []
        for epoch in range(epochs):
            if not epoch % epochs_til_checkpoint and epoch:

                if is_train:
                    torch.save(
                        model.module.state_dict(),
                        os.path.join(checkpoints_dir,
                                     'model_epoch_%04d.pth' % epoch))
                else:
                    embed_save = embedding.detach().squeeze().cpu().numpy()
                    np.savetxt(
                        os.path.join(checkpoints_dir,
                                     'embedding_epoch_%04d.txt' % epoch),
                        embed_save)

                np.savetxt(
                    os.path.join(checkpoints_dir,
                                 'train_losses_epoch_%04d.txt' % epoch),
                    np.array(train_losses))

            for step, (model_input, gt) in enumerate(train_dataloader):
                start_time = time.time()

                model_input = {
                    key: value.cuda()
                    for key, value in model_input.items()
                }
                gt = {key: value.cuda() for key, value in gt.items()}

                if is_train:
                    losses = model(model_input, gt, **kwargs)
                else:
                    losses = model.embedding(embedding, model_input, gt)

                train_loss = 0.
                for loss_name, loss in losses.items():
                    single_loss = loss.mean()

                    if loss_schedules is not None and loss_name in loss_schedules:
                        writer.add_scalar(
                            loss_name + "_weight",
                            loss_schedules[loss_name](total_steps),
                            total_steps)
                        single_loss *= loss_schedules[loss_name](total_steps)

                    writer.add_scalar(loss_name, single_loss, total_steps)
                    train_loss += single_loss

                train_losses.append(train_loss.item())
                writer.add_scalar("total_train_loss", train_loss, total_steps)

                if not total_steps % steps_til_summary:
                    if is_train:
                        torch.save(
                            model.module.state_dict(),
                            os.path.join(checkpoints_dir, 'model_current.pth'))

                optim.zero_grad()
                train_loss.backward()
                optim.step()

                pbar.update(1)

                if not total_steps % steps_til_summary:
                    tqdm.write(
                        "Epoch %d, Total loss %0.6f, iteration time %0.6f" %
                        (epoch, train_loss, time.time() - start_time))

                total_steps += 1

        if is_train:
            torch.save(model.module.cpu().state_dict(),
                       os.path.join(checkpoints_dir, 'model_final.pth'))
        else:
            embed_save = embedding.detach().squeeze().cpu().numpy()
            np.savetxt(
                os.path.join(checkpoints_dir,
                             'embedding_epoch_%04d.txt' % epoch), embed_save)
            sdf_meshing.create_mesh(model,
                                    os.path.join(checkpoints_dir, 'test'),
                                    embedding=embedding,
                                    N=256,
                                    level=0,
                                    get_color=False)

        np.savetxt(os.path.join(checkpoints_dir, 'train_losses_final.txt'),
                   np.array(train_losses))