Example #1
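    # IRM-style update: minimizes the average loss over training environments
    # plus a penalty on the squared distance between each environment's
    # gradient and the average gradient.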
    def fit(self, envs, num_iterations, callback=False):
        for epoch in range(num_iterations):
            losses = [
                self.loss(self.network(x), y) for x, y in envs["train"]["envs"]
            ]
            gradients = [
                grad(loss, self.parameters(), create_graph=True)
                for loss in losses
            ]
            # average loss and gradients
            avg_loss = sum(losses) / len(losses)
            avg_gradient = grad(avg_loss, self.parameters(), create_graph=True)

            # compute trace penalty
            penalty_value = 0
            for gradient in gradients:
                for gradient_i, avg_grad_i in zip(gradient, avg_gradient):
                    penalty_value += (gradient_i - avg_grad_i).pow(2).sum()

            self.optimizer.zero_grad()
            (avg_loss + self.hparams['penalty'] * penalty_value).backward()
            self.optimizer.step()

            if callback:
                # compute errors
                utils.compute_errors(self, envs)
Example #2
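    # Variant with a configurable penalty: mixes the average environment loss
    # with a squared-norm penalty on per-environment gradients, weighted by
    # hparams["irm_lambda"] (only version == 1 is implemented).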
    def fit(self, envs, num_iterations, callback=False):
        for epoch in range(num_iterations):
            losses_env = []
            gradients_env = []
            for x, y in envs["train"]["envs"]:
                losses_env.append(self.loss(self.network(x), y))
                gradients_env.append(
                    grad(losses_env[-1], self.net_dummies, create_graph=True))

            # Average loss across envs
            losses_avg = sum(losses_env) / len(losses_env)
            gradients_avg = grad(losses_avg,
                                 self.net_dummies,
                                 create_graph=True)

            penalty = 0
            for gradients_this_env in gradients_env:
                for g_env, g_avg in zip(gradients_this_env, gradients_avg):
                    if self.version == 1:
                        penalty += g_env.pow(2).sum()
                    else:
                        raise NotImplementedError

            obj = (1 - self.hparams["irm_lambda"]) * losses_avg
            obj += self.hparams["irm_lambda"] * penalty

            self.optimizer.zero_grad()
            obj.backward()
            self.optimizer.step()

            if callback:
                # compute errors
                utils.compute_errors(self, envs)
Example #3
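# Grid search over GTV/lasso penalties with an exponentially-weighted temporal
# split: fits an augmented lasso system for each penalty pair and returns
# train/test MSE and R^2 per configuration as a DataFrame.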
def full_weighted_cv(X, y, Ds, lambda_gtv=np.linspace(.1, 1, 10), lambda_lasso=None, t=50, auto_cv=True, alpha=.9, k=5):
    errors = []
    X_train, X_test, y_train, y_test = temporal_split(X, y, t)
    if alpha<1:
        n = X_train.shape[0]
        weights = np.array([alpha**(n-t) for t in np.arange(1, n+1)])
        X_train = X_train * np.sqrt(weights.reshape(-1,1))
        y_train = y_train * np.sqrt(weights)
    n,p = X_train.shape
    # test errors
    for l1 in lambda_gtv:
        for m in Ds:
            D = Ds[m]
            if auto_cv:
                XD, bigY, invD = augmented_system_lasso(X_train, y_train, D, l1, 0, l1_only=True)
                fit = cvglmnet(x = XD, y = bigY, family = 'gaussian', ptype = 'mse', nfolds = 5)
                b = cvglmnetCoef(fit, s = 'lambda_min')
                l3 = fit['lambda_min'][0]
                beta = [email protected](b.shape[0])[1:]
                mset, r2t = compute_errors(y_train, X_train@beta)
                mse, r2 = compute_errors(y_test, X_test@beta)
                errors.append([m, l1, l3, mset, r2t, mse, r2])
            else:
                for l3 in lambda_lasso:
                    XD, bigY, invD = augmented_system_lasso(X_train, y_train, D, l1/l3, 0, l1_only=True)
                    #XD, bigY, invD = epsilon_system_lasso(X_train, y_train, D, l1)
                    fit = glmnet(x = XD, y = bigY)
                    b = glmnetCoef(fit, s = scipy.float64([l3]), exact = False)
                    beta = [email protected](b.shape[0])[1:]
                    mset, r2t = compute_errors(y_train, X_train@beta)
                    mse, r2 = compute_errors(y_test, X_test@beta)
                    errors.append([m, l1, l3, mset, r2t, mse, r2])
    df = pd.DataFrame(errors, columns=['method', 'lambda_tv', 'lambda_1', 'train_mse', 'train_r2', 'test_mse', 'test_r2'])
    return df
Example #4
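    # ERM baseline: pools all training environments into one dataset and
    # minimizes a single loss over the pooled data.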
    def fit(self, envs, num_iterations, callback=False):
        x = torch.cat([xe for xe, ye in envs["train"]["envs"]])
        y = torch.cat([ye for xe, ye in envs["train"]["envs"]])

        for epoch in range(num_iterations):
            self.optimizer.zero_grad()
            self.loss(self.network(x), y).backward()
            self.optimizer.step()

            if callback:
                # compute errors
                utils.compute_errors(self, envs)
Example #5
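    # Computes per-environment losses and delegates the parameter update to
    # self.mask_step with the tau, wd and lr hyperparameters.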
    def fit(self, envs, num_iterations, callback=False):
        for epoch in range(num_iterations):
            losses = [
                self.loss(self.network(x), y) for x, y in envs["train"]["envs"]
            ]
            self.mask_step(losses,
                           list(self.parameters()),
                           tau=self.hparams["tau"],
                           wd=self.hparams["wd"],
                           lr=self.hparams["lr"])

            if callback:
                # compute errors
                utils.compute_errors(self, envs)
Example #6
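# End-to-end training script for monocular depth estimation: loads a YAML
# config, prepares log/checkpoint directories and TensorBoard, optionally
# restores a previous checkpoint, then trains with a weighted SSIM + L1 loss.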
def main():
    # Arguments
    parser = argparse.ArgumentParser(description='High Quality Monocular Depth Estimation via Transfer Learning')
    parser.add_argument('-c', '--configFile', required=True, help='Path to config yaml file', metavar='path/to/config')
    args = parser.parse_args()

    CONFIG_FILE_PATH = args.configFile
    with open(CONFIG_FILE_PATH) as fd:
        config_yaml = oyaml.load(fd)  # Returns an ordered dict. Used for printing

    config = AttrDict(config_yaml)
    print(colored('Config being used for training:\n{}\n\n'.format(oyaml.dump(config_yaml)), 'green'))

    # Create a new directory to save logs
    runs = sorted(glob.glob(os.path.join(config.train.logsDir, 'exp-*')))
    prev_run_id = int(runs[-1].split('-')[-1]) if runs else 0
    MODEL_LOG_DIR = os.path.join(config.train.logsDir, 'exp-{:03d}'.format(prev_run_id + 1))
    CHECKPOINT_DIR = os.path.join(MODEL_LOG_DIR, 'checkpoints')
    os.makedirs(CHECKPOINT_DIR)
    print('Saving logs to folder: ' + colored('"{}"'.format(MODEL_LOG_DIR), 'blue'))

    # Save a copy of config file in the logs
    shutil.copy(CONFIG_FILE_PATH, os.path.join(MODEL_LOG_DIR, 'config.yaml'))

    # Create a tensorboard object and Write config to tensorboard
    writer = SummaryWriter(MODEL_LOG_DIR, comment='create-graph')

    string_out = io.StringIO()
    oyaml.dump(config_yaml, string_out, default_flow_style=False)
    config_str = string_out.getvalue().split('\n')
    string = ''
    for line in config_str:
        string = string + '    ' + line + '\n\r'
    writer.add_text('Config', string, global_step=None)

    # Create model
    model = Model()
    print('Model created.')

    # to continue training from a checkpoint
    if config.train.continueTraining:
        print('Transfer Learning enabled. Model State to be loaded from a prev checkpoint...')
        if not os.path.isfile(config.train.pathPrevCheckpoint):
            raise ValueError('Invalid path to the given weights file for transfer learning. '
                             'The file {} does not exist'.format(config.train.pathPrevCheckpoint))

        CHECKPOINT = torch.load(config.train.pathPrevCheckpoint, map_location='cpu')

        if 'model_state_dict' in CHECKPOINT:
            # Newer weights file with various dicts
            print(colored('Continuing training from checkpoint...Loaded data from checkpoint:', 'green'))
            print('Config Used to train Checkpoint:\n', oyaml.dump(CHECKPOINT['config']), '\n')
            print('From Checkpoint: Last Epoch Loss:', CHECKPOINT['epoch_loss'], '\n\n')

            model.load_state_dict(CHECKPOINT['model_state_dict'])
        elif 'state_dict' in CHECKPOINT:
            # reading original authors checkpoints
            if config.train.model != 'rednet':
                # original author deeplab checkpoint
                CHECKPOINT['state_dict'].pop('decoder.last_conv.8.weight')
                CHECKPOINT['state_dict'].pop('decoder.last_conv.8.bias')
            else:
                # rednet checkpoint
                # print(CHECKPOINT['state_dict'].keys())
                CHECKPOINT['state_dict'].pop('final_deconv.weight')
                CHECKPOINT['state_dict'].pop('final_deconv.bias')
                CHECKPOINT['state_dict'].pop('out5_conv.weight')
                CHECKPOINT['state_dict'].pop('out5_conv.bias')
                CHECKPOINT['state_dict'].pop('out4_conv.weight')
                CHECKPOINT['state_dict'].pop('out4_conv.bias')
                CHECKPOINT['state_dict'].pop('out3_conv.weight')
                CHECKPOINT['state_dict'].pop('out3_conv.bias')
                CHECKPOINT['state_dict'].pop('out2_conv.weight')
                CHECKPOINT['state_dict'].pop('out2_conv.bias')

            model.load_state_dict(CHECKPOINT['state_dict'], strict=False)
        else:
            # Old checkpoint containing only model's state_dict()
            model.load_state_dict(CHECKPOINT)

    # Enable Multi-GPU training
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    if torch.cuda.device_count() > 1:
        print('Multiple GPUs being used, can\'t save model graph to Tensorboard')
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Training parameters
    optimizer = torch.optim.Adam(model.parameters(), config.train.optimAdam.learningRate)
    batch_size = config.train.batchSize
    prefix = 'densenet_' + str(batch_size)

    # Load data
    train_loader_list = []
    test_loader_list = []
    for dataset in config.train.datasetsTrain:
        train_data = getTrainingTestingData('rgb', 'train', dataset.images, dataset.labels)
        train_loader_list.append(train_data)

    for dataset in config.train.datasetsVal:
        print(dataset.images)
        test_data = getTrainingTestingData('rgb', 'eval', dataset.images, dataset.labels)
        test_loader_list.append(test_data)

    train_loader = DataLoader(torch.utils.data.ConcatDataset(train_loader_list), batch_size, num_workers=config.train.numWorkers, shuffle=True, drop_last=True, pin_memory=True)
    test_loader = DataLoader(torch.utils.data.ConcatDataset(test_loader_list), batch_size, num_workers=config.train.numWorkers, shuffle=False, drop_last=True, pin_memory=True)
    print(len(torch.utils.data.ConcatDataset(train_loader_list)))
    print(len(train_loader))
    print(len(test_loader))

    # Loss
    l1_criterion = nn.L1Loss()

    total_iter_num = 0
    # Start training...
    for epoch in range(config.train.numEpochs):
        batch_time = AverageMeter()
        losses = AverageMeter()
        N = len(train_loader)

        # Log the current Epoch Number
        writer.add_scalar('data/Epoch Number', epoch, total_iter_num)

        # Switch to train mode
        model.train()

        end = time.time()

        running_loss = 0.0
        for i, sample_batched in enumerate(train_loader):
            optimizer.zero_grad()
            total_iter_num += 1

            # Prepare sample and target
            image = torch.autograd.Variable(sample_batched['image'].cuda())
            depth = torch.autograd.Variable(sample_batched['depth'].cuda(non_blocking=True))

            # Normalize depth
            depth_n = DepthNorm(depth)

            # Predict
            output = model(image)

            # Compute the loss
            l_depth = l1_criterion(output, depth_n)
            l_ssim = torch.clamp((1 - ssim(output, depth_n, val_range = 1000.0 / 10.0)) * 0.5, 0, 1)

            loss = (1.0 * l_ssim) + (0.1 * l_depth)

            # Update step
            losses.update(loss.data.item(), image.size(0))
            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item()

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            eta = str(datetime.timedelta(seconds=int(batch_time.val*(N - i))))

            # Log progress
            niter = epoch*N+i
            if i % 5 == 0:
                # Print to console
                print('Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.sum:.3f})\t'
                'ETA {eta}\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})'
                .format(epoch, i, N, batch_time=batch_time, loss=losses, eta=eta))

                # Log to tensorboard
                writer.add_scalar('Train/Loss', losses.val, niter)

            if i % 50 == 0:
                LogProgress(model, writer, test_loader, niter)

        # Log Epoch Loss
        epoch_loss = running_loss / (len(train_loader))
        writer.add_scalar('data/Train Epoch Loss', epoch_loss, total_iter_num)
        print('\nTrain Epoch Loss: {:.4f}'.format(epoch_loss))

        metrics = compute_errors(depth_n, output)
        print(metrics)
        for keys, values in metrics.items():
            print(str(keys) + ':' + str(values))

        # Record epoch's intermediate results
        LogProgress(model, writer, test_loader, niter)
        writer.add_scalar('Train/Loss.avg', losses.avg, epoch)

        # Save the model checkpoint every N epochs
        if (epoch % config.train.saveModelInterval) == 0:
            filename = os.path.join(CHECKPOINT_DIR, 'checkpoint-epoch-{:04d}.pth'.format(epoch))
            if torch.cuda.device_count() > 1:
                model_params = model.module.state_dict()  # Saving nn.DataParallel model
            else:
                model_params = model.state_dict()

            torch.save(
                {
                    'model_state_dict': model_params,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch,
                    'total_iter_num': total_iter_num,
                    'epoch_loss': epoch_loss,
                    'config': config_yaml
                }, filename)
Example #7
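        # Evaluation-loop excerpt: accumulates the SSIM + L1 loss, converts
        # predictions back to metric depth, prints error metrics, and saves
        # per-sample results to a directory chosen by dataset type.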
        l_ssim = torch.clamp(
            (1 - ssim(outputs, depth_n, val_range=1000.0 / 10.0)) * 0.5, 0, 1)

        loss = (1.0 * l_ssim) + (0.1 * l_depth)

        running_loss += loss.item()

        # Save output images, one at a time, to results
        inputs_tensor = image.detach().cpu()
        output_tensor = outputs.detach().cpu()
        label_tensor = depth_n.detach().cpu()

        depth_metric = depth * (config.train.max_depth / 1000.0)
        outputs_tmp = DepthNorm(outputs)
        outputs_metric = outputs_tmp * (config.train.max_depth / 1000.0)
        metrics = compute_errors(depth_metric, outputs_metric)
        # print(metrics)
        for keys, values in metrics.items():
            print(str(keys) + ': ' + str(values))

        # Extract each tensor within batch and save results
        for iii, sample_batched in enumerate(
                zip(inputs_tensor, output_tensor, label_tensor)):
            input, output, label = sample_batched

            if key == 'real':
                RESULTS_DIR = config.eval.resultsDirReal
            else:
                RESULTS_DIR = config.eval.resultsDirSynthetic

            result_path = os.path.join(
Example #8
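# Single-pass train/validation routine: trains when an optimizer is supplied,
# otherwise evaluates and reports the standard depth error metrics.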
def trainAndVal(loader, model, l1_criterion, optimizer=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    rmses = AverageMeter()

    if (optimizer):
        # switch to train mode
        model.train()
        print('Train', flush=True)
    else:
        # switch to evaluate mode
        model.eval()
        print('Val', flush=True)

    N = len(loader)

    end = time.time()
    start = end

    if (optimizer is None):
        predictions = []
        testSetDepths = []

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # process epoch
    for i, sample_batched in enumerate(loader):

        # Prepare sample and target
        image = sample_batched['image'].to(device)
        depth = sample_batched['depth'].to(device)

        # Normalize depth
        depth_n = DepthNorm(depth)

        # Predict
        output = model(image)

        # Compute the loss
        l_depth = l1_criterion(output, depth_n)
        l_ssim = torch.clamp(
            (1 - ssim(output, depth_n, val_range=1000.0 / 10.0)) * 0.5, 0, 1)

        loss = (1.0 * l_ssim) + (0.1 * l_depth)

        # measure accuracy and record loss
        losses.update(loss.data, image.size(0))

        rmse = (depth_n.data.cpu() - output.data.cpu())**2
        rmse = np.sqrt(rmse.mean())
        rmses.update(rmse, image.size(0))

        if (optimizer):
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        eta = str(datetime.timedelta(seconds=int(batch_time.avg * (N - i))))
        total = str(
            datetime.timedelta(seconds=int((time.time() - start) +
                                           batch_time.avg * (N - i))))

        minDepth = 10
        maxDepth = 1000

        if (optimizer is None):

            predictions.append(output.squeeze().data.cpu())
            testSetDepths.append(depth_n.squeeze().data.cpu())

        if i % 5 == 0:
            p = 100 * i / N
            bar = "[%-10s] %d%%" % ('=' * int(p * 10 / 100) + '.' *
                                    (10 - int(p * 10 / 100)), p)
            print('[{0}/{1}] {2} - '
                  'Batch Time: {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                  'ETA: {eta}/{total} '
                  'Loss: {loss.val:.3f} ({loss.avg:.3f}) '
                  'RMSE: {rmse.val:.3f} ({rmse.avg:.3f})'.format(
                      i,
                      N,
                      bar,
                      batch_time=batch_time,
                      eta=eta,
                      total=total,
                      loss=losses,
                      rmse=rmses),
                  flush=True)

    if (optimizer is None):
        predictions = np.vstack(predictions)
        testSetDepths = np.vstack(testSetDepths)

        e = compute_errors(predictions, testSetDepths)

        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(
            'a1', 'a2', 'a3', 'rel', 'rms', 'log_10'))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".
              format(e[0], e[1], e[2], e[3], e[4], e[5]))

    return losses.avg
Example #9
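# Validates a disparity network against ground-truth depth and logs error
# metrics, sample images and depth histograms to TensorBoard.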
def validate_depth_with_gt(val_loader,
                           disp_net,
                           criterion,
                           epoch,
                           logger,
                           tb_writer,
                           global_vars_dict=None):
    device = global_vars_dict['device']
    args = global_vars_dict['args']
    n_iter_val_depth = global_vars_dict['n_iter_val_depth']

    # args.show_samples holds fractional positions; convert to batch indices
    show_samples = [int(s * len(val_loader)) for s in args.show_samples]

    batch_time = AverageMeter()
    error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'a1', 'a2', 'a3']
    errors = AverageMeter(i=len(error_names), precision=3)

    # switch to evaluate mode
    disp_net.eval()

    end = time.time()
    fig = plt.figure(1, figsize=(8, 6))
    #criterion = MaskedL1Loss().to(device)  # L1 loss: easier to optimize

    for i, (tgt_img, depth_gt) in enumerate(val_loader):

        tgt_img = tgt_img.to(device)  #BCHW
        depth_gt = depth_gt.to(device)

        output_disp = disp_net(tgt_img)  #BCHW
        if args.spatial_normalize:
            output_disp = spatial_normalize(output_disp)

        output_depth = 255 / output_disp

        #err = compute_errors2(depth_gt.data.squeeze(1),output_depth.data.squeeze(1))
        err = compute_errors(gt=depth_gt.data.squeeze(1),
                             pred=output_depth.data.squeeze(1),
                             crop=False)

        ver_gt = VGSmap(depth_gt)
        ver_pre = VGSmap(output_depth)

        errors.update(err)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        fig = plt.figure(1, figsize=(8, 6))
        if args.img_freq > 0 and i in show_samples:  #output_writers list(3)
            if epoch == 0:  # validation run before any training, to gauge the untrained network
                #1.img
                # runs only once; note: for ref_imgs, axis 0 indexes the batch and axis 1 indexes the list of adjacent frames
                tb_writer.add_image('epoch 0 Input/sample{}'.format(i),
                                    tensor2array(tgt_img[0]), 0)
                tb_writer.add_image('epoch 0 depth_gt/sample{}'.format(i),
                                    tensor2array(depth_gt[0], colormap='bone'),
                                    0)
                tb_writer.add_image(
                    'Depth Output/sample{}'.format(i),
                    tensor2array(output_depth[0],
                                 max_value=None,
                                 colormap='bone'), 0)

                plt.hist(tensor2array(depth_gt[0], colormap='bone').flatten() *
                         256,
                         256, [0, 256],
                         color='r')
                tb_writer.add_figure(tag='histogram_gt/sample{}'.format(i),
                                     figure=fig,
                                     global_step=0)

            else:
                #2.disp
                # tensor disp_to_show :[1,h,w],0.5~3.1~10
                #disp2show = tensor2array(output_disp[0], max_value=None,colormap='bone')
                depth2show = tensor2array(output_depth[0],
                                          max_value=None,
                                          colormap='bone')
                #tb_writer.add_image('Disp Output/sample{}'.format(i), disp2show, epoch)
                tb_writer.add_image('Depth Output/sample{}'.format(i),
                                    depth2show, epoch)
                #add_figure

                plt.hist(depth2show.flatten() * 256, 256, [0, 256], color='r')
                tb_writer.add_figure(tag='histogram_sample/sample{}'.format(i),
                                     figure=fig,
                                     global_step=epoch)

        # add scalar
        if args.scalar_freq > 0 and n_iter_val_depth % args.scalar_freq == 0:
            pass
            #h_loss =HistgramLoss()(tgt_img,depth_gt)
            #tb_writer.add_scalar('batch/val_h_loss' ,h_loss, n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[1], errors.val[1], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[2], errors.val[2], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[3], errors.val[3], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[4], errors.val[4], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[5], errors.val[5], n_iter_val_depth)

        if args.log_terminal:
            logger.valid_logger_update(batch=i,
                                       time=batch_time,
                                       names=error_names,
                                       values=errors)

        n_iter_val_depth += 1
        #end for
    #if args.log_terminal:
    #    logger.valid_bar.update(len(val_loader))

    global_vars_dict['n_iter_val_depth'] = n_iter_val_depth

    return error_names, errors
Example #10
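            # TensorFlow training-loop excerpt for 3D human pose estimation:
            # runs one optimization step, then periodically logs the loss and
            # the MPJPE / PA-MPJPE errors.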
            _, pose3d_out_, pose3d_gt_, loss_, image_, pose2d_gt_ = sess.run([
                train_op, pose3d_out, pose3d_gt, loss, image,
                sample['pose2d_crop']
            ])

            # Display training status
            epoch_cur = i * opt.batch_size // meta_info.NUM_SAMPLES_H36
            iter_cur = (i * opt.batch_size) % meta_info.NUM_SAMPLES_H36
            t.set_postfix(epoch=epoch_cur,
                          iter_percent="%d %%" %
                          (iter_cur / float(meta_info.NUM_SAMPLES_H36) * 100),
                          loss='%.3f' % loss_)

            # Log numerical results
            if i % opt.freq_log == 0:
                mpjpe_, pa_mpjpe_ = compute_errors(pose3d_out_, pose3d_gt_)
                log(tag='train/loss',
                    step=i,
                    writer=summary_writer,
                    value=loss_)
                log(tag='train/mpjpe',
                    step=i,
                    writer=summary_writer,
                    value=mpjpe_)
                log(tag='train/pa_mpjpe',
                    step=i,
                    writer=summary_writer,
                    value=pa_mpjpe_)

            # Log visual results
            if i % opt.freq_display == 0:
Example #11
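# Depth validation loop: clamps predictions to the evaluation range, applies
# an optional Garg/Eigen crop mask, and aggregates error metrics per batch.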
def validate(args,
             model,
             test_loader,
             criterion_ueff,
             epoch,
             epochs,
             device='cpu'):
    with torch.no_grad():
        val_si = RunningAverage()
        # val_bins = RunningAverage()
        metrics = utils.RunningAverageDict()
        for batch in tqdm(test_loader,
                          desc=f"Epoch: {epoch + 1}/{epochs}. Loop: Validation"
                          ) if is_rank_zero(args) else test_loader:
            img = batch['image'].to(device)
            depth = batch['depth'].to(device)
            if 'has_valid_depth' in batch:
                if not batch['has_valid_depth']:
                    continue
            depth = depth.squeeze().unsqueeze(0).unsqueeze(0)
            bins, pred = model(img)

            mask = depth > args.min_depth
            l_dense = criterion_ueff(pred,
                                     depth,
                                     mask=mask.to(torch.bool),
                                     interpolate=True)
            val_si.append(l_dense.item())

            pred = nn.functional.interpolate(pred,
                                             depth.shape[-2:],
                                             mode='bilinear',
                                             align_corners=True)

            pred = pred.squeeze().cpu().numpy()
            pred[pred < args.min_depth_eval] = args.min_depth_eval
            pred[pred > args.max_depth_eval] = args.max_depth_eval
            pred[np.isinf(pred)] = args.max_depth_eval
            pred[np.isnan(pred)] = args.min_depth_eval

            gt_depth = depth.squeeze().cpu().numpy()
            valid_mask = np.logical_and(gt_depth > args.min_depth_eval,
                                        gt_depth < args.max_depth_eval)
            if args.garg_crop or args.eigen_crop:
                gt_height, gt_width = gt_depth.shape
                eval_mask = np.zeros(valid_mask.shape)

                if args.garg_crop:
                    eval_mask[int(0.40810811 * gt_height):int(0.99189189 *
                                                              gt_height),
                              int(0.03594771 * gt_width):int(0.96405229 *
                                                             gt_width)] = 1

                elif args.eigen_crop:
                    if args.dataset == 'kitti':
                        eval_mask[int(0.3324324 * gt_height):int(0.91351351 *
                                                                 gt_height),
                                  int(0.0359477 * gt_width):int(0.96405229 *
                                                                gt_width)] = 1
                    else:
                        eval_mask[45:471, 41:601] = 1
                valid_mask = np.logical_and(valid_mask, eval_mask)
            metrics.update(
                utils.compute_errors(gt_depth[valid_mask], pred[valid_mask]))

        return metrics.get_value(), val_si
Example #12
def train():
    """Trains the ST-ResNet model on BikeNYC data, validating after each
    epoch with early stopping, then reports training errors and optional
    historical-average (HA) benchmarks.
    """
    logger = logging.getLogger(__name__)
    logger.info('training...')

    # data loader
    train_dataset = make_dataloader(dataset_name='bikenyc',
                                    mode='train',
                                    len_closeness=len_closeness,
                                    len_period=len_period,
                                    len_trend=len_trend)

    # Creating data indices for training and validation splits:
    dataset_size = len(train_dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    val_timestamps = [
        train_dataset.timestamp_train[i] for i in indices[:split]
    ]
    val_Y = [train_dataset.Y_data[i] for i in indices[:split]]
    print('training size:', len(train_indices))
    print('val size:', len(val_indices))

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    training_generator = data.DataLoader(train_dataset,
                                         **params,
                                         sampler=train_sampler)
    val_generator = data.DataLoader(train_dataset,
                                    **params,
                                    sampler=valid_sampler)

    # Total iterations
    total_iters = np.ceil(len(train_indices) / batch_size) * epoch_nums

    # model
    model = stresnet((len_closeness, nb_flow, map_height, map_width),
                     (len_period, nb_flow, map_height, map_width),
                     (len_trend, nb_flow, map_height, map_width),
                     external_dim=8,
                     nb_residual_unit=nb_residual_unit)
    if LOAD_INITIAL:
        logger.info('\tload initial_checkpoint = %s\n' % initial_checkpoint)
        model.load_state_dict(
            torch.load(initial_checkpoint,
                       map_location=lambda storage, loc: storage))
    #model.apply(weight_init)

    # Loss and optimizer
    loss_fn = nn.MSELoss()  # nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    loss_fn.to(device)

    # Train the model
    es = EarlyStopping(patience=early_stop_patience,
                       mode='min',
                       model=model,
                       save_path=checkpoint_dir + '/%s/model.best.pth' %
                       (model_name))
    for e in range(epoch_nums):
        for i, (X_c, X_p, X_t, X_meta,
                Y_batch) in enumerate(training_generator):
            #epoch = i * batch_size / len(train_loader)

            # Move tensors to the configured device
            X_c = X_c.type(torch.FloatTensor).to(device)
            X_p = X_p.type(torch.FloatTensor).to(device)
            X_t = X_t.type(torch.FloatTensor).to(device)
            X_meta = X_meta.type(torch.FloatTensor).to(device)
            #print(X_meta[0])
            Y_batch = Y_batch.type(torch.FloatTensor).to(device)

            # Forward pass
            outputs = model(X_c, X_p, X_t, X_meta)
            #print(outputs[0])
            loss = loss_fn(
                outputs.reshape(len(outputs), map_width, map_height), Y_batch)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        its = np.ceil(len(train_indices) / batch_size) * (
            e + 1)  # iterations at specific epochs
        print('Epoch [{}/{}], step [{}/{}], Loss: {:.4f}'.format(
            e + 1, epoch_nums, its, total_iters, loss.item()))

        # valid after each training epoch
        val_loss = valid(model, val_generator, compute_errors, device)
        if es.step(val_loss):
            print('early stopped! With val loss:', val_loss)
            break  # early stop criterion is met, we can stop now

        if e in epoch_save:
            torch.save(model.state_dict(),
                       checkpoint_dir + '/%s/%08d_model.pth' % (model_name, e))
            torch.save(
                {
                    'optimizer': optimizer.state_dict(),
                    'iter': its,
                    'epoch': e,
                }, checkpoint_dir + '/%s/%08d_optimizer.pth' % (model_name, e))

            logger.info(checkpoint_dir + '/%s/%08d_model.pth' %
                        (model_name, e) + ' saved!')

    rmse_list = []
    mse_list = []
    mae_list = []
    for i, (X_c, X_p, X_t, X_meta, Y_batch) in enumerate(training_generator):
        # Move tensors to the configured device
        X_c = X_c.type(torch.FloatTensor).to(device)
        X_p = X_p.type(torch.FloatTensor).to(device)
        X_t = X_t.type(torch.FloatTensor).to(device)
        X_meta = X_meta.type(torch.FloatTensor).to(device)
        #Y_batch = Y_batch.type(torch.FloatTensor).to(device)

        # Forward pass
        outputs = model(X_c, X_p, X_t, X_meta)
        # Original version; a bug has appeared where the shape is (x, 1, 32, 32)
        # rather than (x, 32, 32). This did not happen 3 weeks ago...
        mse, mae, rmse = compute_errors(
            outputs.cpu().data.numpy(), Y_batch.data.numpy())
        # mse, mae, rmse = compute_errors(outputs.reshape(len(outputs), map_width, map_height), Y_batch.data.numpy())

        rmse_list.append(rmse)
        mse_list.append(mse)
        mae_list.append(mae)

    rmse = np.mean(rmse_list)
    mse = np.mean(mse_list)
    mae = np.mean(mae_list)

    print('Training mse: %.6f mae: %.6f rmse (norm): %.6f, rmse (real): %.6f' %
          (mse, mae, rmse, rmse *
           (train_dataset.mmn._max - train_dataset.mmn._min) / 2. * m_factor))

    if COMPARE_TO_HA:
        print("Preparing Benchmark Scores, this may take a few minutes.....")
        # return compare_to_ha(compute_errors, val_timestamps, val_Y, train_dataset.mmn)
        mse_benchmark, mae_benchmark, rmse_benchmark = compare_to_simple_ha(
            compute_errors, val_timestamps, val_Y, train_dataset.mmn)
        print(
            'Simple HA Benchmark mse: %.6f mae: %.6f rmse (norm): %.6f, rmse (real): %.6f'
            % (mse_benchmark, mae_benchmark, rmse_benchmark, rmse_benchmark *
               (train_dataset.mmn._max - train_dataset.mmn._min) / 2. *
               m_factor))
        mse_benchmark, mae_benchmark, rmse_benchmark = compare_to_tuned_ha(
            compute_errors, val_timestamps, val_Y, train_dataset.mmn)
        print(
            'Tuned HA Benchmark mse: %.6f mae: %.6f rmse (norm): %.6f, rmse (real): %.6f'
            % (mse_benchmark, mae_benchmark, rmse_benchmark, rmse_benchmark *
               (train_dataset.mmn._max - train_dataset.mmn._min) / 2. *
               m_factor))
Example #13
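    # KITTI-style depth evaluation: predicts depth per image (the 0.54 factor
    # is presumably the KITTI stereo baseline in metres), applies the Garg
    # crop and per-image median scaling, then averages the error metrics.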
    def eval_depth(self):
        pred_depths = []
        pred_disps = []
        errors = []
        ratios = []

        # Predict
        print('doing evaluation...')
        for i, img_path in enumerate(self.img_paths):
            img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (self.params.input_w, self.params.input_h))
            img = tf.expand_dims(
                tf.convert_to_tensor(img, tf.float32) / 255., 0)
            outputs = self.val_step(img)
            _, depth = disp_to_depth(outputs['disparity0'],
                                     min_depth=MIN_DEPTH,
                                     max_depth=MAX_DEPTH)
            depth *= 0.54

            pred_depths.append(depth.numpy())
            pred_disps.append(np.squeeze(outputs['disparity0'].numpy()))

        for i in range(len(pred_depths)):
            gt_depth = self.gt_depths[i]
            gt_height, gt_width = gt_depth.shape[:2]

            pred_depth = pred_depths[i][0]
            pred_depth = cv2.resize(pred_depth, (gt_width, gt_height))

            mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

            crop = np.array([
                0.40810811 * gt_height, 0.99189189 * gt_height,
                0.03594771 * gt_width, 0.96405229 * gt_width
            ]).astype(np.int32)
            crop_mask = np.zeros(mask.shape)
            crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
            mask = np.logical_and(mask, crop_mask)

            pred_depth = pred_depth[mask]
            gt_depth = gt_depth[mask]

            # Median scaling
            ratio = np.median(gt_depth) / np.median(pred_depth)
            ratios.append(ratio)
            pred_depth *= ratio

            pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
            pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

            errors.append(compute_errors(gt_depth, pred_depth))

        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
            med, np.std(ratios / med)))

        mean_errors = np.array(errors).mean(0)

        print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse",
                                               "rmse_log", "a1", "a2", "a3"))
        print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
        print("\n-> Done!\n")