Example #1
    def __init__(self, mode=None, transform=None):
        '''
            Create a map-style dataset for training or testing with
            optional transforms.
            @parameters:
                mode: one of the following: 'train', 'test', or 'valid'.
                transform: a callable transform (or a composition of
                transforms) applied to each sample.
        '''

        super().__init__()

        if mode not in ('train', 'test', 'valid'):
            print('invalid mode {}'.format(mode))
            sys.exit(1)

        self.mode = mode
        self.transform = transform

        self.config = read_config()

        dataset_dir = self.config['DATASET']['dataset_dir']
        dataset_name = self.config['DATASET']['dataset_name']

        # list file enumerating the samples of this split
        filename = dataset_name + '_' + self.mode + '_files.txt'
        files_path = os.path.join(dataset_dir, self.mode, filename)

        # directories of the ground-truth depth maps and input images
        gt_dir = self.config['DATASET']['gt_dir']
        gt_dir = os.path.join(dataset_dir, self.mode, gt_dir)

        input_dir = self.config['DATASET']['input_dir']
        input_dir = os.path.join(dataset_dir, self.mode, input_dir)

        # (input image path, ground-truth path) pairs
        self.img_gt_paths = read_img_gt_path(
            files_path,
            input_dir,
            gt_dir,
        )
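
A minimal usage sketch for the dataset above. The snippet omits the class header, so the name DepthDataset below is hypothetical, and a map-style dataset also needs __len__ and __getitem__, which are not shown:

from torch.utils.data import DataLoader

# DepthDataset is a hypothetical name for the class whose __init__ is shown
train_set = DepthDataset(mode='train', transform=None)
train_loader = DataLoader(train_set, batch_size=4, shuffle=True)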
Example #2
class DORNNET:

    config = utils.read_config()

    def __init__(self):

        self.depth_net = DORN()

        path_model = DORNNET.config['MODEL']['kitti']
        print('{0} model is used!'.format('kitti'))

        model_dict = utils.get_model(path_model)

        # load the trained model's parameters
        self.depth_net.load_state_dict(model_dict)

        # move the network to the cuda/gpu device if available
        self.depth_net.to(utils.device)

        # bn and dropout layers will work in evaluation mode
        self.depth_net.eval()

    def __call__(self, filename):

        img = Image.open(filename)
        img_tensor = utils.transform_img(img, self.config)

        if self.depth_net.training:
            raise ValueError('Model is in training mode!')

        # disable the autograd engine since no back-propagation is needed
        with torch.no_grad():
            pred_labels, ord_probs = self.depth_net(img_tensor)

        return pred_labels, ord_probs
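
A minimal usage sketch for the inference wrapper above (the image path is a placeholder):

model = DORNNET()

# pred_labels: discrete depth labels, ord_probs: per-bin ordinal probabilities
pred_labels, ord_probs = model('path/to/kitti_image.png')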
Example #3
class Evaluate:

    config = read_config()

    # number of SID depth bins
    K = config['INPUT'].getint('sid_bins')

    min_ = config['INPUT'].getfloat('min_depth')
    max_ = config['INPUT'].getfloat('max_depth')

    # depth bin edges according to SID, see Eq. (1) in the DORN paper:
    # t_i = min_ * (max_/min_)**(i/(K+1)), i = 0, ..., K+1
    expo = np.arange(0, K + 2) / (K + 1)
    bins = min_ * (max_ / min_)**expo

    def __init__(self):

        # running sums of the per-pixel error metrics
        self.delta1_sum = 0
        self.delta2_sum = 0
        self.delta3_sum = 0

        self.abs_rel_sum = 0
        self.sq_rel_sum = 0

        # sums for the scale-invariant logarithmic error
        self.si_log_0 = 0
        self.si_log_1 = 0

        self.inv_sq_sum = 0

        # number of valid (masked) pixels seen so far
        self.count = 0

    @torch.no_grad()
    def compute(self, predict_depth, target_depth):
        '''
            Accumulate unnormalized error metrics over one batch.
        '''
        # evaluate only pixels whose gt depth lies in the valid range
        mask = torch.logical_and(target_depth >= self.min_,
                                 target_depth <= self.max_)

        predict_depth = predict_depth[mask]
        target_depth = target_depth[mask]

        thresh = torch.max(target_depth / predict_depth,
                           predict_depth / target_depth)

        self.delta1_sum += (thresh < 1.25).float().sum()
        self.delta2_sum += (thresh < 1.25**2).float().sum()
        self.delta3_sum += (thresh < 1.25**3).float().sum()

        bias = predict_depth - target_depth

        self.abs_rel_sum += (bias.abs() / target_depth).sum()
        self.sq_rel_sum += (bias**2 / target_depth).sum()

        inv_diff = 1 / predict_depth - 1 / target_depth
        self.inv_sq_sum += (inv_diff**2).sum()

        log_err = torch.log(predict_depth / target_depth)
        self.si_log_0 += (log_err**2).sum()
        self.si_log_1 += log_err.sum()

        # number of pixels with gt depths
        self.count += mask.int().sum()

    def get(self):
        '''
            Return the metrics as a dict of names and values.
            results() must be called first to compute the averages.
        '''

        return {
            'delta1': self.delta1.item(),
            'delta2': self.delta2.item(),
            'delta3': self.delta3.item(),
            'abs_rel': self.abs_rel.item(),
            'sq_rel': self.sq_rel.item(),
            'si_log': self.si_log.item(),
            'irmse': self.irmse.item()
        }

    @torch.no_grad()
    def results(self):
        '''
            Compute the averaged (final) metrics from the running sums.
        '''

        self.delta1 = self.delta1_sum / self.count
        self.delta2 = self.delta2_sum / self.count
        self.delta3 = self.delta3_sum / self.count

        self.abs_rel = self.abs_rel_sum / self.count
        self.sq_rel = self.sq_rel_sum / self.count

        self.irmse = torch.sqrt(self.inv_sq_sum / self.count)

        # scale-invariant log error: E[d^2] - (E[d])^2 with d = log(pred/gt)
        self.si_log = self.si_log_0 / self.count - \
                      (self.si_log_1 / self.count)**2

    def __str__(self):

        s = 'Error Metrics:\n'

        s += 'delta1={0:0.3f}\ndelta2={1:0.3f}\ndelta3={2:0.3f}\n'.format(
            self.delta1.item(), self.delta2.item(), self.delta3.item())

        s += 'absolute relative error={0:0.3f}\n'.format(self.abs_rel.item())
        s += 'squared relative error={0:0.3f}\n'.format(self.sq_rel.item())

        s += 'inverse root mean square error={0:0.3f}\n'.format(
            self.irmse.item())

        s += 'scale invariant logarithmic error={0:0.3f}\n'.format(
            self.si_log.item())

        return s
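
A brief usage sketch for the accumulator above (validation_batches stands for any iterable of (prediction, ground-truth) depth tensor pairs; it is a placeholder, not a name from the project):

evaluator = Evaluate()

for predict_depth, target_depth in validation_batches:
    evaluator.compute(predict_depth, target_depth)

# average the running sums, then read the metrics
evaluator.results()
print(evaluator)            # formatted report via __str__
metrics = evaluator.get()   # e.g. metrics['delta1']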
Example #4
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict

from dorn.model import utils
from dorn.model.network.backbone import resnet101

config = utils.read_config()


class FullImageEncoder(nn.Module):
    '''
        Captures global texture information as a monocular depth cue.
    '''
    def __init__(self):

        super().__init__()

        drop_prob = config['TRAIN'].getfloat('dropout_prob')

        # pool the 2048-channel backbone features down to a 4x5 grid
        self.global_pooling = nn.AvgPool2d(16, 16, ceil_mode=True)
        self.global_dropout = nn.Dropout2d(p=drop_prob)

        # fully connected layer over the flattened pooled features
        self.global_fc = nn.Linear(2048 * 4 * 5, 512)

        # 1x1 convolution on the 512-dim global feature
        self.conv_depth = nn.Conv2d(512, 512, 1)

        # broadcast the global feature back to the 49x65 feature map size
        self.interp = nn.UpsamplingBilinear2d((49, 65))
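
The snippet stops after the layer definitions. Below is a plausible forward pass inferred from the layer shapes above; it is a sketch, not necessarily the project's code, and the ReLU after the fc layer is an assumption:

    def forward(self, x):
        # x: (N, 2048, 49, 65) backbone features
        out = self.global_pooling(x)        # -> (N, 2048, 4, 5)
        out = self.global_dropout(out)
        out = out.view(out.size(0), -1)     # flatten to (N, 2048*4*5)
        out = F.relu(self.global_fc(out))   # -> (N, 512)
        out = out.view(-1, 512, 1, 1)
        out = self.conv_depth(out)          # 1x1 conv, still (N, 512, 1, 1)
        return self.interp(out)             # upsample to (N, 512, 49, 65)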
Example #5
import random
import numpy as np
import torch
from PIL import Image
from itertools import product
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from dorn.model.utils import read_config

# global config parameters
config = read_config()

# initialize random number generator
random.seed()


class Scale:
    def __init__(self):
        '''
            Scale a PIL image and its npy depth map by a random scale
            factor uniformly sampled from the interval [1.0, 1.2].
        '''

        # random scale factor
        self.scale = random.uniform(1.0, 1.2)

    def __call__(self, img_depth):

        img = img_depth['img']
        depth = img_depth['depth']

        # new_size = (new_w, new_h)
        w, h = img.size
        new_size = (int(w * self.scale), int(h * self.scale))

        # NOTE: the original snippet is truncated here; the completion
        # below is an assumption based on the docstring: enlarge both
        # maps and divide the depth values by the scale factor, as is
        # common for scale augmentation in monocular depth estimation
        img = img.resize(new_size, Image.BILINEAR)
        depth = Image.fromarray(depth).resize(new_size, Image.NEAREST)
        depth = np.asarray(depth) / self.scale

        return {'img': img, 'depth': depth}
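
A quick usage sketch (the file names are placeholders; the sample dict layout follows the __call__ above):

sample = {'img': Image.open('example.png'),
          'depth': np.load('example_depth.npy')}

scaled = Scale()(sample)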
Example #6
File: main.py Project: I3aer/DORN
def main():
    '''
        Train the DORN model and run a full validation every valid_rate
        epochs. The function also keeps track of the best-performing
        model (in terms of the delta1 accuracy) and saves it to the
        checkpoint path whenever it improves.
    '''

    config = utils.read_config()

    train_loader, valid_loader = create_loader(config)

    resume_training = config['TRAIN'].getboolean('resume')

    ckpt_path = config['TRAIN']['checkpoint_path']

    # generate model
    depth_net = DORN()

    if resume_training:
        # resume training by loading the saved checkpoint

        if os.path.isfile(ckpt_path):
            print("loading checkpoint {0}".format(ckpt_path))

            checkpoint = torch.load(ckpt_path)

            start_epoch = checkpoint['epoch'] + 1
            # previously saved model state and optimizer state
            optimizer_state = checkpoint['optimizer']
            model_dict = checkpoint['state_dict']

            # restore the model state; the optimizer state is restored
            # further below, once the optimizer has been constructed
            depth_net.load_state_dict(model_dict)

        else:
            # guard against start_epoch being undefined further below
            print('no checkpoint found at {0}'.format(ckpt_path))
            start_epoch = 0

    else:

        path_model = config['MODEL']['kitti']

        model_dict = utils.get_model(path_model)

        dorn_dict = depth_net.state_dict()

        # overwrite existing parameter values
        dorn_dict.update(model_dict)

        # load the pretrained model
        depth_net.load_state_dict(dorn_dict)

        start_epoch = 0

    # transfer network onto the GPU if available
    # always do this before constructing optimizer
    depth_net = depth_net.to(utils.device)

    # log the network graph
    tb = utils.graph_visualize(depth_net, config)

    # ordinal loss function
    criterion = OrdLoss(config)
    '''
        Set the requires_grad attribute of all params in the model to
        False when feature extracting. This is because we only want to
        compute gradients for the last layer(s) of the DORN, i.e., the
        selected conv layers of the scene understanding module.
    '''
    # fix all network parameters
    for param in depth_net.parameters():
        param.requires_grad = False

    # trainable parameters
    train_params = []

    # scu block whose layer(s) is/are fine-tuned
    my_block = depth_net.scu_module.concat_process

    # make the selected conv layers of the scu module trainable
    for key, layer in my_block.named_children():
        if key in {'conv_1', 'conv_2', 'conv_comp'}:
            for param in layer.parameters():
                train_params.append(param)
                param.requires_grad = True

    lr = config['OPTIMIZER'].getfloat('learning_rate')
    wd = config['OPTIMIZER'].getfloat('weight_decay')
    omega = config['OPTIMIZER'].getfloat('momentum')

    # dict of parameters block and their lr
    train_params = [{'params': train_params, 'lr': lr}]

    # construct the optimizer: only the selected scu layers are optimized
    optimizer = optim.SGD(train_params, lr=lr, momentum=omega,
                          weight_decay=wd, nesterov=True)

    # restore the optimizer state when resuming from a checkpoint
    if resume_training and os.path.isfile(ckpt_path):
        optimizer.load_state_dict(optimizer_state)

    # decay the lr by gamma once the epoch number reaches a milestone
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[5, 20],
                                               gamma=0.1)

    num_epochs = config['TRAIN'].getint('num_epochs')

    valid_rate = config['TRAIN'].getint('valid_rate')

    best_acc = 0.0

    for epoch in range(start_epoch, num_epochs):

        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        train(depth_net, train_loader, criterion, optimizer, epoch, config, tb)

        # find the best model by running validation
        if (epoch + 1) % valid_rate == 0:
            metrics = validate(depth_net, valid_loader, criterion, epoch,
                               config, tb)

            epoch_acc = metrics['delta1']

            # keep and save the best model weights so far
            if epoch_acc > best_acc:

                best_acc = epoch_acc

                best_model_wts = depth_net.state_dict()

                # save a checkpoint in the format expected when resuming
                torch.save({'epoch': epoch,
                            'state_dict': best_model_wts,
                            'optimizer': optimizer.state_dict()},
                           ckpt_path)

        # decay learning rate of params after each epoch
        scheduler.step()

    # write all pending events
    tb.flush()

    tb.close()
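
Since this is main.py, a standard entry-point guard presumably follows; the scraped snippet ends before it, so this is an assumption:

if __name__ == '__main__':
    main()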