Пример #1
0
class Dataset3DME(Dataset):
    """
    3D Motion Vectors dataset.

    The operating mode is chosen from the first cell of the first CSV row:

    * name ends with ``.npy`` / ``.npz``: column 0 holds files with
      precomputed motion vectors (read through the keys ``'l2r'`` and
      ``'r2l'``, plus ``'conf'`` when ``use_conf`` is set) and the remaining
      columns hold the transformation values;
    * anything else: columns 0 and 1 hold left / right image names, the
      remaining columns hold the transformation values, and motion vectors
      are computed on the fly with ``MEHandler``.

    Args:
        csv_file (string): Path to the csv file with image names and transformations.
        dataset_path (string): Directory with the images (or motion vector files).
        input_size (2-tuple): Size of input images as (height, width).
        crop (float): Cropping factor (from edges before ME).
        use_conf (bool): Add the stored confidence map to every sample.
            Only available with precomputed motion vectors.

    Returns (per item):
            Dict: {
                    'mv_L2R': Motion Vectors from Left view to Right view,
                    'mv_R2L': Motion Vectors backward,
                    'grid': coordinate grid, x channel first, every 4th pixel,
                    'affine_simple_values': transformation values of the row,
                    'confidence': confidence map (only when use_conf is set)
                  }

    """

    def __init__(self,
                 csv_file,
                 dataset_path,
                 input_size=(1080, 1920),
                 crop=0.2,
                 use_conf=False):

        self.h, self.w = input_size
        self.use_conf = use_conf
        self.pairs = pd.read_csv(csv_file)

        # frame size that remains after the crop factor is applied
        cropped_h = int(self.h - self.h * crop)
        cropped_w = int(self.w - self.w * crop)

        # the extension of the very first entry decides the mode
        first_name = self.pairs.iloc[0, 0]
        self.calculate_me = not first_name.endswith(('.npy', '.npz'))

        if not self.calculate_me:
            # precomputed motion vectors: <mv file>, <transformation values...>
            self.mv_names = self.pairs.iloc[:, 0]
            self.affine_simple_values = (
                self.pairs.iloc[:, 1:].values.astype('float'))
        else:
            # image pairs: <left>, <right>, <transformation values...>
            self.img_L_names = self.pairs.iloc[:, 0]
            self.img_R_names = self.pairs.iloc[:, 1]
            self.affine_simple_values = (
                self.pairs.iloc[:, 2:].values.astype('float'))
            self.me_handler = MEHandler(cropped_h,
                                        cropped_w,
                                        loss_metric='colorindependent',
                                        runs_to_warm_up=1)
            self.crop = crop

        self.dataset_path = dataset_path

        # np.indices yields (rows, cols); store them as (x, y) and keep
        # every 4th position along both spatial axes
        rows, cols = np.indices((cropped_h, cropped_w), dtype=np.float32)
        self.grid = np.stack((cols, rows), axis=0)[..., ::4, ::4]

    def __len__(self):
        """Number of rows in the csv file."""
        return len(self.pairs)

    def __getitem__(self, idx):
        """Build the sample dict for csv row ``idx``."""
        if not self.calculate_me:
            # read .npz file with Motion Vectors
            archive = np.load(
                os.path.join(self.dataset_path, self.mv_names[idx]))
            mv_L2R = archive['l2r']
            mv_R2L = archive['r2l']
            if self.use_conf:
                conf = archive['conf']
                if conf.ndim == 2:
                    # add a leading channel axis to a plain 2-D map
                    conf = conf[np.newaxis, ...]
        else:
            if self.use_conf:
                raise NotImplementedError(
                    'Confidence calculating in model has not implemented yet')
            # read images
            left_path = os.path.join(self.dataset_path, self.img_L_names[idx])
            right_path = os.path.join(self.dataset_path,
                                      self.img_R_names[idx])
            image_L = io.imread(left_path)
            image_R = io.imread(right_path)

            # Calculate Motion Vectors
            mv_L2R, mv_R2L = self.me_handler.calculate_disparity(
                image_L, image_R)

        # everything is handed over as float tensors
        sample = {
            'mv_L2R': torch.Tensor(mv_L2R.astype(np.float32)),
            'mv_R2L': torch.Tensor(mv_R2L.astype(np.float32)),
            'grid': torch.Tensor(self.grid),
            'affine_simple_values': torch.Tensor(
                self.affine_simple_values[idx, :].astype(np.float32)),
        }

        if self.use_conf:
            sample['confidence'] = torch.Tensor(conf.astype(np.float32))

        return sample
class MEDataset(Dataset):
    """
    Motion Vectors dataset between image pair.

    The operating mode is chosen from the first cell of the first CSV row:

    * name ends with ``.npy`` / ``.npz``: column 0 holds files with
      precomputed motion vectors (keys ``'l2r'``, ``'r2l'`` and, when
      ``use_conf`` is set, ``'conf_l'``, ``'conf_r'``); the remaining
      columns hold the transformation. ``random_sample`` must be False.
    * anything else: column 0 holds image names and motion vectors are
      computed with ``MEHandler``. With ``random_sample`` the pair is
      produced by ``Warper``; otherwise column 1 holds the second image and
      the remaining columns hold the transformation.

    Args:
            dataset_csv_path (string): Directory of the csv file with image names and transformations.
            dataset_csv_file (string): Filename of the csv file with image names and transformations.
            dataset_image_path (string): Directory with all the images / motion vector files.
            input_height, input_width (int): size of input images for ME initialization.
            crop (float): crop factor after image warping.
            use_conf (bool): add confidence maps to the sample (precomputed mode only).
            use_random_patch (bool): stored on the instance; not used by this class itself.
            normalize_inputs (bool): replace the pixel grid with a [-1, 1] grid of the
                motion vector size and divide the motion vectors by half of that size.
            geometric_model (string): model name forwarded to ``Warper``.
            random_sample (bool): warp the image with a random transformation
                instead of reading the pair and transformation from the csv file.
            load_images (bool): also return the images stored under
                ``<dataset_image_path>/images`` (precomputed mode only).

    Returns:
            Dict: {
                    'mv_L2R': Motion Vectors from source (assumed as Left view) to warped (Right view),
                    'mv_R2L': Motion Vectors backward,
                    'grid': coordinate grid,
                    'grid_L2R': grid + mv_L2R,
                    'grid_R2L': grid + mv_R2L,
                    'theta_GT': desired transformation,
                    'affine_simple_values': same tensor as 'theta_GT',
                    'conf_L', 'conf_R': confidence maps (only when use_conf is set),
                    'img_R_orig', 'img_R': images scaled by 1/255 with a leading
                        axis added (only when load_images is set)
                  }

    Raises:
            ValueError: precomputed motion vectors combined with random_sample.
            NotImplementedError: use_conf or load_images requested with image input
                (raised when an item is read).

    """

    def __init__(self,
                 dataset_csv_path,
                 dataset_csv_file,
                 dataset_image_path,
                 input_height,
                 input_width,
                 crop,
                 use_conf,
                 use_random_patch,
                 normalize_inputs,
                 geometric_model='affine_simple_4',
                 random_sample=True,
                 load_images=False):

        # read csv file
        self.csv = pd.read_csv(os.path.join(dataset_csv_path,
                                            dataset_csv_file))
        self.random_sample = random_sample
        self.use_random_patch = use_random_patch
        self.normalize_inputs = normalize_inputs

        h_cropped = int(input_height - input_height * crop)
        w_cropped = int(input_width - input_width * crop)
        # (x, y) pixel coordinates of the cropped frame, every 4th position
        self.grid = np.stack(np.indices((h_cropped, w_cropped),
                                        dtype=np.float32)[::-1],
                             axis=0)[..., ::4, ::4]

        # Always define the mode flag: __getitem__ branches on it, and it
        # used to be missing entirely when the csv listed images.
        first_entry = self.csv.iloc[0, 0]
        self.image_input = not first_entry.endswith(('.npy', '.npz'))

        if not self.image_input:
            if self.random_sample:
                raise ValueError('Incorrect attempt for using ME Dataset')
            self.mv_names = self.csv.iloc[:, 0]
            self.theta = self.csv.iloc[:, 1:].values.astype('float')
        else:
            ### Reading images and calculating ME (+conf) for them
            ### Not used for now.
            warnings.warn("Using ME Dataset with images as input.")
            self.img_L_names = self.csv.iloc[:, 0]
            if not self.random_sample:
                self.img_R_names = self.csv.iloc[:, 1]
                self.theta = self.csv.iloc[:, 2:].values.astype('float')
            self.me_handler = MEHandler(h_cropped,
                                        w_cropped,
                                        loss_metric='colorindependent',
                                        runs_to_warm_up=1)

        # copy args
        self.dataset_path = dataset_image_path
        self.geometric_model = geometric_model
        self.use_conf = use_conf
        self.crop = crop
        self.load_images = load_images

    def __len__(self):
        """Number of rows in the csv file."""
        return len(self.csv)

    @staticmethod
    def _normalize_mv(mv, h, w):
        """Return a float copy of ``mv`` with channel 0 / 1 divided by w/2 / h/2."""
        # In-place true division is rejected for integer arrays, and working
        # in place would also modify the caller's array, so operate on a copy.
        dtype = mv.dtype if np.issubdtype(mv.dtype, np.floating) else np.float32
        scaled = np.array(mv, dtype=dtype)
        scaled[0] /= w / 2
        scaled[1] /= h / 2
        return scaled

    def __getitem__(self, idx):
        """Build the sample dict for csv row ``idx``."""
        if not self.image_input:
            # read .npz file with Motion Vectors
            reader = np.load(
                os.path.join(self.dataset_path, self.mv_names[idx]))
            mv_L2R = reader['l2r']
            mv_R2L = reader['r2l']
            if self.use_conf:
                conf_L = reader['conf_l']
                conf_R = reader['conf_r']
                # add a leading channel axis to plain 2-D maps
                if conf_L.ndim == 2:
                    conf_L = conf_L[None, ...]
                if conf_R.ndim == 2:
                    conf_R = conf_R[None, ...]
            if self.load_images:
                # '<prefix>_<k>.npz' -> images/<prefix>_<k>.png, and the same
                # name with the last '_' field replaced by '0' for the
                # "orig" image
                stem = self.mv_names[idx].split('.')[0]
                orig_stem = '_'.join(stem.split('_')[:-1] + ['0'])
                img_R_orig = io.imread(
                    os.path.join(self.dataset_path, 'images',
                                 orig_stem + '.png'))
                img_R = io.imread(
                    os.path.join(self.dataset_path, 'images',
                                 stem + '.png'))
            theta = self.theta[idx, :]
        else:
            if self.use_conf:
                raise NotImplementedError(
                    'Calculating confidence in ME dataset has not implemented yet'
                )
            if self.load_images:
                # img_R_orig / img_R are only produced in precomputed mode;
                # fail with a clear message instead of an UnboundLocalError.
                raise NotImplementedError(
                    'Loading images in ME dataset is only implemented for '
                    'precomputed motion vectors')
            image_L = io.imread(
                os.path.join(self.dataset_path, self.img_L_names[idx]))
            if self.random_sample:
                # Warp with random transformations
                h, w, _ = image_L.shape
                warper = Warper(h,
                                w,
                                geometric_model=self.geometric_model,
                                crop=self.crop)
                image_L, image_R = warper.warp(image_L, image_L.copy())
                theta = warper.get_theta()
            else:
                image_R = io.imread(
                    os.path.join(self.dataset_path, self.img_R_names[idx]))
                theta = self.theta[idx, :]
            # Calculate Motion Vectors
            mv_L2R, mv_R2L = self.me_handler.calculate_disparity(
                image_L, image_R)

        grid = self.grid

        if self.normalize_inputs:
            # the normalized grid takes its size from the motion vectors
            _, h, w = mv_L2R.shape
            space_w = np.linspace(-1.0, 1.0, num=w)
            space_h = np.linspace(-1.0, 1.0, num=h)
            grid = np.stack(np.meshgrid(space_w, space_h))

            mv_L2R = self._normalize_mv(mv_L2R, h, w)
            mv_R2L = self._normalize_mv(mv_R2L, h, w)

        # make arrays float tensor for subsequent processing
        grid_L2R = torch.Tensor((grid + mv_L2R).astype(np.float32))
        grid_R2L = torch.Tensor((grid + mv_R2L).astype(np.float32))
        mv_L2R = torch.Tensor(mv_L2R.astype(np.float32))
        mv_R2L = torch.Tensor(mv_R2L.astype(np.float32))
        grid = torch.Tensor(grid)
        theta = torch.Tensor(theta.astype(np.float32))

        sample = {
            'mv_L2R': mv_L2R,
            'mv_R2L': mv_R2L,
            'grid': grid,
            'grid_L2R': grid_L2R,
            'grid_R2L': grid_R2L,
            'theta_GT': theta,
            'affine_simple_values': theta,
        }

        if self.use_conf:
            sample['conf_L'] = torch.Tensor(conf_L.astype(np.float32))
            sample['conf_R'] = torch.Tensor(conf_R.astype(np.float32))

        if self.load_images:
            img_R_orig = torch.Tensor(img_R_orig.astype(np.float32) / 255.0)
            img_R = torch.Tensor(img_R.astype(np.float32) / 255.0)
            sample['img_R_orig'] = torch.unsqueeze(img_R_orig, dim=0)
            sample['img_R'] = torch.unsqueeze(img_R, dim=0)

        return sample
Пример #3
0
class SynthDatasetME(Dataset):
    """
    Motion Vectors dataset between synthetically generated image pair for training with strong supervision.

    With ``random_sample`` every item reads the image named in column 0 of
    the csv file, warps it with ``Warper`` and computes the motion vectors
    with ``MEHandler``. Without it, column 0 names files with precomputed
    motion vectors (keys ``'l2r'``, ``'r2l'`` and, when ``use_conf`` is set,
    ``'conf'``) and the remaining columns hold the transformation.

    Args:
            dataset_csv_path (string): Directory of the csv file with image names and transformations.
            dataset_csv_file (string): Filename of the csv file with image names and transformations.
            dataset_image_path (string): Directory with all the images / motion vector files.
            h, w (int): size of input images for ME initialization.
            crop (float): crop factor after image warping.
            use_conf (bool): add the stored confidence map to the sample
                (precomputed mode only).
            geometric_model (string): model name forwarded to ``Warper``.
            random_sample (bool): generate the pair on the fly instead of
                reading precomputed motion vectors.

    Returns:
            Dict: {
                    'mv_L2R': Motion Vectors from source (assumed as Left view) to warped (Right view),
                    'mv_R2L': Motion Vectors backward,
                    'grid': coordinate grid, x channel first, every 4th pixel,
                    'theta_GT': desired transformation,
                    'confidence': confidence map (only when use_conf is set)
                  }

    Raises:
            NotImplementedError: use_conf requested together with random_sample
                (raised when an item is read).

    """

    def __init__(self,
                 dataset_csv_path,
                 dataset_csv_file,
                 dataset_image_path,
                 h,
                 w,
                 crop,
                 use_conf,
                 geometric_model='affine_simple_4',
                 random_sample=True):

        # read csv file
        self.train_data = pd.read_csv(
            os.path.join(dataset_csv_path, dataset_csv_file))
        self.random_sample = random_sample
        self.use_conf = use_conf
        self.crop = crop
        self.img_names = self.train_data.iloc[:, 0]
        h_cropped = int(h - h * crop)
        w_cropped = int(w - w * crop)

        # Use the same truthiness test as __getitem__, so both methods
        # always agree on the mode (an `== False` comparison disagreed with
        # it for falsy values such as None).
        if not self.random_sample:
            self.theta_array = self.train_data.iloc[:,
                                                    1:].values.astype('float')
        else:
            self.me_handler = MEHandler(h_cropped,
                                        w_cropped,
                                        loss_metric='colorindependent',
                                        runs_to_warm_up=1)

        # (x, y) pixel coordinates of the cropped frame, every 4th position
        self.grid = np.stack(np.indices((h_cropped, w_cropped),
                                        dtype=np.float32)[::-1],
                             axis=0)[..., ::4, ::4]
        # copy arguments
        self.dataset_image_path = dataset_image_path
        self.geometric_model = geometric_model

    def __len__(self):
        """Number of rows in the csv file."""
        return len(self.train_data)

    def __getitem__(self, idx):
        """Build the sample dict for csv row ``idx``."""
        if self.random_sample:
            if self.use_conf:
                raise NotImplementedError(
                    'Calculating confidence in synth dataset has not implemented yet'
                )
            # read image
            img_name = os.path.join(self.dataset_image_path,
                                    self.img_names[idx])
            image_L = io.imread(img_name)

            # Warp with random transformations
            h, w, _ = image_L.shape
            warper = Warper(h,
                            w,
                            geometric_model=self.geometric_model,
                            crop=self.crop)
            image_L, image_R = warper.warp(image_L, image_L.copy())
            theta = warper.get_theta()

            # Calculate Motion Vectors
            mv_L2R, mv_R2L = self.me_handler.calculate_disparity(
                image_L, image_R)
        else:
            # read .npz file with Motion Vectors
            reader = np.load(
                os.path.join(self.dataset_image_path, self.img_names[idx]))
            mv_L2R = reader['l2r']
            mv_R2L = reader['r2l']
            if self.use_conf:
                conf = reader['conf']
                if conf.ndim == 2:
                    # add a leading channel axis to a plain 2-D map
                    conf = conf[None, ...]
            theta = self.theta_array[idx, :]

        # make arrays float tensor for subsequent processing
        mv_L2R = torch.Tensor(mv_L2R.astype(np.float32))
        mv_R2L = torch.Tensor(mv_R2L.astype(np.float32))
        grid = torch.Tensor(self.grid)
        theta = torch.Tensor(theta.astype(np.float32))

        sample = {
            'mv_L2R': mv_L2R,
            'mv_R2L': mv_R2L,
            'grid': grid,
            'theta_GT': theta
        }

        if self.use_conf:
            conf = torch.Tensor(conf.astype(np.float32))
            sample['confidence'] = conf

        return sample