def cityscapes_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads images and ground truth for segmentation from the
    cityscapes training set.
    """
    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    transforms = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width)),
        tf.RandomRescale(1.5),
        tf.RandomCrop((crop_height, crop_width)),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                       hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_train_seg'),
        tf.AddKeyValue('purposes', ('segmentation', 'domain')),
        tf.AddKeyValue('num_classes', num_classes)
    ]

    dataset_name = 'cityscapes'

    dataset = StandardDataset(dataset=dataset_name,
                              trainvaltest_split='train',
                              video_mode='mono',
                              stereo_mode='mono',
                              labels_mode='fromid',
                              disable_const_items=True,
                              labels=labels,
                              keys_to_load=('color', 'segmentation'),
                              data_transforms=transforms,
                              video_frames=(0, ))

    loader = DataLoader(dataset, batch_size, True,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=True)

    print(f" - Can use {len(dataset)} images from the cityscapes train set "
          f"for segmentation training", flush=True)

    return loader
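# A minimal usage sketch (the parameter values are assumptions for
# illustration): the loader functions in this module return plain PyTorch
# DataLoader objects, so a training loop can iterate over them directly.
# Batch entries are indexed by (name, frame, scale) tuples created by the
# transforms; the exact keys depend on the configured transform list.
if __name__ == "__main__":
    loader = cityscapes_train(resize_height=512, resize_width=1024,
                              crop_height=512, crop_width=512,
                              batch_size=4, num_workers=2)
    batch = next(iter(loader))
    for key, value in batch.items():
        print(key, getattr(value, 'shape', value))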
def cityscapes_validation(resize_height, resize_width, batch_size,
                          num_workers):
    """A loader that loads images and ground truth for segmentation from the
    cityscapes validation set.
    """
    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((resize_height, resize_width), image_types=('color', )),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_val_seg'),
        tf.AddKeyValue('purposes', ('segmentation', )),
        tf.AddKeyValue('num_classes', num_classes)
    ]

    dataset = StandardDataset(dataset='cityscapes',
                              trainvaltest_split='validation',
                              video_mode='mono',
                              stereo_mode='mono',
                              labels_mode='fromid',
                              labels=labels,
                              keys_to_load=['color', 'segmentation'],
                              data_transforms=transforms,
                              disable_const_items=True)

    loader = DataLoader(dataset, batch_size, False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the cityscapes validation "
          f"set for segmentation validation", flush=True)

    return loader
def check_dataset(dataset_name, split=None, trainvaltest_split='train',
                  keys_to_load=None, folders_to_load=None):
    """Loads a dataset and prints name and shape of the first NUM_SAMPLES
    entries. Performs no transforms other than the necessary ones.

    :param dataset_name: Name of the dataset
    :param split: Name of the dataset split, if one exists
    :param trainvaltest_split: 'train', 'validation' or 'test'
    :param keys_to_load: keys that are supposed to be loaded, e.g. 'color',
        'depth', 'segmentation', ...
    :param folders_to_load: list of folders from which data should be loaded;
        folders not mentioned are skipped
    """
    dataset = dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())
        if any('flow' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertFlow())

    print('\n Loading {} dataset'.format(dataset))
    my_dataset = StandardDataset(dataset,
                                 split=split,
                                 trainvaltest_split=trainvaltest_split,
                                 keys_to_load=keys_to_load,
                                 data_transforms=data_transforms,
                                 folders_to_load=folders_to_load)
    my_loader = DataLoader(my_dataset, batch_size=1, shuffle=False,
                           num_workers=1, pin_memory=True, drop_last=True)

    print('Number of elements: {}'.format(len(my_dataset)))
    print_dataset(my_loader)
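# A minimal usage sketch of check_dataset (dataset name and keys are
# assumptions for illustration; any dataset known to the loader works):
if __name__ == "__main__":
    check_dataset('cityscapes',
                  trainvaltest_split='train',
                  keys_to_load=['color', 'segmentation'])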
def kitti_odom09_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and pose ground truth for pose validation
    from the kitti odom09 split.
    """
    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_odom09_val_pose'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(dataset='kitti',
                              split='odom09_split',
                              trainvaltest_split='test',
                              video_mode='video',
                              stereo_mode='mono',
                              keys_to_load=('color', 'poses'),
                              keys_to_video=('color', ),
                              data_transforms=transforms,
                              video_frames=(0, -1, 1),
                              disable_const_items=True)

    loader = DataLoader(dataset, batch_size, False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the kitti (odom09 split) "
          f"validation set for pose validation", flush=True)

    return loader
def motsynth_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for depth validation
    from the motsynth validation set.
    """
    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_val_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_zhou'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(dataset='kek',
                              trainvaltest_split='validation',
                              video_mode='mono',
                              stereo_mode='mono',
                              keys_to_load=('color', 'depth'),
                              data_transforms=transforms,
                              video_frames=(0, ),
                              simple_mode=True,
                              labels_mode='fromid',
                              seq_to_load=['001'])

    loader = DataLoader(dataset, batch_size, False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the motsynth validation "
          f"set for depth validation", flush=True)

    return loader
def kitti_2015_train(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for depth evaluation
    from the kitti_2015 training set (but for evaluation).
    """
    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_2015_train_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_kitti'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(dataset='kitti_2015',
                              trainvaltest_split='train',
                              video_mode='mono',
                              stereo_mode='mono',
                              keys_to_load=('color', 'depth'),
                              data_transforms=transforms,
                              video_frames=(0, ),
                              disable_const_items=True)

    loader = DataLoader(dataset, batch_size, False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the kitti_2015 train set "
          f"for depth evaluation", flush=True)

    return loader
# validation.json and test.json files. In this example, we use all available
# keys for cityscapes.
keys_to_load = [
    'color', 'color_right', 'depth', 'segmentation', 'camera_intrinsics',
    'camera_intrinsics_right', 'velocity'
]

# When loading an image, some data transforms are performed on it. These
# transforms will alter all image categories in the same way. At minimum,
# CreateScaledImage() and CreateColoraug() have to be included. For each
# image category like depth and segmentation, the corresponding
# Convert-transform is also necessary.
data_transforms = [
    mytransforms.CreateScaledImage(),
    mytransforms.RemoveOriginals(),
    mytransforms.RandomCrop((1024, 2048)),
    mytransforms.RandomHorizontalFlip(),
    mytransforms.CreateColoraug(new_element=True),
    mytransforms.ColorJitter(brightness=0.2, contrast=0.5, saturation=0.5,
                             hue=0.5, gamma=0.5,
                             fraction=1.0),  # values for visualization only
    mytransforms.ConvertDepth(),         # The convert transforms should come
    mytransforms.ConvertSegmentation(),  # after the Scaling/Rotating/Cropping
    mytransforms.ToTensor(),
]

# With the parameters specified above, a StandardDataset can now be created,
# as shown in the sketch below. You can iterate through it using the PyTorch
# DataLoader class.
# There are several optional arguments in the my_dataset class that are not
# featured here for the sake of
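# A minimal sketch of the construction and iteration described above
# (using the 'cityscapes' dataset name from this example):
my_dataset = StandardDataset(dataset='cityscapes',
                             trainvaltest_split='train',
                             keys_to_load=keys_to_load,
                             data_transforms=data_transforms)
my_loader = DataLoader(my_dataset, batch_size=1, shuffle=True,
                       num_workers=1, pin_memory=True, drop_last=True)
for batch in my_loader:
    # after ToTensor(), every entry is a tensor indexed by a
    # (name, frame, scale) key
    print({key: tuple(value.shape) for key, value in batch.items()})
    break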
def __init__(self, options, model=None):
    if __name__ == "__main__":  # only print when run as a standalone script
        print(" -> Executing script", os.path.basename(__file__))

    self.opt = options
    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # LABELS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    assert self.opt.train_set in {1, 2, 3, 12, 123}, "Invalid train_set!"
    assert self.opt.task_to_val in {0, 1, 2, 3, 12, 123}, "Invalid task!"

    keys_to_load = ['color', 'segmentation']

    # Labels
    labels = self._get_labels_cityscapes()

    # Train IDs
    self.train_ids = set([labels[i].trainId for i in range(len(labels))])
    self.train_ids.remove(255)
    self.train_ids = sorted(list(self.train_ids))
    self.num_classes_model = len(self.train_ids)

    # Task handling
    if self.opt.task_to_val != 0:
        labels_task = self._get_task_labels_cityscapes()
        train_ids_task = set(
            [labels_task[i].trainId for i in range(len(labels_task))])
        train_ids_task.remove(255)
        self.task_low = min(train_ids_task)
        self.task_high = max(train_ids_task) + 1
        labels = labels_task
        self.train_ids = sorted(list(train_ids_task))
    else:
        self.task_low = 0
        self.task_high = self.num_classes_model
        self.opt.task_to_val = self.opt.train_set

    # Number of classes for the SegmentationRunningScore
    self.num_classes_score = self.task_high - self.task_low

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # DATASET DEFINITIONS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # Data augmentation
    test_data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize((self.opt.height, self.opt.width),
                            image_types=['color']),
        mytransforms.ConvertSegmentation(),
        mytransforms.CreateColoraug(new_element=True,
                                    scales=self.opt.scales),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]

    # If hyperparameter search, only load the respective validation set.
    # Else, load the full validation set.
    if self.opt.hyperparameter:
        trainvaltest_split = 'train'
        folders_to_load = CitySet.get_city_set(-1)
    else:
        trainvaltest_split = 'validation'
        folders_to_load = None

    test_dataset = CityscapesDataset(dataset='cityscapes',
                                     split=self.opt.dataset_split,
                                     trainvaltest_split=trainvaltest_split,
                                     video_mode='mono',
                                     stereo_mode='mono',
                                     scales=self.opt.scales,
                                     labels_mode='fromid',
                                     labels=labels,
                                     keys_to_load=keys_to_load,
                                     data_transforms=test_data_transforms,
                                     video_frames=self.opt.video_frames,
                                     folders_to_load=folders_to_load)

    self.test_loader = DataLoader(dataset=test_dataset,
                                  batch_size=self.opt.batch_size,
                                  shuffle=False,
                                  num_workers=self.opt.num_workers,
                                  pin_memory=True,
                                  drop_last=False)

    print("++++++++++++++++++++++ INIT VALIDATION ++++++++++++++++++++++++")
    print("Using dataset\n ", self.opt.dataset, "with split",
          self.opt.dataset_split)
    print("There are {:d} validation items\n ".format(len(test_dataset)))
    print("Validating classes up to train set\n ", self.opt.train_set)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # LOGGING OPTIONS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # If no model is passed, standalone validation is to be carried out. The
    # log_path needs to be set before self.load_model() is invoked.
    if model is None:
        self.opt.validate = False
        self.opt.model_name = self.opt.load_model_name

    path_getter = GetPath()
    log_path = path_getter.get_checkpoint_path()
    self.log_path = os.path.join(log_path, 'erfnet', self.opt.model_name)

    # All outputs will be saved to save_path
    self.save_path = self.log_path

    # Create output path for standalone validation
    if not self.opt.validate:
        save_dir = 'eval_{}'.format(self.opt.dataset)
        if self.opt.hyperparameter:
            save_dir = save_dir + '_hyper'
        save_dir = save_dir + '_task_to_val{}'.format(self.opt.task_to_val)
        self.save_path = os.path.join(self.log_path, save_dir)
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)

        # Copy this file to save_path
        shutil.copy2(__file__, self.save_path)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # MODEL DEFINITION
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # Standalone validation
    if not self.opt.validate:
        # Create a conventional ERFNet
        self.model = ERFNet(self.num_classes_model, self.opt)
        self.load_model()
        self.model.to(self.device)

    # Validate while training
    else:
        self.model = model
        self.model.eval()

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # LOGGING OPTIONS II
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # self.called is used to decide which file mode shall be used when
    # writing metrics to disk.
    self.called = False

    self.metric_model = SegmentationRunningScore(self.num_classes_score)

    # Metrics are only saved if val_frequency > 0!
    if self.opt.val_frequency != 0:
        print("Saving metrics to\n ", self.save_path)

    # Set up colour output. Coloured images are only output if standalone
    # validation is carried out!
    if not self.opt.validate and self.opt.save_pred_to_disk:
        # Output path
        self.img_path = os.path.join(
            self.save_path, 'output_{}'.format(self.opt.weights_epoch))
        if self.opt.pred_wout_blend:
            self.img_path += '_wout_blend'
        if not os.path.exists(self.img_path):
            os.makedirs(self.img_path)
        print("Saving prediction images to\n ", self.img_path)
        print("Save frequency\n ", self.opt.pred_frequency)

        # Get the colours from dataset.
        colors = [(label.trainId - self.task_low, label.color)
                  for label in labels
                  if label.trainId != 255 and label.trainId in self.train_ids]
        colors.append((255, (0, 0, 0)))  # void class
        self.id_color = dict(colors)
        self.id_color_keys = list(self.id_color.keys())
        self.id_color_vals = list(self.id_color.values())

        # Ongoing index to name the outputs
        self.img_idx = 0

    # Set up probability output. Probabilities are only output if standalone
    # validation is carried out!
    if not self.opt.validate and self.opt.save_probs_to_disk:
        # Output path
        self.logit_path = os.path.join(
            self.save_path,
            'probabilities_{}'.format(self.opt.weights_epoch))
        if not os.path.exists(self.logit_path):
            os.makedirs(self.logit_path)
        print("Saving probabilities to\n ", self.logit_path)
        print("Save frequency\n ", self.opt.probs_frequency)

        # Ongoing index to name the probability outputs
        self.probs_idx = 0

    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    # Save all options to disk and print them to stdout
    self._print_options()
    self._save_opts(len(test_dataset))
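# A short sketch (a hypothetical helper, not part of the original file) of
# how the id_color mapping built above can be used to colour a prediction:
# every train ID is replaced by its RGB triple, with 255 mapped to black.
import numpy as np

def colorize_prediction(pred, id_color):
    """pred: (H, W) array of train IDs; returns an (H, W, 3) uint8 image."""
    out = np.zeros(pred.shape + (3, ), dtype=np.uint8)
    for train_id, rgb in id_color.items():
        out[pred == train_id] = rgb
    return out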
def cityscapes_sequence_train(resize_height, resize_width, crop_height,
                              crop_width, batch_size, num_workers):
    """A loader that loads images for adaptation from the cityscapes_sequence
    training set. This loader returns sequences from the left camera, as well
    as from the right camera.
    """
    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height * 568 // 512, resize_width * 1092 // 1024),
                  image_types=('color', )),
        # crop away the sides and bottom parts of the image
        tf.SidesCrop((resize_height * 320 // 512, resize_width * 1024 // 1024),
                     (resize_height * 32 // 512, resize_width * 33 // 1024)),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                       hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_sequence_adaptation'),
        tf.AddKeyValue('purposes', ('adaptation', )),
    ]

    dataset_name = 'cityscapes_sequence'

    cfg_common = {
        'dataset': dataset_name,
        'trainvaltest_split': 'train',
        'video_mode': 'mono',
        'stereo_mode': 'mono',
    }

    cfg_left = {'keys_to_load': ('color', ),
                'keys_to_video': ('color', )}

    cfg_right = {'keys_to_load': ('color_right', ),
                 'keys_to_video': ('color_right', )}

    dataset_left = StandardDataset(data_transforms=transforms_common,
                                   **cfg_left, **cfg_common)

    dataset_right = StandardDataset(
        data_transforms=[tf.ExchangeStereo()] + transforms_common,
        **cfg_right, **cfg_common)

    dataset = ConcatDataset((dataset_left, dataset_right))

    loader = DataLoader(dataset, batch_size, True,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=True)

    print(f" - Can use {len(dataset)} images from the cityscapes_sequence "
          f"train set for adaptation", flush=True)

    return loader
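# The fractions in the Resize and SidesCrop calls above are anchored to a
# reference resolution of 512 x 1024. A short worked example (values assumed
# for illustration): for resize_height=512, resize_width=1024 the image is
# first resized to 568 x 1092 and then cropped back to 320 x 1024 with a
# top-left offset of (32, 33), cropping away the sides and bottom parts.
resize_height, resize_width = 512, 1024
print((resize_height * 568 // 512, resize_width * 1092 // 1024))  # (568, 1092)
print((resize_height * 320 // 512, resize_width * 1024 // 1024))  # (320, 1024)
print((resize_height * 32 // 512, resize_width * 33 // 1024))     # (32, 33)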
def __init__(self, options):
    print(" -> Executing script", os.path.basename(__file__))

    self.opt = options
    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # LABELS AND CITIES
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    assert self.opt.train_set in {123, 1}, "Invalid train_set!"

    keys_to_load = ['color', 'segmentation']

    # Labels
    if self.opt.train_set == 1:
        labels = labels_cityscape_seg_train1.getlabels()
    else:
        labels = labels_cityscape_seg_train3_eval.getlabels()

    # Train IDs
    self.train_ids = set([labels[i].trainId for i in range(len(labels))])
    self.train_ids.remove(255)
    self.num_classes = len(self.train_ids)

    # Apply city filter
    folders_to_train = CitySet.get_city_set(0)
    if self.opt.city:
        folders_to_train = CitySet.get_city_set(self.opt.train_set)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # DATASET DEFINITIONS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # Data augmentation
    train_data_transforms = [
        mytransforms.RandomHorizontalFlip(),
        mytransforms.CreateScaledImage(),
        mytransforms.Resize((self.opt.height, self.opt.width),
                            image_types=keys_to_load),
        mytransforms.RandomRescale(1.5),
        mytransforms.RandomCrop((self.opt.crop_height, self.opt.crop_width)),
        mytransforms.ConvertSegmentation(),
        mytransforms.CreateColoraug(new_element=True,
                                    scales=self.opt.scales),
        mytransforms.ColorJitter(brightness=0.2, contrast=0.2,
                                 saturation=0.2, hue=0.1, gamma=0.0),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]

    train_dataset = CityscapesDataset(dataset="cityscapes",
                                      trainvaltest_split='train',
                                      video_mode='mono',
                                      stereo_mode='mono',
                                      scales=self.opt.scales,
                                      labels_mode='fromid',
                                      labels=labels,
                                      keys_to_load=keys_to_load,
                                      data_transforms=train_data_transforms,
                                      video_frames=self.opt.video_frames,
                                      folders_to_load=folders_to_train)

    self.train_loader = DataLoader(dataset=train_dataset,
                                   batch_size=self.opt.batch_size,
                                   shuffle=True,
                                   num_workers=self.opt.num_workers,
                                   pin_memory=True,
                                   drop_last=True)

    val_data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize((self.opt.height, self.opt.width),
                            image_types=keys_to_load),
        mytransforms.ConvertSegmentation(),
        mytransforms.CreateColoraug(new_element=True,
                                    scales=self.opt.scales),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]

    val_dataset = CityscapesDataset(dataset=self.opt.dataset,
                                    trainvaltest_split="train",
                                    video_mode='mono',
                                    stereo_mode='mono',
                                    scales=self.opt.scales,
                                    labels_mode='fromid',
                                    labels=labels,
                                    keys_to_load=keys_to_load,
                                    data_transforms=val_data_transforms,
                                    video_frames=self.opt.video_frames,
                                    folders_to_load=CitySet.get_city_set(-1))

    self.val_loader = DataLoader(dataset=val_dataset,
                                 batch_size=self.opt.batch_size,
                                 shuffle=False,
                                 num_workers=self.opt.num_workers,
                                 pin_memory=True,
                                 drop_last=True)

    self.val_iter = iter(self.val_loader)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # LOGGING OPTIONS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    print("++++++++++++++++++++++ INIT TRAINING ++++++++++++++++++++++++++")
    print("Using dataset:\n ", self.opt.dataset, "with split",
          self.opt.dataset_split)
    print("There are {:d} training items and {:d} validation items\n".format(
        len(train_dataset), len(val_dataset)))

    path_getter = GetPath()
    log_path = path_getter.get_checkpoint_path()
    self.log_path = os.path.join(log_path,
                                 'erfnet', self.opt.model_name)

    self.writers = {}
    for mode in ["train", "validation"]:
        self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))

    # Copy this file to log dir
    shutil.copy2(__file__, self.log_path)

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ",
          self.log_path)
    print("Training is using:\n ", self.device)
    print("Training takes place on train set:\n ", self.opt.train_set)
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # MODEL DEFINITION
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    # Instantiate model
    self.model = ERFNet(self.num_classes, self.opt)
    self.model.to(self.device)
    self.parameters_to_train = self.model.parameters()

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # OPTIMIZER SET-UP
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    self.model_optimizer = optim.Adam(params=self.parameters_to_train,
                                      lr=self.opt.learning_rate,
                                      weight_decay=self.opt.weight_decay)
    lambda1 = lambda epoch: pow((1 - ((epoch - 1) / self.opt.num_epochs)), 0.9)
    self.model_lr_scheduler = optim.lr_scheduler.LambdaLR(
        self.model_optimizer, lr_lambda=lambda1)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # LOSSES
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    self.crossentropy = CrossEntropyLoss(ignore_background=True,
                                         device=self.device)
    self.crossentropy.to(self.device)

    self.metric_model = SegmentationRunningScore(self.num_classes)

    # Save all options to disk and print them to stdout
    self.save_opts(len(train_dataset), len(val_dataset))
    self._print_options()

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # EVALUATOR DEFINITION
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    if self.opt.validate:
        self.evaluator = Evaluator(self.opt, self.model)
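# A short worked example (num_epochs is an assumed value) of the polynomial
# decay schedule defined above: the multiplier is slightly above 1.0 at
# epoch 0 (since epoch - 1 is negative there), exactly 1.0 at epoch 1, and
# decays towards 0 at the final epoch.
num_epochs = 200
lambda1 = lambda epoch: pow((1 - ((epoch - 1) / num_epochs)), 0.9)
for epoch in (0, 1, 100, 199):
    print(epoch, round(lambda1(epoch), 4))  # 1.0045, 1.0, 0.5407, 0.0158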
def __init__(self, dataset, trainvaltest_split, video_mode='mono',
             stereo_mode='mono', cluster_mode=None, simple_mode=False,
             labels=None, labels_mode=None, data_transforms=None,
             scales=None, keys_to_load=None, keys_to_video=None,
             keys_to_stereo=None, split=None, video_frames=None,
             disable_const_items=True, folders_to_load=None,
             files_to_load=None, n_files=None, output_filenames=False,
             flow_validation_mode=True):
    """Initialises the dataset by loading the desired data from the json file

    :param dataset: name of the dataset
    :param trainvaltest_split: can be train, validation or test
    :param video_mode: can be mono or video and defines if only the images or
        image sequences are to be loaded
    :param stereo_mode: can be mono or stereo and defines if the stereo
        images are to be loaded
    :param simple_mode: if True, the data is read directly from a folder
        without using a .json file
    :param labels: gives the labels as defined in the named tuples style in
        Cityscapes. Get the labels from the definitions folder
    :param labels_mode: can be fromid or fromrgb and defines if the
        segmentation masks are given as id or color
    :param data_transforms: takes the transforms.compose list
    :param scales: list of all scales at which the images should be loaded
        (list of exponents for powers of 2)
    :param keys_to_load: defines all keys which should be loaded
    :param keys_to_video: defines for which keys the sequences are to be
        loaded
    :param keys_to_stereo: defines for which keys the stereo images are
        supposed to be loaded
    :param split: dataset split that is supposed to be loaded. Default is the
        complete dataset itself
    :param video_frames: all frames of the sequence that are supposed to be
        loaded (list of frame numbers relative to the main frame,
        e.g. [0, -2, -1, 1, 2])
    :param disable_const_items: removes the constant items like camera
        calibration from the loading procedure
    :param folders_to_load: list of folders from which data should be loaded;
        folders not mentioned are skipped in the respective set. Only the
        last folder in a path is considered; the filter is case insensitive.
        Default: None -> all folders are loaded from the dataset
    :param files_to_load: list of files that should be loaded; files not
        mentioned are skipped in the respective set. File names need not be
        complete; the filter is case insensitive.
        Default: None -> all files are loaded from the dataset
    :param n_files: how many files shall be loaded.
        Files are selected randomly if there are more files than n_files;
        seeded by numpy.random.seed()
    """
    super(BaseDataset, self).__init__()
    assert isinstance(dataset, str)
    assert trainvaltest_split in ('train', 'validation', 'test'), \
        'trainvaltest_split must be train, validation or test'
    assert video_mode in ('mono', 'video'), 'video_mode must be mono or video'
    assert stereo_mode in ('mono', 'stereo'), \
        'stereo_mode must be mono or stereo'
    assert isinstance(simple_mode, bool)

    if data_transforms is None:
        data_transforms = [
            mytransforms.CreateScaledImage(),
            mytransforms.CreateColoraug(),
            mytransforms.ToTensor()
        ]
    if scales is None:
        scales = [0]
    if keys_to_load is None:
        keys_to_load = ['color']
    if keys_to_stereo is None and stereo_mode == 'stereo':
        keys_to_stereo = ['color']
    if keys_to_video is None and video_mode == 'video':
        keys_to_video = ['color']
    if video_frames is None:
        video_frames = [0, -1, 1]

    self.dataset = dataset
    self.video_mode = video_mode
    self.stereo_mode = stereo_mode
    self.scales = scales
    self.disable_const_items = disable_const_items
    self.output_filenames = output_filenames
    self.parameters = dps.DatasetParameterset(dataset)
    if labels is not None:
        self.parameters.labels = labels
    if labels_mode is not None:
        self.parameters.labels_mode = labels_mode

    path_getter = gp.GetPath()
    dataset_folder = path_getter.get_data_path()
    datasetpath = os.path.join(dataset_folder, self.dataset)
    self.datasetpath = datasetpath
    if split is None:
        splitpath = None
    else:
        splitpath = os.path.join(dataset_folder, self.dataset + '_' + split)

    if simple_mode is False:
        self.data = self.read_json_file(datasetpath, splitpath,
                                        trainvaltest_split, keys_to_load,
                                        keys_to_stereo, keys_to_video,
                                        video_frames, folders_to_load,
                                        files_to_load, n_files)
    else:
        self.data = self.read_from_folder(datasetpath, keys_to_load,
                                          video_mode, video_frames)

    self.load_transforms = transforms.Compose([
        mytransforms.LoadRGB(),
        mytransforms.LoadSegmentation(),
        mytransforms.LoadDepth(),
        mytransforms.LoadFlow(validation_mode=flow_validation_mode),
        mytransforms.LoadNumerics()
    ])

    # IMPORTANT to create a new list if the same list is passed to multiple
    # datasets. Otherwise, due to the mutability of lists,
    # ConvertSegmentation will only be added once. Hence, the labels may be
    # wrong for the 2nd, 3rd, ... dataset!
    self.data_transforms = list(data_transforms)

    # Error if CreateColoraug and CreateScaledImage not in transforms.
    # isinstance() is used for all checks below since data_transforms
    # contains transform instances, not classes
    if not any(isinstance(t, mytransforms.CreateScaledImage)
               for t in self.data_transforms):
        raise Exception(
            'The transform CreateScaledImage() has to be part of the '
            'data_transforms list')
    if not any(isinstance(t, mytransforms.CreateColoraug)
               for t in self.data_transforms):
        raise Exception(
            'The transform CreateColoraug() has to be part of the '
            'data_transforms list')

    # Error if depth, segmentation or flow keys are given but not the
    # corresponding Convert-transform
    if any([key.startswith('segmentation') for key in keys_to_load]) and \
            not any(isinstance(t, mytransforms.ConvertSegmentation)
                    for t in self.data_transforms):
        raise Exception(
            'When loading segmentation images, please add '
            'mytransforms.ConvertSegmentation() to the data_transforms')
    if any([key.startswith('depth') for key in keys_to_load]) and \
            not any(isinstance(t, mytransforms.ConvertDepth)
                    for t in self.data_transforms):
        raise Exception(
            'When loading depth images, please add '
            'mytransforms.ConvertDepth() to the data_transforms')
    if any([key.startswith('flow') for key in keys_to_load]) and \
            not any(isinstance(t, mytransforms.ConvertFlow)
                    for t in self.data_transforms):
        raise Exception(
            'When loading flow images, please add '
            'mytransforms.ConvertFlow() to the data_transforms')

    # In the flow validation mode, it is not allowed to use data-altering
    # transforms
    if any([key.startswith('flow') for key in keys_to_load]) \
            and flow_validation_mode:
        allowed_transforms = (
            mytransforms.CreateScaledImage,
            mytransforms.CreateColoraug,
            mytransforms.ConvertSegmentation,
            mytransforms.ConvertDepth,
            mytransforms.ConvertFlow,
            mytransforms.RemoveOriginals,
            mytransforms.ToTensor,
            mytransforms.Relabel,
            mytransforms.OneHotEncoding,
            mytransforms.NormalizeZeroMean,
            mytransforms.AdjustKeys,
            mytransforms.RemapKeys,
            mytransforms.AddKeyValue,
        )
        for transform in self.data_transforms:
            if not isinstance(transform, allowed_transforms):
                raise Exception(
                    'In flow validation mode, it is not allowed to use '
                    'data-altering transforms')

    # Set the correct parameters to the ConvertDepth and ConvertSegmentation
    # transforms
    for transform in self.data_transforms:
        if isinstance(transform, mytransforms.ConvertDepth):
            transform.set_mode(self.parameters.depth_mode)
        elif isinstance(transform, mytransforms.ConvertSegmentation):
            transform.set_mode(self.parameters.labels,
                               self.parameters.labels_mode)
        elif isinstance(transform, mytransforms.ConvertFlow):
            transform.set_mode(self.parameters.flow_mode,
                               flow_validation_mode)

    self.data_transforms = transforms.Compose(self.data_transforms)
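# A minimal usage sketch of the checks above ('cityscapes' is an assumption
# for illustration): a dataset that loads segmentation masks must include
# ConvertSegmentation() in addition to the always-required
# CreateScaledImage() and CreateColoraug().
dataset = StandardDataset(dataset='cityscapes',
                          trainvaltest_split='train',
                          keys_to_load=['color', 'segmentation'],
                          data_transforms=[
                              mytransforms.CreateScaledImage(),
                              mytransforms.ConvertSegmentation(),
                              mytransforms.CreateColoraug(),
                              mytransforms.ToTensor(),
                          ])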
def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads image sequences for depth training from the kitti
    training set. This loader returns sequences from the left camera, as well
    as from the right camera.
    """
    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width),
                  image_types=('color', 'depth', 'camera_intrinsics', 'K')),
        tf.ConvertDepth(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                       hue=0.1, gamma=0.0, fraction=0.5),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_train_depth'),
        tf.AddKeyValue('purposes', ('depth', 'domain')),
    ]

    dataset_name = 'kitti'

    cfg_common = {
        'dataset': dataset_name,
        'trainvaltest_split': 'train',
        'video_mode': 'video',
        'stereo_mode': 'mono',
        'split': 'zhou_split',
        'video_frames': (0, -1, 1),
        'disable_const_items': False
    }

    cfg_left = {'keys_to_load': ('color', ),
                'keys_to_video': ('color', )}

    cfg_right = {'keys_to_load': ('color_right', ),
                 'keys_to_video': ('color_right', )}

    dataset_left = StandardDataset(data_transforms=transforms_common,
                                   **cfg_left, **cfg_common)

    dataset_right = StandardDataset(
        data_transforms=[tf.ExchangeStereo()] + transforms_common,
        **cfg_right, **cfg_common)

    dataset = ConcatDataset((dataset_left, dataset_right))

    loader = DataLoader(dataset, batch_size, True,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=True)

    print(f" - Can use {len(dataset)} images from the kitti (zhou_split) "
          f"train split for depth training", flush=True)

    return loader
def check_scaled_dataset(dataset_name, scaled_dataset_name,
                         trainvaltest_split, keys_to_load, scaled_size,
                         split=None):
    """Checks whether the images in a dataset generated by the dataset_scaler
    are identical to the images that are generated by loading the original
    dataset and scaling them afterwards

    :param dataset_name: Name of the unscaled dataset
    :param scaled_dataset_name: Name of the scaled dataset
    :param trainvaltest_split: 'train', 'validation' or 'test'
    :param keys_to_load: keys that are supposed to be loaded, e.g. 'color',
        'depth', 'segmentation', ...
    :param scaled_size: Size of the scaled image (h, w)
    :param split: Name of the dataset split, if one exists
    """
    dataset = dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize(output_size=scaled_size),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())

    print('\n Loading {} dataset'.format(dataset))
    my_dataset = StandardDataset(dataset,
                                 split=split,
                                 trainvaltest_split=trainvaltest_split,
                                 keys_to_load=keys_to_load,
                                 data_transforms=data_transforms,
                                 output_filenames=True)
    my_loader = DataLoader(my_dataset, batch_size=1, shuffle=False,
                           num_workers=0, pin_memory=True, drop_last=True)
    print_dataset(my_loader)

    dataset_s = scaled_dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())

    print('\n Loading {} dataset'.format(dataset_s))
    my_dataset_s = StandardDataset(dataset_s,
                                   split=split,
                                   trainvaltest_split=trainvaltest_split,
                                   keys_to_load=keys_to_load,
                                   data_transforms=data_transforms,
                                   output_filenames=True)
    my_loader_s = DataLoader(my_dataset_s, batch_size=1, shuffle=False,
                             num_workers=0, pin_memory=True, drop_last=True)
    print_dataset(my_loader_s)

    print("Testing dataset_scaler")
    samples = []
    samples_s = []
    iter_my_loader = iter(my_loader)
    iter_my_loader_s = iter(my_loader_s)
    for _ in range(2):
        samples.append(next(iter_my_loader).copy())
        samples_s.append(next(iter_my_loader_s).copy())

    for key in keys_to_load:
        print("Check if {} entries are equal:".format(key))
        print("  Should be False: {}".format(
            torch.equal(samples[1][(key, 0, 0)], samples_s[0][(key, 0, 0)])))
        print("  Should be True:  {}".format(
            torch.equal(samples[0][(key, 0, 0)], samples_s[0][(key, 0, 0)])))
        print("  Should be True:  {}".format(
            torch.equal(samples[1][(key, 0, 0)], samples_s[1][(key, 0, 0)])))
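# A minimal usage sketch ('kitti' and 'kitti_scaled' are assumed names; the
# scaled dataset is one produced beforehand by the dataset_scaler):
if __name__ == "__main__":
    check_scaled_dataset('kitti', 'kitti_scaled',
                         trainvaltest_split='train',
                         keys_to_load=['color', 'depth'],
                         scaled_size=(192, 640))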
# The following parameters and the data_transforms list are optional. If
# data_transforms is omitted, the default consists of CreateScaledImage(),
# CreateColoraug() and ToTensor().
width = 640
height = 192
scales = [0, 1, 2, 3]
data_transforms = [
    # mytransforms.RandomExchangeStereo(),  # (color, 0, -1)
    mytransforms.RandomHorizontalFlip(),
    mytransforms.RandomVerticalFlip(),
    mytransforms.CreateScaledImage(),  # (color, 0, 0)
    mytransforms.RandomRotate(0.0),
    mytransforms.RandomTranslate(0),
    mytransforms.RandomRescale(scale=1.1, fraction=0.5),
    mytransforms.RandomCrop((320, 1088)),
    mytransforms.Resize((height, width)),
    mytransforms.MultiResize(scales),
    mytransforms.CreateColoraug(new_element=True, scales=scales),  # (color_aug, 0, 0)
    mytransforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                             hue=0.1, gamma=0.0),
    mytransforms.GaussianBlurr(fraction=0.5),
    mytransforms.RemoveOriginals(),
    mytransforms.ToTensor(),
    mytransforms.NormalizeZeroMean(),
]

print('Loading {} dataset, {} split'.format(dataset, trainvaltest_split))
traindataset = StandardDataset(
    dataset,
    trainvaltest_split,