def __getitem__(self, idx):
    """
    - We delay loading the data to reduce the memory overhead of pickling
      the dataset across dataloader workers.
    - Loads the data if not already loaded.
    - Sets up and initializes the queue if not already initialized.
    - Depending on the data source (folder or filelist), get the image.
      If using the QueueDataset and the image is valid, save it to the
      queue if the queue is not full; otherwise return a previously seen
      valid image from the queue if the queue is not empty.
    """
    if not self.is_initialized:
        self._load_data(self._path)
        self.is_initialized = True
    if not self.queue_init and self.enable_queue_dataset:
        self._init_queues()
    is_success = True
    image_path = self.image_dataset[idx]
    try:
        if self.data_source == "disk_filelist":
            image_path = self._replace_img_path_prefix(
                image_path,
                replace_prefix=self._remove_prefix,
                new_prefix=self._new_prefix,
            )
            with PathManager.open(image_path, "rb") as fopen:
                img = Image.open(fopen).convert("RGB")
        elif self.data_source == "disk_folder":
            img = self.image_dataset[idx][0]
        if is_success and self.enable_queue_dataset:
            self.on_sucess(img)
    except Exception as e:
        logging.warning(f"Couldn't load: {self.image_dataset[idx]}. Exception: \n{e}")
        is_success = False
        # if we have the queue dataset class enabled, we try to use it to
        # get a previously seen valid image
        if self.enable_queue_dataset:
            img, is_success = self.on_failure()
            if img is None:
                img = get_mean_image(
                    self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE
                )
        else:
            img = get_mean_image(self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE)
    return img, is_success
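
# A minimal sketch of the queue fallback used above, assuming a simple
# per-worker bounded deque; the actual QueueDataset base class may use a
# different structure. `on_sucess` stashes a valid image (the spelling
# matches the call sites above), and `on_failure` pops a previously seen
# image so a single corrupt file does not kill the batch.
from collections import deque

from PIL import Image


class _QueueFallbackSketch:
    def __init__(self, maxlen: int = 100):
        # Bounded buffer of recently decoded, valid images.
        self.queue = deque(maxlen=maxlen)

    def on_sucess(self, img: Image.Image) -> None:
        # deque(maxlen=...) silently evicts the oldest entry when full.
        self.queue.append(img)

    def on_failure(self):
        # Returns (image, is_success); (None, False) if nothing is buffered.
        if self.queue:
            return self.queue[-1], True
        return None, False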
def __getitem__(self, idx):
    """
    Simply return the mean dummy image of the specified size and mark it
    as a success.
    """
    img = get_mean_image(self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE)
    is_success = True
    return img, is_success
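
# `get_mean_image` is defined elsewhere in the library; a plausible minimal
# version (an assumption, not the verbatim implementation) is a uniform
# mid-gray RGB square, which gives every dummy or failed sample the same
# benign content of the configured crop size.
import numpy as np
from PIL import Image


def get_mean_image(crop_size: int) -> Image.Image:
    # 128 per channel is mid-gray, close to common dataset pixel means.
    arr = 128 * np.ones((crop_size, crop_size, 3), dtype=np.uint8)
    return Image.fromarray(arr)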
def __getitem__(self, idx: int):
    """
    Return either a deterministic synthetic image (seeded by the sample
    index) or the mean dummy image of the specified size, and mark it as
    a success.
    """
    crop_size = self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE
    if self.cfg["DATA"][self.split].RANDOM_SYNTHETIC_IMAGES:
        img = self.generate_image(seed=idx, crop_size=crop_size)
    else:
        img = get_mean_image(crop_size)
    is_success = True
    return img, is_success
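
# A hedged sketch of what `generate_image` could look like: the name and
# signature come from the call above, but the body is an assumption.
# Seeding a local RNG with the sample index makes the "random" image
# deterministic per index, so repeated epochs and different workers see
# identical data.
import numpy as np
from PIL import Image


def generate_image(seed: int, crop_size: int) -> Image.Image:
    rng = np.random.default_rng(seed)
    arr = rng.integers(0, 256, size=(crop_size, crop_size, 3), dtype=np.uint8)
    return Image.fromarray(arr)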
def __getitem__(self, index) -> Tuple[Image.Image, bool]:
    """
    Fetch the next image from the airstore iterator, falling back to the
    QueueDataset (or a mean dummy image) if decoding fails.
    """
    if self._iterator is None:
        self._iterator = self._open_iterator()
    if not self.queue_init and self.enable_queue_dataset:
        self._init_queues()
    try:
        # TODO (wpc, prigoyal): we should check images are good when we are
        # uploading them to airstore.
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        image_bytes = next(self._iterator)["image"]
        img = Image.open(io.BytesIO(image_bytes))
        if img.mode != "RGB":
            img = img.convert("RGB")
        if self.enable_queue_dataset:
            self.on_sucess(img)
        is_success = True
    except Exception as e:
        # TODO: airstore should have no failed images because they are
        # filtered at prepare time. Then, this should be removed.
        logging.warning(e)
        is_success = False
        # if we have the queue dataset class enabled, we try to use it to
        # get a previously seen valid image
        if self.enable_queue_dataset:
            img, is_success = self.on_failure()
            if img is None:
                raise RuntimeError(
                    "Encountered invalid image and couldn't load from QueueDataset"
                )
        else:
            img = get_mean_image(self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE)
    return img, is_success
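
# The bytes -> PIL decode step above can be factored into a small helper;
# this is a sketch mirroring the inline logic (the helper name is ours, not
# part of the library). LOAD_TRUNCATED_IMAGES lets PIL decode files whose
# tail is missing instead of raising, which matters for unvalidated uploads.
import io

from PIL import Image, ImageFile


def decode_image_bytes(image_bytes: bytes) -> Image.Image:
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    img = Image.open(io.BytesIO(image_bytes))
    # Convert palette/grayscale/CMYK images to a consistent 3-channel mode.
    if img.mode != "RGB":
        img = img.convert("RGB")
    return img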
def __getitem__(self, idx):
    """
    - We delay loading the data to reduce the memory overhead of pickling
      the dataset across dataloader workers.
    - Loads the data if not already loaded.
    - Sets up and initializes the queue if not already initialized.
    - Depending on the data source (folder, filelist, or ROI annotations),
      get the image. If using the QueueDataset and the image is valid,
      save it to the queue if the queue is not full; otherwise return a
      previously seen valid image from the queue if the queue is not empty.
    """
    if not self.is_initialized:
        self._load_data(self._path)
        self.is_initialized = True
    if not self.queue_init and self.enable_queue_dataset:
        self._init_queues()
    is_success = True
    image_path = self.image_dataset[idx]
    try:
        if self.data_source == "disk_filelist":
            image_path = self._replace_img_path_prefix(
                image_path,
                replace_prefix=self._remove_prefix,
                new_prefix=self._new_prefix,
            )
            with g_pathmgr.open(image_path, "rb") as fopen:
                img = Image.open(fopen).convert("RGB")
        elif self.data_source == "disk_folder":
            img = self.image_dataset[idx][0]
        elif self.data_source == "disk_roi_annotations":
            # load the image and crop it to the annotated region of interest
            with g_pathmgr.open(image_path, "rb") as fopen:
                bbox = [float(item) for item in self.image_roi_bbox[idx]]
                img = Image.open(fopen).crop(bbox).convert("RGB")
            # TODO: move the below to a dedicated transform
            # applicable to openimages dataset only
            width, height = img.size
            bbox_size = min(img.size)
            ratio = max(img.size) / min(img.size)
            if ratio >= 1.2:
                if width < height:
                    # bigger height
                    bbox = (0, 0, bbox_size, bbox_size)
                else:
                    # bigger width
                    bbox = (
                        int((width - bbox_size) / 2),
                        0,
                        int((width - bbox_size) / 2) + bbox_size,
                        height,
                    )
                img = img.crop(bbox)
        if is_success and self.enable_queue_dataset:
            self.on_sucess(img)
    except Exception as e:
        logging.warning(f"Couldn't load: {self.image_dataset[idx]}. Exception: \n{e}")
        is_success = False
        # if we have the queue dataset class enabled, we try to use it to
        # get a previously seen valid image
        if self.enable_queue_dataset:
            img, is_success = self.on_failure()
            if img is None:
                img = get_mean_image(
                    self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE
                )
        else:
            img = get_mean_image(self.cfg["DATA"][self.split].DEFAULT_GRAY_IMG_SIZE)
    return img, is_success
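
# The TODO above asks for the aspect-ratio crop to become a dedicated
# transform; below is a sketch of that refactor (the name and standalone
# form are ours) with the same behavior as the inline code: if the longer
# side exceeds the shorter by >= 1.2x, crop a top square from tall images
# or a horizontally centered square band from wide ones.
from PIL import Image


def square_crop_if_elongated(img: Image.Image, ratio_threshold: float = 1.2) -> Image.Image:
    width, height = img.size
    bbox_size = min(img.size)
    ratio = max(img.size) / min(img.size)
    if ratio < ratio_threshold:
        return img
    if width < height:
        # Taller than wide: keep the top square.
        bbox = (0, 0, bbox_size, bbox_size)
    else:
        # Wider than tall: keep a centered square band at full height
        # (bbox_size == height here, so the result is square).
        left = int((width - bbox_size) / 2)
        bbox = (left, 0, left + bbox_size, height)
    return img.crop(bbox)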