def generate_x(self, cfg: SampleCfg):
    """Build the augmented network input for one sample.

    The frame referenced by ``cfg.fish_classification`` is read from disk,
    cropped through ``utils.get_image_crop`` using the rectangle and the
    geometric jitter stored on ``cfg``, and then passed through the optional
    colour, flip and blur augmentations.

    :param cfg: Sample configuration; supplies the video id / frame, the crop
        rectangle and every augmentation parameter read below.
    :return: The float32 crop multiplied by 255.0.
    """
    labelled = cfg.fish_classification
    frame_path = dataset.image_crop_fn(labelled.video_id, labelled.frame)
    frame = scipy.misc.imread(frame_path)

    patch = utils.get_image_crop(
        full_rgb=frame,
        rect=cfg.rect,
        scale_rect_x=cfg.scale_rect_x,
        scale_rect_y=cfg.scale_rect_y,
        shift_x_ratio=cfg.shift_x_ratio,
        shift_y_ratio=cfg.shift_y_ratio,
        angle=cfg.angle,
        out_size=INPUT_ROWS,
    ).astype('float32')

    # Colour jitter, applied in this fixed order. A ratio of exactly 0.5 is
    # the "leave untouched" value, so that step is skipped entirely.
    colour_steps = (
        (img_augmentation.saturation, 0.2, cfg.saturation),
        (img_augmentation.contrast, 0.25, cfg.contrast),
        (img_augmentation.brightness, 0.3, cfg.brightness),
    )
    for adjust, variance, ratio in colour_steps:
        if ratio != 0.5:
            patch = adjust(patch, variance=variance, r=ratio)

    if cfg.hflip:
        patch = img_augmentation.horizontal_flip(patch)
    if cfg.vflip:
        patch = img_augmentation.vertical_flip(patch)

    # A factor of 1 means "no blur"; otherwise the reciprocal is handed on.
    if cfg.blurred_by_downscaling != 1:
        blur_ratio = 1.0 / cfg.blurred_by_downscaling
        patch = img_augmentation.blurred_by_downscaling(patch, blur_ratio)

    # NOTE(review): the final scaling suggests get_image_crop yields values
    # in [0, 1] - confirm against utils.
    return patch * 255.0
def create_image_lmdb(image_data, split="train", shape=(3, 224, 224), data_root=None, crop_size=224):
    """
    Create the LMDB for the images.

    Every entry of ``image_data`` is loaded, cropped around its bounding box,
    resized to ``crop_size`` x ``crop_size``, converted to BGR with the channel
    axis first and stored as a serialized Caffe datum under a zero-padded
    7-digit index key.

    :param image_data: List of JSON objects with details about the images.
        Each entry must provide ``"path"`` and ``"bbox"`` (x, y, width, height).
    :param split: Specify which data split this LMDB is for.
    :param shape: The shape of the image with which to store in the LMDB.
        Only used to size the database map.
    :param data_root: The root folder where to store the LMDB file. ``None``
        is treated as the empty path, i.e. relative to the working directory.
    :param crop_size: The final size of the cropped image.
    :return: None
    """
    print("Creating {0} Image LMDB".format(split))

    # os.path.join(None, ...) raises TypeError, so the declared default would
    # always crash; fall back to the empty path like create_dataset does.
    if data_root is None:
        data_root = ""

    # Distinct names so the bounding-box unpacking below cannot clobber them.
    channels, img_width, img_height = shape

    filename = os.path.join(data_root, "image-{0}-lmdb".format(split))

    # We need to prepare the database for the size. We'll set it 100 times
    # greater than what we theoretically need.
    map_size = len(image_data) * channels * img_width * img_height * 100

    in_db = lmdb.open(filename, map_size=map_size)
    try:
        with in_db.begin(write=True) as in_txn:
            for idx, ann in tqdm(enumerate(image_data), total=len(image_data)):
                img = scp.ndimage.imread(ann["path"])

                # If not triple channel image, make it triple channeled
                if len(img.shape) <= 2:
                    img = np.dstack((img, img, img))

                # Crop out the object with context padding.
                x, y, box_width, box_height = ann["bbox"]
                crop_im = utils.get_image_crop(img, x, y, box_width, box_height, crop_size)

                # Resize it to the desired shape.
                im = scp.misc.imresize(crop_im, (crop_size, crop_size, 3))

                # Convert image from RGB to BGR
                im = im[:, :, ::-1]
                # Convert to CxHxW
                im = im.transpose((2, 0, 1))

                im_data = caffe.io.array_to_datum(im)

                key = '{:0>7d}'.format(idx)
                # if using python3, we need to convert the UTF-8 string to a byte array
                if sys.version_info[0] == 3:
                    key = key.encode('utf-8')

                in_txn.put(key, im_data.SerializeToString())
    finally:
        # Release the environment even when an image fails to load or convert.
        in_db.close()
def process_sample(cfg: SampleCfg):
    """Crop one classified frame and optionally store the crop as a JPEG.

    The source frame is read from ``crops_dir/<video_id>/<frame + 1>.jpg``
    (frame number zero-padded to four digits) and cropped to ``cfg.rect`` at
    ``INPUT_ROWS`` output size. The per-video output directory under
    ``classification_crops_dir`` is always created; the crop itself is only
    written when the module-level ``save_jpegs`` flag is truthy.

    :param cfg: Sample configuration providing ``fish_classification``
        (video id and frame) and ``rect``.
    :return: None
    """
    video_id = cfg.fish_classification.video_id
    # File names are the frame number shifted by one.
    frame_name = '{:04}.jpg'.format(int(cfg.fish_classification.frame) + 1)

    frame = scipy.misc.imread(os.path.join(crops_dir, video_id, frame_name))
    patch = utils.get_image_crop(full_rgb=frame, rect=cfg.rect, out_size=INPUT_ROWS)

    out_dir = os.path.join(classification_crops_dir, video_id)
    os.makedirs(out_dir, exist_ok=True)

    if not save_jpegs:
        return
    scipy.misc.imsave(os.path.join(out_dir, frame_name), patch)
def prepare_y(self, cfg: SampleCfg):
    """Build the target mask matching the augmented input of one sample.

    The mask at ``cfg.img_idx`` is scaled by 1/255, warped through
    ``utils.get_image_crop`` with the geometric parameters stored on ``cfg``
    and flipped when requested. No colour augmentation is applied.

    :param cfg: Sample configuration (mask index, geometric jitter, flips).
    :return: The float32 mask with an extra axis inserted at position 3.
    """
    mask = self.masks[cfg.img_idx].astype(np.float32) / 255.0

    # The crop rectangle always covers the whole IMG_WITH x IMG_HEIGHT frame.
    whole_frame = Rect(0, 0, IMG_WITH, IMG_HEIGHT)
    warped = utils.get_image_crop(
        full_rgb=mask,
        rect=whole_frame,
        scale_rect_x=cfg.scale_rect_x,
        scale_rect_y=cfg.scale_rect_y,
        shift_x_ratio=cfg.shift_x_ratio,
        shift_y_ratio=cfg.shift_y_ratio,
        angle=cfg.angle,
        out_size=IMG_WITH,
        order=1,  # presumably the interpolation order - confirm in utils
    )
    warped = warped.astype('float32')

    if cfg.hflip:
        warped = img_augmentation.horizontal_flip(warped)
    if cfg.vflip:
        warped = img_augmentation.vertical_flip(warped)

    # NOTE(review): axis=3 needs a crop with at least three dimensions on
    # current NumPy; if the mask is 2-D this raises AxisError - confirm.
    return np.expand_dims(warped, axis=3)
def prepare_x(self, cfg: SampleCfg):
    """Build the augmented, preprocessed network input for one sample.

    The image at ``cfg.img_idx`` is warped through ``utils.get_image_crop``
    over the full ``IMG_WITH`` x ``IMG_HEIGHT`` rectangle, then run through
    the optional colour, flip and blur augmentations.

    :param cfg: Sample configuration (image index and augmentation values).
    :return: ``preprocess_input`` applied to the float32 crop times 255.0.
    """
    source = self.images[cfg.img_idx]
    whole_frame = Rect(0, 0, IMG_WITH, IMG_HEIGHT)

    patch = utils.get_image_crop(
        full_rgb=source,
        rect=whole_frame,
        scale_rect_x=cfg.scale_rect_x,
        scale_rect_y=cfg.scale_rect_y,
        shift_x_ratio=cfg.shift_x_ratio,
        shift_y_ratio=cfg.shift_y_ratio,
        angle=cfg.angle,
        out_size=IMG_WITH,
    ).astype('float32')

    # Colour adjustments: 0.5 is the neutral ratio and skips the step.
    if cfg.saturation != 0.5:
        patch = img_augmentation.saturation(
            patch, variance=0.2, r=cfg.saturation)
    if cfg.contrast != 0.5:
        patch = img_augmentation.contrast(
            patch, variance=0.25, r=cfg.contrast)
    if cfg.brightness != 0.5:
        patch = img_augmentation.brightness(
            patch, variance=0.3, r=cfg.brightness)

    # Mirroring.
    if cfg.hflip:
        patch = img_augmentation.horizontal_flip(patch)
    if cfg.vflip:
        patch = img_augmentation.vertical_flip(patch)

    # Blur: a factor of 1 disables it, otherwise its reciprocal is passed on.
    if cfg.blurred_by_downscaling != 1:
        blur_ratio = 1.0 / cfg.blurred_by_downscaling
        patch = img_augmentation.blurred_by_downscaling(patch, blur_ratio)

    scaled = patch * 255.0
    return preprocess_input(scaled)
def create_dataset(image_data, filename="train.hdf5", shape=(3, 224, 224), data_root="", crop_size=224):
    """
    Create a HDF5 file to store the data set, so we can use it with Keras or Blocks.

    Each image is the segment of the object for which the attribute applies.
    Three datasets are written, all indexed by the position of the annotation
    in ``image_data``:

    * ``data`` - the cropped images, shape ``(n_images,) + shape``
    * ``attributes`` - binary attribute vectors, shape ``(n_images, 204)``
    * ``metadata`` - the annotation ``"id"``, shape ``(n_images, 1)``

    Ref. https://www.getdatajoy.com/learn/Read_and_Write_HDF5_from_Python

    :param image_data: List of JSON objects with details about the images.
        Each entry must provide ``"id"``, ``"attrs_vector"``, ``"bbox"``
        (x, y, width, height) and ``"path"``.
    :param filename: The filename for the HDF5 file. You should specify the
        split here. ``.hdf5`` is appended when the extension is different.
    :param shape: The shape of the image to save into the HDF5 file.
    :param data_root: Directory the HDF5 file is written into.
    :param crop_size: The final size of the cropped image.
    :return: ``True`` when the file was written, ``False`` when any error
        occurred (the traceback is printed).
    """
    # Ensure the extension of the filename is `hdf5`
    if filename.split('.')[-1].strip() != 'hdf5':
        filename += ".hdf5"

    filename = os.path.join(data_root, filename)

    try:
        # Get the shape of the HDF5 file based on the number of images in the split
        n_images = len(image_data)
        # tuple() lets callers pass the shape as a list as well.
        dataset_shape = (n_images, ) + tuple(shape)

        with h5py.File(filename, 'w') as f:
            dset = f.create_dataset("data", dataset_shape)
            # The attribute dataset is created with 204 columns, so every
            # attrs_vector is expected to fit that width.
            labels = f.create_dataset("attributes", (n_images, 204))
            metadata = f.create_dataset("metadata", (n_images, 1))

            dset.attrs["desc"] = "This is the image data."
            labels.attrs["desc"] = "These are the attribute labels of the image data having the same index."
            metadata.attrs["desc"] = "Metadata. Contains the COCO image ID of the image at the same index."

            # Now continue creating the HDF5 file with the list of attributes
            for i, ann in tqdm(enumerate(image_data), total=n_images):
                ann_id = ann["id"]
                ann_attrs = ann["attrs_vector"]

                # Get the object attributes as an array of 1s and 0s.
                # The builtin float replaces the removed `np.float` alias.
                img_attrs = np.array([float(x > 0) for x in ann_attrs])

                # Get the bounding box for the object
                bbox = ann['bbox']

                # Read the image data using the given path.
                try:
                    img = scp.ndimage.imread(ann["path"])
                except Exception:
                    # Not a bare except: KeyboardInterrupt / SystemExit must
                    # propagate instead of being reported as a bad path.
                    raise Exception("Invalid image path")

                # If single channel image, make it triple channeled
                if len(img.shape) <= 2:
                    img = np.dstack((img, img, img))

                # Crop out the object whose attributes we have.
                x, y, width, height = bbox
                crop_img = utils.get_image_crop(img, x, y, width, height, crop_size)

                # Resize it to the desired shape.
                img = scp.misc.imresize(crop_img, (crop_size, crop_size, 3))

                # Move the last (channel) axis to the front.
                img = np.transpose(img, (2, 0, 1))

                # We have all the required data so we now save it.
                dset[i, :, :, :] = img
                labels[i, :] = img_attrs
                metadata[i, :] = ann_id

        return True

    except (Exception, ):
        # Best-effort API: report the failure and signal it via the return value.
        traceback.print_exc()
        return False