def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if not isTrain:
        # Evaluation: shortest edge to 256 (cubic interpolation), then a
        # 224x224 center crop.
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    # PCA lighting constants copied from fb.resnet.torch in RGB order; they
    # are reversed below because the images handled here are BGR.
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    pipeline = [GoogleNetResize()]
    # Remove the color augmentations below if your CPU is not fast enough.
    pipeline.append(imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(
            0.1,
            eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
            eigvec=rgb_eigvec[::-1, ::-1]),
    ]))
    pipeline.append(imgaug.Flip(horiz=True))
    return pipeline
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if not isTrain:
        # The shortest edge is resized to the smallest power of two that is
        # >= IMAGE_SIZE, then the center IMAGE_SIZE x IMAGE_SIZE is cropped.
        shortest_edge = 2 ** (IMAGE_SIZE - 1).bit_length()
        return [
            imgaug.ResizeShortestEdge(shortest_edge, cv2.INTER_CUBIC),
            imgaug.CenterCrop((IMAGE_SIZE, IMAGE_SIZE)),
        ]

    # PCA lighting constants copied from fb.resnet.torch (RGB order).
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    return [
        GoogleNetResize(),
        imgaug.RandomOrderAug([
            JohnAug(),
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion: reverse the eigenvalues and both axes of
            # the eigenvector matrix.
            imgaug.Lighting(
                0.1,
                eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
                eigvec=rgb_eigvec[::-1, ::-1]),
        ]),
        imgaug.Flip(horiz=True),
    ]
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if isTrain:
        # Sec 5.1: We use scale and aspect ratio data augmentation [35] as
        # in [12]. The network input image is a 224x224 pixel random crop
        # from an augmented image or its horizontal flip.

        # PCA lighting constants copied from fb.resnet.torch (RGB order),
        # converted to BGR by reversing them.
        bgr_eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
        bgr_eigvec = np.array(
            [[-0.5675, 0.7192, 0.4009],
             [-0.5808, -0.0045, -0.8140],
             [-0.5836, -0.6948, 0.4203]],
            dtype='float32')[::-1, ::-1]

        result = [GoogleNetResize()]
        result.append(imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            imgaug.Lighting(0.1, eigval=bgr_eigval, eigvec=bgr_eigvec),
        ]))
        result.append(imgaug.Flip(horiz=True))
    else:
        result = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return result
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if not isTrain:
        crop_shape = (DEFAULT_IMAGE_SHAPE, DEFAULT_IMAGE_SHAPE)
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop(crop_shape),
        ]

    # PCA lighting constants copied from fb.resnet.torch (RGB order).
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    return [
        GoogleNetResize(),
        # It's OK to remove the following augs if your CPU is not fast enough.
        # Removing brightness/contrast/saturation does not have a significant
        # effect on accuracy, so BrightnessScale((0.6, 1.4)),
        # Contrast((0.6, 1.4)) and Saturation(0.4) are left out here.
        # Removing lighting leads to a tiny drop in accuracy, so it is kept.
        imgaug.RandomOrderAug([
            # rgb-bgr conversion: reverse the eigenvalues and both axes of
            # the eigenvector matrix.
            imgaug.Lighting(
                0.1,
                eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
                eigvec=rgb_eigvec[::-1, ::-1]),
        ]),
        imgaug.Flip(horiz=True),
    ]
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images.

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    # PCA lighting constants in RGB order; reversed below for BGR input.
    # Unlike the eigenvectors, the eigenvalues stay a plain Python list.
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    pipeline = [GoogleNetResize()]
    pipeline.append(imgaug.RandomOrderAug([
        imgaug.Brightness(30, clip=False),
        imgaug.Contrast((0.8, 1.2), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(
            0.1,
            eigval=rgb_eigval[::-1],
            eigvec=rgb_eigvec[::-1, ::-1]),
    ]))
    # The color augmentors above are created with clip=False; a single Clip
    # augmentor follows them instead.
    pipeline.append(imgaug.Clip())
    pipeline.append(imgaug.Flip(horiz=True))
    return pipeline
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    This is the image augmentation used for the residual network.

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if not isTrain:
        # Not training data.
        return [
            # Resize the shortest edge to the given number while keeping the
            # aspect ratio.
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            # Crop the image at its center.
            imgaug.CenterCrop((224, 224)),
        ]

    # Training data.
    # PCA lighting constants copied from fb.resnet.torch (RGB order).
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    return [
        # Called without arguments, so crop_area_fraction and the other
        # parameters keep their predefined values.
        GoogleNetResize(),
        # imgaug is an image augmentation library. Remove these augs if your
        # CPU is not fast enough.
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion for the constants copied from
            # fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
                eigvec=rgb_eigvec[::-1, ::-1]),
        ]),
        imgaug.Flip(horiz=True),
    ]
def get_augmentations(is_train):
    """
    Build the list of image augmentors for training or evaluation.

    Args:
        is_train: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    if not is_train:
        return [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]

    # PCA lighting constants copied from fb.resnet.torch (RGB order).
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    # TODO : 76% or 49%?
    random_resize = GoogleNetResize(crop_area_fraction=0.76, target_shape=224)
    color_jitter = imgaug.RandomOrderAug([
        imgaug.BrightnessScale((0.6, 1.4), clip=True),
        imgaug.Contrast((0.6, 1.4), clip=True),
        imgaug.Saturation(0.4, rgb=False),
        # rgb-bgr conversion for the constants copied from fb.resnet.torch
        imgaug.Lighting(
            0.1,
            eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
            eigvec=rgb_eigvec[::-1, ::-1]),
    ])
    horizontal_flip = imgaug.Flip(horiz=True)
    return [random_resize, color_jitter, horizontal_flip]
def fbresnet_augmentor(isTrain, target_shape=224):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.
        target_shape: output size handed to GoogleNetResize (training) or
            used as the square center-crop size (evaluation).

    Returns:
        list: augmentor instances, in application order.
    """
    if not isTrain:
        # Keep the 256:224 ratio between the resized shortest edge and the
        # crop size for any target_shape.
        shortest_edge = int(256 / 224 * target_shape)
        return [
            imgaug.ResizeShortestEdge(shortest_edge, cv2.INTER_CUBIC),
            imgaug.CenterCrop((target_shape, target_shape)),
        ]

    # PCA lighting constants copied from fb.resnet.torch (RGB order).
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    pipeline = [
        # crop_area_fraction is set explicitly to 0.32 here instead of being
        # left at GoogleNetResize's own default.
        GoogleNetResize(crop_area_fraction=0.32, target_shape=target_shape),
    ]
    # Only the lighting augmentation is active; BrightnessScale((0.6, 1.4)),
    # Contrast((0.6, 1.4)) and Saturation(0.4) are left out of this variant.
    pipeline.append(imgaug.RandomOrderAug([
        # rgb-bgr conversion for the constants copied from fb.resnet.torch
        imgaug.Lighting(
            0.1,
            eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
            eigvec=rgb_eigvec[::-1, ::-1]),
    ]))
    pipeline.append(imgaug.Flip(horiz=True))
    return pipeline
def small_augmentor():
    """
    Build a reduced augmentor list: GoogleNetResize, PCA lighting, horizontal flip.

    Returns:
        list: augmentor instances, in application order.
    """
    # PCA lighting constants in RGB order; the eigenvalues and both axes of
    # the eigenvector matrix are reversed when passed to Lighting.
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    return [
        GoogleNetResize(),
        imgaug.Lighting(
            0.1,
            eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
            eigvec=rgb_eigvec[::-1, ::-1]),
        imgaug.Flip(horiz=True),
    ]
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.

    Returns:
        list: augmentor instances, in application order.
    """
    # The same interpolation mode is used by both pipelines.
    interpolation = cv2.INTER_LINEAR

    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256, interp=interpolation),
            imgaug.CenterCrop((224, 224)),
        ]

    # Sec 5.1: We use scale and aspect ratio data augmentation [35] as in
    # [12]. The network input image is a 224x224 pixel random crop from an
    # augmented image or its horizontal flip.

    # PCA lighting constants copied from fb.resnet.torch (RGB order).
    rgb_eigval = [0.2175, 0.0188, 0.0045]
    rgb_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')

    return [
        imgaug.GoogleNetRandomCropAndResize(interp=interpolation),
        # It's OK to remove the following augs if your CPU is not fast enough.
        # Removing brightness/contrast/saturation does not have a significant
        # effect on accuracy. Removing lighting leads to a tiny drop in
        # accuracy.
        imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), rgb=False, clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion for the constants copied from
            # fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray(rgb_eigval[::-1]) * 255.0,
                eigvec=rgb_eigvec[::-1, ::-1]),
        ]),
        imgaug.Flip(horiz=True),
    ]
def prepare_video(args):
    """
    Load the frames of one video (or a single image), preprocess them and
    write them into a slot of the shared-memory batch buffer.

    Args:
        args: a 13-element sequence, unpacked in this order:
            data_root, video_path, video_width, video_height, video_length,
            video_downsample_ratio, video_index, batch_size, shared_mem_idx,
            is_training, is_ucf101, is_imagenet, is_zipped.
            ``shared_mem[shared_mem_idx]`` (a module-level buffer) is viewed
            as a float array of shape
            (batch_size, video_length, video_height, video_width, 3) and the
            frames are written to index ``video_index`` of it.

    Returns:
        dict: ``{'num_frames': <frames written>, 'video_path': video_path}``.

    Raises:
        AssertionError: if the video has no usable frame, or a preprocessed
            frame does not have shape (video_height, video_width, 3).
    """
    (data_root, video_path, video_width, video_height, video_length,
     video_downsample_ratio, video_index, batch_size, shared_mem_idx,
     is_training, is_ucf101, is_imagenet, is_zipped) = args

    # Color augmentations for the ImageNet training path. The PIL images are
    # converted to RGB below, so the constants are used in RGB order.
    augs = [
        imgaug.BrightnessScale((0.6, 1.4), clip=False),
        imgaug.Contrast((0.6, 1.4), clip=False),
        imgaug.Saturation(0.4, rgb=True),
        imgaug.Lighting(
            0.1,
            eigval=np.asarray([0.2175, 0.0188, 0.0045]) * 255.0,
            eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                             [-0.5808, -0.0045, -0.8140],
                             [-0.5836, -0.6948, 0.4203]],
                            dtype='float32')),
    ]
    random.shuffle(augs)

    video_mem = np.frombuffer(shared_mem[shared_mem_idx],
                              np.ctypeslib.ctypes.c_float)
    video_mem = video_mem.reshape(
        (batch_size, video_length, video_height, video_width, 3))

    # Store full frame paths up front. The previous code re-joined the single
    # file path with data_root again, which doubled the prefix whenever
    # data_root was a relative path.
    video_root = os.path.join(data_root, str(video_path))
    if os.path.isdir(video_root):
        frames = [os.path.join(video_root, name)
                  for name in sorted(os.listdir(video_root))]
    else:
        frames = [video_root]

    crop_frames = is_training
    flip_frames = bool(
        random.getrandbits(1)) and is_training and (is_ucf101 or is_imagenet)
    # NOTE(review): `is_training and not is_training` is always falsy, so
    # noise is never added. The intended condition is not visible from this
    # function; the expression is kept as-is to preserve behavior and the
    # random stream. Please confirm the intended flag.
    add_noise = bool(random.getrandbits(1)) and is_training and not is_training

    # choose a random time to start the video
    num_frames = len(frames) // video_downsample_ratio
    t_offset = 0
    stride_offset = 0
    if is_training and num_frames > video_length:
        t_offset = random.choice(range(num_frames - video_length))
        stride_offset = random.choice(range(video_downsample_ratio))
    num_frames = min(num_frames, video_length)
    assert num_frames != 0, 'num frames in video cannot be 0: {}'.format(
        video_path)

    # Training frames are resized to the next power of two and a random
    # window of the target size is cropped back out.
    pow2_width = 2 ** (video_width - 1).bit_length()
    pow2_height = 2 ** (video_height - 1).bit_length()
    crop_margin_x = pow2_width - video_width
    crop_margin_y = pow2_height - video_height
    # A zero margin (size already a power of two) used to crash on
    # random.choice([]); the crop then simply starts at 0.
    x1 = random.choice(range(crop_margin_x)) if crop_margin_x > 0 else 0
    y1 = random.choice(range(crop_margin_y)) if crop_margin_y > 0 else 0
    x2 = pow2_width - (crop_margin_x - x1)
    y2 = pow2_height - (crop_margin_y - y1)
    rotation_angle = random.choice(range(-10, 10)) if is_training else 0

    # Per-channel normalization constants for the ImageNet path (hoisted out
    # of the frame loop).
    mean = np.asarray([0.485, 0.456, 0.406])
    std = np.asarray([0.229, 0.224, 0.225])
    # numpy yields (height, width, channels) for a PIL image.
    expected_shape = (video_height, video_width, 3)

    video_mem[video_index, :, :, :] = 0
    last_frame_idx = len(frames) - 1
    for i in range(num_frames):
        image_idx = video_downsample_ratio * (i + t_offset) + stride_offset
        # Clamp to the last valid index (was len(frames), one past the end).
        image_idx = min(image_idx, last_frame_idx)
        frame_path = frames[image_idx]

        if is_imagenet and is_zipped:
            # The frame lives in "<parent dir>.zip" under
            # "<parent dir name>/<file name>".
            jpeg_filename = os.path.basename(frame_path)
            jpeg_dirname = os.path.basename(os.path.dirname(frame_path))
            zip_filepath = os.path.dirname(frame_path) + '.zip'
            # Read the bytes eagerly so the archive can be closed right away.
            with zipfile.ZipFile(zip_filepath, 'r') as archive:
                jpeg_bytes = archive.read(
                    os.path.join(jpeg_dirname, jpeg_filename))
            image = Image.open(io.BytesIO(jpeg_bytes))
        else:
            image = Image.open(frame_path)
        image = image.convert('RGB')

        if crop_frames:
            image = image.resize((pow2_width, pow2_height), PIL.Image.BICUBIC)
            image = image.crop(box=(x1, y1, x2, y2))
        else:
            image = image.resize((video_width, video_height),
                                 PIL.Image.BICUBIC)
        if flip_frames:
            image = image.transpose(PIL.Image.FLIP_LEFT_RIGHT)
        if rotation_angle != 0:
            image = image.rotate(rotation_angle)

        image = np.asarray(image, dtype=np.uint8)
        assert image.shape == expected_shape, \
            'cropped image must be {} but was {}'.format(
                expected_shape, image.shape)

        if is_imagenet:
            if is_training:
                for aug in augs:
                    aug.reset_state()
                    image = aug.augment(image)
            image = np.asarray(image, dtype=np.float32)
            image = image * (1.0 / 255)
            image = (image - mean) / std
        else:
            # Signed type: with an unsigned one, pixels below 116 wrap around
            # to huge values before the clip.
            image = np.asarray(image, dtype=np.int32)
            image = image - 116  # center on mean value of 116 (as computed in preprocessing step)
            image = np.clip(image, -128, 128)
            image = np.asarray(image, dtype=np.float32)

        if add_noise:
            noise = np.random.normal(loc=0, scale=5,
                                     size=(video_height, video_width, 3))
            image = image + noise
            image = np.clip(image, -128, 128)

        video_mem[video_index, i, :, :, :] = image

    return {'num_frames': num_frames, 'video_path': video_path}
def fbresnet_augmentor(isTrain, crop_method, color_augmentation):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images in range [0,255].

    Args:
        isTrain: truthy selects the training pipeline, falsy the
            evaluation pipeline.
        crop_method: name of the cropping strategy. Training recognizes
            'GoogleNetResize', 'ShortestEdgeResize', 'GlobalWarp' and
            'CAMCrop' (which adds no crop augmentor here); evaluation
            recognizes 'RandomCrop' and 'CenterCrop'. Any other value adds
            no crop augmentor, so the evaluation result is then empty.
        color_augmentation: truthy adds the color jittering augmentor
            (training only).

    Returns:
        list: the selected augmentor instances, in application order.
    """
    selected = []

    if isTrain:
        # All candidates are instantiated up front; the choices below only
        # decide which of them end up in the returned list.
        googlenet_resize = GoogleNetResize()
        shortest_edge_resize = imgaug.ResizeShortestEdge(256)
        # NOTE: for CAM generation
        global_warp = imgaug.Resize(226)
        random_crop = imgaug.RandomCrop((224, 224))
        color_jitter = imgaug.RandomOrderAug([
            imgaug.BrightnessScale((0.6, 1.4), clip=False),
            imgaug.Contrast((0.6, 1.4), clip=False),
            imgaug.Saturation(0.4, rgb=False),
            # rgb-bgr conversion for the constants copied from
            # fb.resnet.torch
            imgaug.Lighting(
                0.1,
                eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
                eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                 [-0.5808, -0.0045, -0.8140],
                                 [-0.5836, -0.6948, 0.4203]],
                                dtype='float32')[::-1, ::-1]),
        ])
        mirror = imgaug.Flip(horiz=True)

        # 1. crop_method
        if crop_method == 'GoogleNetResize':
            print(
                '--> perform GoogleNetResize cropping method during the training pipeline'
            )
            selected.append(googlenet_resize)
        elif crop_method == 'ShortestEdgeResize':
            print(
                '--> perform ShortestEdgeResize cropping method during the training pipeline'
            )
            selected += [shortest_edge_resize, random_crop]
        elif crop_method == 'GlobalWarp':
            print(
                '--> perform GlobalWarp cropping method during the training pipeline'
            )
            selected += [global_warp, random_crop]
        elif crop_method == 'CAMCrop':
            # enable CAMCrop @ 20171124
            # (when CAMCrop is set, the output from the original DataFlow
            # has already been cropped)
            print(
                '*** Perform CAMCrop to better the training dynamics and the results ***'
            )

        # 2. color_augmentation
        if color_augmentation:
            print(
                '--> perform color augmentation during the training pipeline')
            selected.append(color_jitter)
        else:
            print(
                '--> discard the color jittering process during the training pipeline'
            )

        # perform mirror reflection augmentation anyway
        selected.append(mirror)
        return selected

    shortest_edge_resize = imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC)
    center_crop = imgaug.CenterCrop((224, 224))
    random_crop = imgaug.RandomCrop((224, 224))
    if crop_method == 'RandomCrop':
        selected += [shortest_edge_resize, random_crop]
    elif crop_method == 'CenterCrop':
        selected += [shortest_edge_resize, center_crop]
    return selected