def test_melspectrogram():
    """Run ``MelSpectrogram`` with default settings and ``fixed_length=1``."""
    expected_keys = ['audios']

    # Constructing with window_size=12.5 is expected to raise TypeError.
    with pytest.raises(TypeError):
        MelSpectrogram(window_size=12.5)

    def build_inputs():
        # Fresh random (1, 160000) array plus the metadata entries the test
        # supplies alongside it.
        return dict(
            audios=np.random.rand(1, 160000), sample_rate=16000, num_clips=1)

    # Default settings (the "padding" scenario).
    inputs = build_inputs()
    mel = MelSpectrogram()
    outputs = mel(inputs)
    assert assert_dict_has_keys(outputs, expected_keys)

    # fixed_length=1 (the "truncating" scenario).
    inputs = build_inputs()
    mel = MelSpectrogram(fixed_length=1)
    outputs = mel(inputs)
    assert assert_dict_has_keys(outputs, expected_keys)

    # NOTE: the expected text closes a parenthesis right after window_size;
    # it is reproduced exactly as the test has always asserted it.
    expected_repr = (f'{mel.__class__.__name__}'
                     f'(window_size={mel.window_size}), '
                     f'step_size={mel.step_size}, '
                     f'n_mels={mel.n_mels}, '
                     f'fixed_length={mel.fixed_length})')
    assert repr(mel) == expected_repr
def test_to_data_container():
    """Check that ``ToDataContainer`` wraps the configured fields in ``DC``."""
    fields = (dict(key='key1', stack=True), dict(key='key2'))
    to_data_container = ToDataContainer(fields=fields)
    wrapped_keys = ['key1', 'key2']

    def check_wrapping(source):
        # The transform receives a shallow copy so ``source`` keeps the raw
        # values to compare against.
        outputs = to_data_container(source.copy())
        assert assert_dict_has_keys(outputs, wrapped_keys)
        for name in wrapped_keys:
            container = outputs[name]
            assert isinstance(container, DC)
            assert np.all(container.data == source[name])
        assert outputs['key1'].stack
        assert not outputs['key2'].stack

    # Input holding only the user-defined fields.
    check_wrapping(dict(key1=np.random.randn(10, 20), key2=['a', 'b']))

    # Input carrying an additional key that is not listed in ``fields``.
    check_wrapping(
        dict(key1=np.random.randn(10, 20), key2=['a', 'b'], key3='value3'))

    expected_repr = (
        to_data_container.__class__.__name__ + f'(fields={fields})')
    assert repr(to_data_container) == expected_repr
def test_box_rescale(self):
    """Check ``EntityBoxRescale`` on valid, malformed and absent proposals.

    Three scenarios are covered:

    * proposals with three columns instead of four raise ``AssertionError``;
    * well-formed proposals and ground-truth boxes are compared against
      hard-coded expected arrays;
    * ``proposals=None`` is passed through unchanged.
    """
    target_keys = ['img_shape', 'scale_factor', 'proposals', 'gt_bboxes']
    results = dict(
        img_shape=(520, 480),
        scale_factor=(0.7, 0.8),
        proposals=np.array([[5.28, 81.64, 314.4, 511.16]]),
        gt_bboxes=np.array([[14.88, 84.24, 321.6, 517.4]]))
    scale_factor = results['scale_factor']
    box_scale = EntityBoxRescale(scale_factor)

    # Only the call under test sits inside ``pytest.raises`` so that an
    # AssertionError raised by the setup code cannot satisfy the check.
    malformed = copy.deepcopy(results)
    malformed['proposals'] = np.array([[5.28, 81.64, 314.4]])
    with pytest.raises(AssertionError):
        box_scale(malformed)

    # Well-formed boxes.
    results_ = box_scale(copy.deepcopy(results))
    # ``assert`` was missing here, which silently discarded the key check.
    assert assert_dict_has_keys(results_, target_keys)
    assert_array_almost_equal(results_['proposals'],
                              np.array([[3.696, 65.312, 220.08, 408.928]]))
    assert_array_almost_equal(results_['gt_bboxes'],
                              np.array([[10.416, 67.392, 225.12, 413.92]]))

    # Missing proposals must stay ``None``.
    results_ = copy.deepcopy(results)
    results_['proposals'] = None
    results_ = box_scale(results_)
    assert assert_dict_has_keys(results_, target_keys)
    assert results_['proposals'] is None

    assert repr(box_scale) == ('EntityBoxRescale'
                               f'(scale_factor={scale_factor})')
def test_three_crop():
    """Check ``ThreeCrop`` argument validation and its three-crop output."""
    # crop_size must be an int or a tuple of int.
    for invalid_size in (0.5, '224'):
        with pytest.raises(TypeError):
            ThreeCrop(invalid_size)

    target_keys = ['imgs', 'crop_bbox', 'img_shape']

    # (height, width, crop_size): a 240x120 input cropped to 120, then a
    # 224x224 input cropped to 224.
    for height, width, crop_size in ((240, 120, 120), (224, 224, 224)):
        imgs = list(np.random.rand(2, height, width, 3))
        results = dict(imgs=imgs)
        three_crop = ThreeCrop(crop_size=crop_size)
        three_crop_results = three_crop(results)
        assert assert_dict_has_keys(three_crop_results, target_keys)
        assert check_crop(imgs, three_crop_results['imgs'],
                          three_crop_results['crop_bbox'], 3)
        assert three_crop_results['img_shape'] == (crop_size, crop_size)

    # ``three_crop`` is the crop_size=224 instance at this point.
    expected_repr = (f'{three_crop.__class__.__name__}'
                     f'(crop_size={(224, 224)})')
    assert repr(three_crop) == expected_repr
def test_init_lazy(self):
    """Check which keys ``_init_lazy_if_proper`` adds to a results dict."""
    from mmaction.datasets.pipelines.augmentations import \
        _init_lazy_if_proper  # noqa: E501

    # A 'lazy' entry is present while the lazy flag is False: expected to
    # trip an assertion.
    with pytest.raises(AssertionError):
        conflicting = dict(lazy=dict(), img_shape=[64, 64])
        _init_lazy_if_proper(conflicting, False)

    lazy_keys = [
        'original_shape', 'crop_bbox', 'flip', 'flip_direction',
        'interpolation'
    ]

    # Lazy mode, 'img_shape' absent from the input.
    without_shape = dict(imgs=list(np.random.randn(3, 64, 64, 3)))
    _init_lazy_if_proper(without_shape, True)
    assert assert_dict_has_keys(without_shape, ['imgs', 'lazy', 'img_shape'])
    assert assert_dict_has_keys(without_shape['lazy'], lazy_keys)

    # Lazy mode, 'img_shape' already provided.
    with_shape = dict(img_shape=[64, 64])
    _init_lazy_if_proper(with_shape, True)
    assert assert_dict_has_keys(with_shape, ['lazy', 'img_shape'])
    assert assert_dict_has_keys(with_shape['lazy'], lazy_keys)

    # Lazy mode disabled: no 'lazy' entry may be created.
    eager = dict(img_shape=[64, 64])
    _init_lazy_if_proper(eager, False)
    assert assert_dict_has_keys(eager, ['img_shape'])
    assert 'lazy' not in eager
def test_sample_ava_frames(self):
    """Check ``SampleAVAFrames`` output keys, sizes and repr for two configs."""
    target_keys = [
        'fps', 'timestamp', 'timestamp_start', 'shot_info', 'frame_inds',
        'clip_len', 'frame_interval'
    ]

    # (32, 2) is the baseline configuration; (8, 8) is the case added for
    # Issue #306.
    for clip_len, frame_interval in ((32, 2), (8, 8)):
        config = dict(clip_len=clip_len, frame_interval=frame_interval)
        sample_ava_dataset = SampleAVAFrames(**config)
        ava_result = sample_ava_dataset(results=self.ava_results)

        assert assert_dict_has_keys(ava_result, target_keys)
        assert ava_result['clip_len'] == clip_len
        assert ava_result['frame_interval'] == frame_interval
        assert len(ava_result['frame_inds']) == clip_len

        expected_repr = (f'{sample_ava_dataset.__class__.__name__}('
                         f'clip_len={clip_len}, '
                         f'frame_interval={frame_interval}, '
                         f'test_mode={False})')
        assert repr(sample_ava_dataset) == expected_repr
def test_pyav_decode_motion_vector(self):
    """Check ``PyAVDecodeMotionVector`` with 2-dim and 1-dim frame indices."""
    target_keys = ['motion_vectors']
    base_inds = np.arange(0, 32, 1)

    # First a column-vector (2-dim) index array, then the flat (1-dim) one.
    for frame_inds in (base_inds[:, np.newaxis], base_inds):
        pyav_init = PyAVInit()
        pyav = PyAVDecodeMotionVector()
        results = {'filename': self.video_path, 'frame_inds': frame_inds}
        results = pyav_init(results)
        results = pyav(results)
        assert assert_dict_has_keys(results, target_keys)
def test_decord_decode(self):
    """Check ``DecordDecode`` for 1-dim/2-dim indices starting at 0 and 1.

    Every scenario deep-copies ``self.video_results``, initialises a reader
    with ``DecordInit``, decodes with ``DecordDecode`` and checks the output
    keys, the ``original_shape`` of (256, 340) and the decoded array shape.
    """
    target_keys = ['frame_inds', 'imgs', 'original_shape']

    def check_decode(frame_inds):
        # Run init + decode for one index layout and verify the outputs.
        video_result = copy.deepcopy(self.video_results)
        video_result['frame_inds'] = frame_inds
        decord_init_result = DecordInit()(video_result)
        video_result['video_reader'] = decord_init_result['video_reader']
        decord_decode_result = DecordDecode()(video_result)
        assert assert_dict_has_keys(decord_decode_result, target_keys)
        assert decord_decode_result['original_shape'] == (256, 340)
        assert np.shape(decord_decode_result['imgs']) == (len(
            video_result['frame_inds']), 256, 340, 3)

    # 2-dim input, indices starting at 0.
    check_decode(np.arange(0, self.total_frames, 3)[:, np.newaxis])

    # 1-dim input, indices starting at 0.
    check_decode(np.arange(0, self.total_frames, 3))

    # 2-dim input, indices starting at 1.  This scenario used to be a
    # verbatim copy of the first one (start 0), which left the 2-dim /
    # non-zero-start combination untested.
    check_decode(np.arange(1, self.total_frames, 3)[:, np.newaxis])

    # 1-dim input, indices starting at 1.
    check_decode(np.arange(1, self.total_frames, 3))
def test_normalize(self):
    """Check ``Normalize`` validation plus RGB, Flow and BGR normalisation."""
    rgb_mean = [123.675, 116.28, 103.53]
    rgb_std = [58.395, 57.12, 57.375]

    # mean must be a list, tuple or np.ndarray.
    with pytest.raises(TypeError):
        Normalize(dict(mean=[123.675, 116.28, 103.53]), rgb_std)

    # std must be a list, tuple or np.ndarray.
    with pytest.raises(TypeError):
        Normalize(rgb_mean, dict(std=[58.395, 57.12, 57.375]))

    target_keys = ['imgs', 'img_norm_cfg', 'modality']

    # RGB images, channel order left untouched.
    imgs = list(np.random.rand(2, 240, 320, 3).astype(np.float32))
    results = dict(imgs=imgs, modality='RGB')
    normalize = Normalize(mean=rgb_mean, std=rgb_std, to_bgr=False)
    normalize_results = normalize(results)
    assert assert_dict_has_keys(normalize_results, target_keys)
    check_normalize(imgs, normalize_results['imgs'],
                    normalize_results['img_norm_cfg'])

    # Flow images: four single-channel frames are expected to come back as
    # an array of shape (2, 240, 320, 2).
    imgs = list(np.random.rand(4, 240, 320).astype(np.float32))
    results = dict(imgs=imgs, modality='Flow')
    flow_cfg = dict(mean=[128, 128], std=[128, 128])
    normalize = Normalize(**flow_cfg)
    normalize_results = normalize(results)
    assert assert_dict_has_keys(normalize_results, target_keys)
    assert normalize_results['imgs'].shape == (2, 240, 320, 2)
    # Rebuild the expected array: even-indexed frames form the first
    # channel, odd-indexed frames the second.
    x_components = np.array(imgs[0::2])
    y_components = np.array(imgs[1::2])
    x_components = (x_components - flow_cfg['mean'][0]) / flow_cfg['std'][0]
    y_components = (y_components - flow_cfg['mean'][1]) / flow_cfg['std'][1]
    expected_flow = np.stack([x_components, y_components], axis=-1)
    assert np.all(np.isclose(expected_flow, normalize_results['imgs']))

    # RGB images with to_bgr=True.
    imgs = list(np.random.rand(2, 240, 320, 3).astype(np.float32))
    results = dict(imgs=imgs, modality='RGB')
    normalize = Normalize(mean=rgb_mean, std=rgb_std, to_bgr=True)
    normalize_results = normalize(results)
    assert assert_dict_has_keys(normalize_results, target_keys)
    check_normalize(imgs, normalize_results['imgs'],
                    normalize_results['img_norm_cfg'])

    expected_repr = (
        normalize.__class__.__name__ +
        f'(mean={np.array([123.675, 116.28, 103.53])}, ' +
        f'std={np.array([58.395, 57.12, 57.375])}, to_bgr={True}, '
        f'adjust_magnitude={False})')
    assert repr(normalize) == expected_repr
def test_ava_dataset(self):
    """Check the records ``AVADataset`` loads with and without an exclude file."""
    target_keys = [
        'frame_dir', 'video_id', 'timestamp', 'img_key', 'shot_info', 'fps',
        'ann'
    ]
    ann_keys = ['gt_labels', 'gt_bboxes', 'entity_ids']
    pkl_keys = ['0f39OWEqJ24,0902', '0f39OWEqJ24,0903', '_-Z6wFjXtGQ,0902']

    def build_dataset(exclude_file, **extra):
        # All datasets in this test share annotation file, pipeline, data
        # prefix and proposal file; only the exclude file and mode vary.
        return AVADataset(
            self.ann_file,
            exclude_file,
            self.pipeline,
            data_prefix=self.data_prefix,
            proposal_file=self.proposal_file,
            **extra)

    # With the exclude file a single record is expected to remain.
    ava_dataset = build_dataset(self.exclude_file)
    ava_infos = ava_dataset.video_infos
    assert assert_dict_has_keys(ava_dataset.proposals, pkl_keys)

    first = ava_infos[0]
    assert assert_dict_has_keys(first, target_keys)
    assert assert_dict_has_keys(first['ann'], ann_keys)
    assert len(ava_infos) == 1
    assert first['frame_dir'] == osp.join(self.data_prefix, '0f39OWEqJ24')
    assert first['video_id'] == '0f39OWEqJ24'
    assert first['timestamp'] == 902
    assert first['img_key'] == '0f39OWEqJ24,0902'
    assert first['shot_info'] == (0, 27000)
    assert first['fps'] == 30
    assert len(first['ann']) == 3

    # Expected labels: an 81-long vector with ones at indices 12, 17 and 79,
    # given a leading axis of length one.
    one_hot = np.zeros([81])
    one_hot[np.array([12, 17, 79])] = 1.
    expected_labels = one_hot[None, ...]
    assert_array_equal(first['ann']['gt_labels'], expected_labels)
    assert_array_equal(first['ann']['gt_bboxes'],
                       np.array([[0.031, 0.162, 0.67, 0.995]]))
    assert_array_equal(first['ann']['entity_ids'], np.array([0]))

    # Without an exclude file three records are expected.
    ava_dataset = build_dataset(None)
    ava_infos = ava_dataset.video_infos
    assert len(ava_infos) == 3

    # Same expectation in test mode.
    ava_dataset = build_dataset(None, test_mode=True)
    ava_infos = ava_dataset.video_infos
    assert len(ava_infos) == 3

    # NOTE(review): this final construction is not followed by any
    # assertion in the visible code; it is kept so that a failure while
    # building the dataset still fails the test.
    ava_dataset = build_dataset(None, test_mode=True)
def test_resize(self):
    """Check ``Resize`` validation, scale factors and output shapes."""
    # scale must be positive.
    with pytest.raises(ValueError):
        Resize(-0.5)

    # scale must be a tuple of int.
    with pytest.raises(TypeError):
        Resize('224')

    target_keys = [
        'imgs', 'img_shape', 'keep_ratio', 'scale_factor', 'modality'
    ]

    def run_resize(img_shape, modality, scale, keep_ratio, **extra):
        # Build random inputs, apply Resize and verify the output keys.
        inputs = dict(
            imgs=list(np.random.rand(*img_shape)), modality=modality, **extra)
        transform = Resize(scale=scale, keep_ratio=keep_ratio)
        outputs = transform(inputs)
        assert assert_dict_has_keys(outputs, target_keys)
        return transform, outputs

    def factor_matches(outputs, expected):
        return np.all(outputs['scale_factor'] == np.array(
            expected, dtype=np.float32))

    # Flow images resized to a fixed (160, 80) scale.
    resize, resize_results = run_resize((2, 240, 320), 'Flow', (160, 80),
                                        False)
    assert factor_matches(resize_results, [.5, 1. / 3.])
    assert resize_results['img_shape'] == (80, 160)

    # scale with -1 to indicate np.inf; boxes and empty proposals ride along.
    resize, resize_results = run_resize(
        (2, 240, 320, 3),
        'RGB', (-1, 256),
        True,
        gt_bboxes=np.array([[0, 0, 320, 240]]),
        proposals=None)
    assert factor_matches(resize_results, [341 / 320, 256 / 240])
    assert resize_results['img_shape'] == (256, 341)

    # Plain tuple scale (320, 320) without keeping the aspect ratio.
    resize, resize_results = run_resize((2, 240, 320, 3), 'RGB', (320, 320),
                                        False)
    assert factor_matches(resize_results, [1, 320 / 240])
    assert resize_results['img_shape'] == (320, 320)

    # Plain tuple scale (341, 256) without keeping the aspect ratio.
    resize, resize_results = run_resize((2, 240, 320, 3), 'RGB', (341, 256),
                                        False)
    assert factor_matches(resize_results, [341 / 320, 256 / 240])
    assert resize_results['img_shape'] == (256, 341)

    expected_repr = (
        resize.__class__.__name__ +
        f'(scale={(341, 256)}, keep_ratio={False}, ' +
        f'interpolation=bilinear, lazy={False})')
    assert repr(resize) == expected_repr
def test_random_crop_lazy(self):
    """Check lazy ``RandomCrop``: validation, deferred cropping and ``Fuse``."""
    # size must be an int.
    with pytest.raises(TypeError):
        RandomCrop(size=(112, 112), lazy=True)

    # "size > height" or "size > width" is not allowed.
    with pytest.raises(AssertionError):
        imgs = list(np.random.rand(2, 224, 341, 3))
        results = dict(imgs=imgs)
        random_crop = RandomCrop(size=320, lazy=True)
        random_crop(results)

    target_keys = ['imgs', 'crop_bbox', 'img_shape', 'lazy']

    # Widths cover the general case (341), the case where no cropping is
    # needed (224) and the one-side-equal case (225); height is always 224.
    for width in (341, 224, 225):
        imgs = list(np.random.rand(2, 224, width, 3))
        results = dict(imgs=imgs)
        random_crop = RandomCrop(size=224, lazy=True)
        random_crop_result = random_crop(results)
        assert assert_dict_has_keys(random_crop_result, target_keys)
        # In lazy mode the very same list object must be handed back.
        assert random_crop_result['imgs'] is imgs

        random_crop_result_fuse = Fuse()(random_crop_result)
        assert 'lazy' not in random_crop_result_fuse
        assert check_crop(imgs, random_crop_result_fuse['imgs'],
                          results['crop_bbox'])
        h, w = random_crop_result_fuse['img_shape']
        assert h == w == 224

    expected_repr = (f'{random_crop.__class__.__name__}'
                     f'(size={224}, lazy={True})')
    assert repr(random_crop) == expected_repr
def test_base_head():
    """Check that ``ExampleHead.loss`` yields a positive ``loss_cls``.

    Covers hard labels (with and without ``loss_weight``), one-hot "soft"
    labels for batch sizes above and equal to one, and the multi-class
    configuration with label smoothing.
    """
    head = ExampleHead(3, 400, dict(type='CrossEntropyLoss'))
    cls_scores = torch.rand((3, 4))
    # When truth is non-empty then cls loss should be nonzero for random
    # inputs
    gt_labels = torch.LongTensor([2] * 3).squeeze()
    losses = head.loss(cls_scores, gt_labels)
    assert 'loss_cls' in losses.keys()
    assert losses.get('loss_cls') > 0, 'cls loss should be non-zero'

    head = ExampleHead(3, 400, dict(type='CrossEntropyLoss', loss_weight=2.0))
    cls_scores = torch.rand((3, 4))
    # Same check with a loss weight applied.
    gt_labels = torch.LongTensor([2] * 3).squeeze()
    losses = head.loss(cls_scores, gt_labels)
    # ``assert`` was missing here, so the key check used to be a no-op.
    assert assert_dict_has_keys(losses, ['loss_cls'])
    assert losses.get('loss_cls') > 0, 'cls loss should be non-zero'

    # Test Soft label with batch size > 1
    cls_scores = torch.rand((3, 3))
    gt_labels = torch.LongTensor([[2] * 3])
    gt_one_hot_labels = F.one_hot(gt_labels, num_classes=3).squeeze()
    losses = head.loss(cls_scores, gt_one_hot_labels)
    assert 'loss_cls' in losses.keys()
    assert losses.get('loss_cls') > 0, 'cls loss should be non-zero'

    # Test Soft label with batch size = 1
    cls_scores = torch.rand((1, 3))
    gt_labels = torch.LongTensor([2])
    gt_one_hot_labels = F.one_hot(gt_labels, num_classes=3).squeeze()
    losses = head.loss(cls_scores, gt_one_hot_labels)
    assert 'loss_cls' in losses.keys()
    assert losses.get('loss_cls') > 0, 'cls loss should be non-zero'

    # test multi-class & label smoothing
    head = ExampleHead(
        3,
        400,
        dict(type='BCELossWithLogits'),
        multi_class=True,
        label_smooth_eps=0.1)

    # batch size > 1
    cls_scores = torch.rand((2, 3))
    gt_labels = torch.LongTensor([[1, 0, 1], [0, 1, 0]]).squeeze()
    losses = head.loss(cls_scores, gt_labels)
    assert 'loss_cls' in losses.keys()
    assert losses.get('loss_cls') > 0, 'cls loss should be non-zero'

    # batch size = 1
    cls_scores = torch.rand((1, 3))
    gt_labels = torch.LongTensor([[1, 0, 1]]).squeeze()
    losses = head.loss(cls_scores, gt_labels)
    assert 'loss_cls' in losses.keys()
    assert losses.get('loss_cls') > 0, 'cls loss should be non-zero'
def test_opencv_decode(self):
    """Check ``OpenCVDecode`` for 1-dim/2-dim indices starting at 0 and 1.

    Every scenario deep-copies ``self.video_results``, initialises a reader
    with ``OpenCVInit``, decodes with ``OpenCVDecode`` and checks the output
    keys, the ``original_shape`` of (256, 340) and the decoded array shape.
    """
    target_keys = ['frame_inds', 'imgs', 'original_shape']

    def check_decode(frame_inds):
        # Run init + decode for one index layout and verify the outputs.
        video_result = copy.deepcopy(self.video_results)
        video_result['frame_inds'] = frame_inds
        opencv_init_result = OpenCVInit()(video_result)
        video_result['video_reader'] = opencv_init_result['video_reader']
        opencv_decode_result = OpenCVDecode()(video_result)
        assert assert_dict_has_keys(opencv_decode_result, target_keys)
        assert opencv_decode_result['original_shape'] == (256, 340)
        assert np.shape(opencv_decode_result['imgs']) == (len(
            video_result['frame_inds']), 256, 340, 3)

    # 2-dim input, indices starting at 0.
    check_decode(np.arange(0, self.total_frames, 2)[:, np.newaxis])

    # 2-dim input, indices starting at 1.
    check_decode(np.arange(1, self.total_frames, 2)[:, np.newaxis])

    # 1-dim input, indices starting at 0.  This scenario used to stop after
    # initialising the reader, so nothing was decoded or asserted for it.
    check_decode(np.arange(0, self.total_frames, 3))

    # 1-dim input, indices starting at 1.
    check_decode(np.arange(1, self.total_frames, 3))
def test_random_crop():
    """Check ``RandomCrop`` validation and cropping for three input widths."""
    # size must be an int.
    with pytest.raises(TypeError):
        RandomCrop(size=(112, 112))

    # "size > height" or "size > width" is not allowed.
    with pytest.raises(AssertionError):
        imgs = list(np.random.rand(2, 224, 341, 3))
        results = dict(imgs=imgs)
        random_crop = RandomCrop(size=320)
        random_crop(results)

    target_keys = ['imgs', 'crop_bbox', 'img_shape']

    def verify(imgs, results, output):
        # Shared checks: expected keys, crop consistency and a 224x224 shape.
        assert assert_dict_has_keys(output, target_keys)
        assert check_crop(imgs, output['imgs'], results['crop_bbox'])
        h, w = output['img_shape']
        assert h == w == 224

    # General case; boxes, proposals and keypoints travel with the images.
    imgs = list(np.random.rand(2, 224, 341, 3))
    results = dict(imgs=imgs)
    random_crop = RandomCrop(size=224)
    results['gt_bboxes'] = np.array([[0, 0, 340, 224]])
    results['proposals'] = np.array([[0, 0, 340, 224]])
    results['keypoint'] = np.array([[160, 120],
                                    [160, 120]]).reshape([1, 1, 2, 2])
    verify(imgs, results, random_crop(results))

    # 224: no cropping needed; 225: the one-side-equal case.
    for width in (224, 225):
        imgs = list(np.random.rand(2, 224, width, 3))
        results = dict(imgs=imgs)
        random_crop = RandomCrop(size=224)
        verify(imgs, results, random_crop(results))

    expected_repr = (f'{random_crop.__class__.__name__}'
                     f'(size={224}, lazy={False})')
    assert repr(random_crop) == expected_repr
def test_resize_lazy(self):
    """Check lazy ``Resize``: validation, deferred resizing and ``Fuse``."""
    # scale must be positive.
    with pytest.raises(ValueError):
        Resize(-0.5, lazy=True)

    # scale must be a tuple of int.
    with pytest.raises(TypeError):
        Resize('224', lazy=True)

    target_keys = [
        'imgs', 'img_shape', 'keep_ratio', 'scale_factor', 'modality'
    ]

    # Each entry: (scale, keep_ratio, expected scale_factor, expected shape).
    # The first uses -1 to indicate np.inf; the other two are plain tuples.
    scenarios = (
        ((-1, 256), True, [341 / 320, 256 / 240], (256, 341)),
        ((320, 320), False, [1, 320 / 240], (320, 320)),
        ((341, 256), False, [341 / 320, 256 / 240], (256, 341)),
    )
    for scale, keep_ratio, expected_factor, expected_shape in scenarios:
        imgs = list(np.random.rand(2, 240, 320, 3))
        results = dict(imgs=imgs, modality='RGB')
        resize = Resize(scale=scale, keep_ratio=keep_ratio, lazy=True)
        resize_results = resize(results)
        # In lazy mode the very same list object must be handed back.
        assert resize_results['imgs'] is imgs
        assert assert_dict_has_keys(resize_results, target_keys)

        resize_results_fuse = Fuse()(resize_results)
        assert np.all(resize_results_fuse['scale_factor'] == np.array(
            expected_factor, dtype=np.float32))
        assert resize_results_fuse['img_shape'] == expected_shape

    # ``resize`` is the (341, 256) instance at this point.
    expected_repr = (f'{resize.__class__.__name__ }'
                     f'(scale={(341, 256)}, keep_ratio={False}, ' +
                     f'interpolation=bilinear, lazy={True})')
    assert repr(resize) == expected_repr
def test_random_resized_crop():
    """Check ``RandomResizedCrop`` validation, crop ranges and the fallback."""
    # area_range must be a tuple of float.
    with pytest.raises(TypeError):
        RandomResizedCrop(area_range=0.5)

    # aspect_ratio_range must be a tuple of float.
    with pytest.raises(TypeError):
        RandomResizedCrop(area_range=(0.08, 1.0), aspect_ratio_range=0.1)

    target_keys = ['imgs', 'crop_bbox', 'img_shape']
    # There will be a slight difference because of rounding.
    eps = 0.01

    imgs = list(np.random.rand(2, 256, 341, 3))
    results = dict(imgs=imgs)
    results['gt_bboxes'] = np.array([[0, 0, 340, 256]])
    results['proposals'] = np.array([[0, 0, 340, 256]])
    results['keypoint'] = np.array([[160, 120],
                                    [160, 120]]).reshape([1, 1, 2, 2])

    # area_range[0] > area_range[1], which is wrong.
    with pytest.raises(AssertionError):
        random_crop = RandomResizedCrop(area_range=(0.9, 0.7))
        random_crop(results)

    # A negative lower bound in aspect_ratio_range is wrong as well.
    with pytest.raises(AssertionError):
        random_crop = RandomResizedCrop(aspect_ratio_range=(-0.1, 2.0))
        random_crop(results)

    # Default ranges.
    random_crop = RandomResizedCrop()
    random_crop_result = random_crop(results)
    assert assert_dict_has_keys(random_crop_result, target_keys)
    assert check_crop(imgs, random_crop_result['imgs'], results['crop_bbox'])
    h, w = random_crop_result['img_shape']
    area_ratio = h * w / 256 / 341
    aspect = h / w
    assert 0.08 - eps <= area_ratio and area_ratio <= 1 + eps
    assert 3. / 4. - eps <= aspect and aspect - eps <= 4. / 3.

    expected_repr = (f'{random_crop.__class__.__name__}'
                     f'(area_range={(0.08, 1.0)}, '
                     f'aspect_ratio_range={(3 / 4, 4 / 3)}, '
                     f'lazy={False})')
    assert repr(random_crop) == expected_repr

    # Fallback scenario: with these ranges the crop is expected to come out
    # as a 256x256 square.
    random_crop = RandomResizedCrop(
        area_range=(0.9, 0.9), aspect_ratio_range=(10.0, 10.1))
    imgs = list(np.random.rand(2, 256, 341, 3))
    results = dict(imgs=imgs)
    random_crop_result = random_crop(results)
    assert assert_dict_has_keys(random_crop_result, target_keys)
    assert check_crop(imgs, random_crop_result['imgs'], results['crop_bbox'])
    h, w = random_crop_result['img_shape']
    assert h == w == 256
def test_pytorchvideo_trans():
    """Chain several ``PytorchVideoTrans`` wrappers and check frame shapes."""
    # A transform name that is not supported at all.
    with pytest.raises(AssertionError):
        PytorchVideoTrans(type='BlaBla')

    # This trans exists in pytorchvideo but is not supported in MMAction2.
    with pytest.raises(AssertionError):
        PytorchVideoTrans(type='MixUp')

    target_keys = ['imgs']
    imgs = list(np.random.randint(0, 256, (4, 32, 32, 3)).astype(np.uint8))
    results = dict(imgs=imgs)

    def apply_and_check(current, expected_shape, **cfg):
        # Apply one transform to the running results and verify that every
        # frame has the expected shape.
        transformed = PytorchVideoTrans(**cfg)(current)
        assert assert_dict_has_keys(transformed, target_keys)
        assert all(
            img.shape == expected_shape for img in transformed['imgs'])
        return transformed

    # Each step consumes the output of the previous one.
    results = apply_and_check(results, (32, 32, 3), type='AugMix')
    results = apply_and_check(results, (32, 32, 3), type='RandAugment')
    results = apply_and_check(
        results, (16, 16, 3),
        type='RandomResizedCrop',
        target_height=16,
        target_width=16,
        scale=(0.1, 1.),
        aspect_ratio=(0.8, 1.2))
    results = apply_and_check(
        results, (24, 24, 3), type='ShortSideScale', size=24)

    # RandomShortSideScale: the size is random, so only its bounds and the
    # consistency across frames are checked.
    random_short_side_scale = PytorchVideoTrans(
        type='RandomShortSideScale', min_size=24, max_size=36)
    results = random_short_side_scale(results)
    target_shape = results['imgs'][0].shape
    assert 36 >= target_shape[0] >= 24
    assert assert_dict_has_keys(results, target_keys)
    assert all(img.shape == target_shape for img in results['imgs'])
def test_flip_lazy(self):
    """Check lazy ``Flip``: validation, deferred flipping and ``Fuse``."""
    # 'vertically' is not an accepted direction.
    with pytest.raises(ValueError):
        Flip(direction='vertically', lazy=True)

    target_keys = ['imgs', 'flip_direction', 'modality']

    def run_lazy_flip(flip_ratio, direction):
        # Build inputs, apply the lazy flip, run the shared pre-fuse checks
        # and then fuse.
        imgs = list(np.random.rand(2, 64, 64, 3))
        imgs_tmp = imgs.copy()
        results = dict(imgs=imgs_tmp, modality='RGB')
        flip = Flip(flip_ratio=flip_ratio, direction=direction, lazy=True)
        flip_results = flip(results)
        # In lazy mode the very same list object must be handed back.
        assert flip_results['imgs'] is imgs_tmp
        assert assert_dict_has_keys(flip_results, target_keys)
        flip_results_fuse = Fuse()(flip_results)
        return imgs, results, flip, flip_results, flip_results_fuse

    # Never flip: the images must be left as they were.
    imgs, results, flip, flip_results, flip_results_fuse = run_lazy_flip(
        0, 'horizontal')
    assert np.equal(imgs, results['imgs']).all()
    assert flip_results['imgs'] is results['imgs']
    assert flip_results_fuse['imgs'][0].shape == (64, 64, 3)

    # Always flip, first horizontally and then vertically.
    for direction in ('horizontal', 'vertical'):
        imgs, results, flip, flip_results, flip_results_fuse = run_lazy_flip(
            1, direction)
        assert check_flip(imgs, flip_results['imgs'],
                          flip_results['flip_direction'])
        assert flip_results['imgs'] is results['imgs']
        assert flip_results_fuse['imgs'][0].shape == (64, 64, 3)

    # ``flip`` is the vertical instance at this point.
    expected_repr = (f'{flip.__class__.__name__}'
                     f'(flip_ratio={1}, direction=vertical, '
                     f'flip_label_map={None}, lazy={True})')
    assert repr(flip) == expected_repr
def test_pims_init(self):
    """Check ``PIMSInit`` in its default mode and in ``mode='efficient'``."""
    target_keys = ['video_reader', 'total_frames']
    video_result = copy.deepcopy(self.video_results)

    # The same ``video_result`` dict is fed to both initialisers: first the
    # default mode, then the 'efficient' one.
    for init_kwargs in ({}, {'mode': 'efficient'}):
        pims_init = PIMSInit(**init_kwargs)
        pims_init_result = pims_init(video_result)
        assert assert_dict_has_keys(pims_init_result, target_keys)
        assert pims_init_result['total_frames'] == 300

    # ``pims_init`` is the 'efficient' instance at this point.
    expected_repr = (f'{pims_init.__class__.__name__}'
                     f'(io_backend=disk, mode=efficient)')
    assert repr(pims_init) == expected_repr
def test_random_resized_crop_lazy(self):
    """Check lazy ``RandomResizedCrop``: validation, ranges and fallback."""
    target_keys = ['imgs', 'crop_bbox', 'img_shape', 'lazy']
    # There will be a slight difference because of rounding.
    eps = 0.01
    imgs = list(np.random.rand(2, 256, 341, 3))
    results = dict(imgs=imgs)

    # area_range[0] > area_range[1], which is wrong.
    with pytest.raises(AssertionError):
        random_crop = RandomResizedCrop(area_range=(0.9, 0.7), lazy=True)
        random_crop(results)

    # A negative lower bound in aspect_ratio_range is wrong as well.
    with pytest.raises(AssertionError):
        random_crop = RandomResizedCrop(
            aspect_ratio_range=(-0.1, 2.0), lazy=True)
        random_crop(results)

    # Default ranges.
    random_crop = RandomResizedCrop(lazy=True)
    random_crop_result = random_crop(results)
    assert assert_dict_has_keys(random_crop_result, target_keys)
    # In lazy mode the very same object must be handed back.
    assert random_crop_result['imgs'] is imgs
    random_crop_result_fuse = Fuse()(random_crop_result)
    assert check_crop(imgs, random_crop_result_fuse['imgs'],
                      results['crop_bbox'])
    h, w = random_crop_result['img_shape']
    area_ratio = h * w / 256 / 341
    aspect = h / w
    assert 0.08 - eps <= area_ratio and area_ratio <= 1 + eps
    assert 3. / 4. - eps <= aspect and aspect - eps <= 4. / 3.

    expected_repr = (f'{random_crop.__class__.__name__}'
                     f'(area_range={(0.08, 1.0)}, '
                     f'aspect_ratio_range={(3 / 4, 4 / 3)}, '
                     f'lazy={True})')
    assert repr(random_crop) == expected_repr

    # Fallback scenario: with these ranges the crop is expected to come out
    # as a 256x256 square.  The images are passed as one ndarray here, not
    # as a list.
    random_crop = RandomResizedCrop(
        area_range=(0.9, 0.9), aspect_ratio_range=(10.0, 10.1), lazy=True)
    imgs = np.random.rand(2, 256, 341, 3)
    results = dict(imgs=imgs)
    random_crop_result = random_crop(results)
    assert assert_dict_has_keys(random_crop_result, target_keys)
    assert random_crop_result['imgs'] is imgs
    random_crop_result_fuse = Fuse()(random_crop_result)
    assert check_crop(imgs, random_crop_result_fuse['imgs'],
                      results['crop_bbox'])
    h, w = random_crop_result['img_shape']
    assert h == w == 256
def test_ten_crop(self):
    """Check ``TenCrop`` argument validation and its output on 256x256 input.

    A float, a string and a list are each rejected with ``TypeError``. With
    ``crop_size=224`` the result must pass ``check_crop`` (called with 10 as
    its last argument), report ``img_shape == (224, 224)`` and produce the
    expected repr.
    """
    # crop_size must be int or tuple of int
    for invalid_crop_size in (0.5, '224', [224, 224]):
        with pytest.raises(TypeError):
            TenCrop(invalid_crop_size)

    # ten crop with crop_size 224 on 256x256 frames
    frames = list(np.random.rand(2, 256, 256, 3))
    ten_crop = TenCrop(crop_size=224)
    ten_crop_results = ten_crop(dict(imgs=frames))

    expected_keys = ['imgs', 'crop_bbox', 'img_shape']
    assert assert_dict_has_keys(ten_crop_results, expected_keys)
    cropped = ten_crop_results['imgs']
    crop_bbox = ten_crop_results['crop_bbox']
    assert check_crop(frames, cropped, crop_bbox, 10)
    assert ten_crop_results['img_shape'] == (224, 224)

    expected_repr = (f'{ten_crop.__class__.__name__}'
                     f'(crop_size={(224, 224)})')
    assert repr(ten_crop) == expected_repr
def test_format_shape():
    """Check the ``input_shape`` reported by ``FormatShape`` per layout.

    ``'NHWC'`` is rejected with ``ValueError``. ``'NCHW'``, ``'NCTHW'`` (with
    one and with two clips) and ``'NPTCHW'`` are each run on random input and
    the resulting ``input_shape`` is compared with the expected tuple.
    """
    with pytest.raises(ValueError):
        # invalid input format
        FormatShape('NHWC')

    # 'NCHW' input format
    results = dict(
        imgs=np.random.randn(3, 224, 224, 3), num_clips=1, clip_len=3)
    format_shape = FormatShape('NCHW')
    formatted = format_shape(results)
    assert formatted['input_shape'] == (3, 3, 224, 224)

    # `NCTHW` input format with num_clips=1, clip_len=3
    results = dict(
        imgs=np.random.randn(3, 224, 224, 3), num_clips=1, clip_len=3)
    format_shape = FormatShape('NCTHW')
    formatted = format_shape(results)
    assert formatted['input_shape'] == (1, 3, 3, 224, 224)

    # `NCTHW` input format with num_clips=2, clip_len=3
    results = dict(
        imgs=np.random.randn(18, 224, 224, 3), num_clips=2, clip_len=3)
    formatted = format_shape(results)
    assert formatted['input_shape'] == (6, 3, 3, 224, 224)
    # The key check is made on the dict that was passed in.
    assert assert_dict_has_keys(results, ['imgs', 'input_shape'])

    expected_repr = format_shape.__class__.__name__ + "(input_format='NCTHW')"
    assert repr(format_shape) == expected_repr

    # 'NPTCHW' input format
    results = dict(
        imgs=np.random.randn(72, 224, 224, 3),
        num_clips=9,
        clip_len=1,
        num_proposals=8)
    format_shape = FormatShape('NPTCHW')
    formatted = format_shape(results)
    assert formatted['input_shape'] == (8, 9, 3, 224, 224)
def test_multi_group_crop():
    """Check ``MultiGroupCrop`` argument validation and a 224 / 3-group crop.

    Invalid ``crop_size`` or ``groups`` values must raise the listed
    exception. A valid instance is then applied to two 256x341 frames and its
    keys, crops, ``img_shape`` and repr are verified.
    """
    # (expected exception, positional constructor arguments)
    invalid_cases = (
        (TypeError, (0.5, 1)),  # crop_size must be int or tuple of int
        (TypeError, ('224', 1)),  # crop_size must be int or tuple of int
        (TypeError, (224, '1')),  # groups must be int
        (ValueError, (224, 0)),  # groups must be positive
    )
    for expected_error, ctor_args in invalid_cases:
        with pytest.raises(expected_error):
            MultiGroupCrop(*ctor_args)

    expected_keys = ['imgs', 'crop_bbox', 'img_shape']

    # multi_group_crop with crop_size 224, groups 3
    frames = list(np.random.rand(2, 256, 341, 3))
    multi_group_crop = MultiGroupCrop(224, 3)
    multi_group_crop_result = multi_group_crop(dict(imgs=frames))

    assert assert_dict_has_keys(multi_group_crop_result, expected_keys)
    cropped = multi_group_crop_result['imgs']
    crop_bbox = multi_group_crop_result['crop_bbox']
    assert check_crop(frames, cropped, crop_bbox, multi_group_crop.groups)
    assert multi_group_crop_result['img_shape'] == (224, 224)

    expected_repr = (f'{multi_group_crop.__class__.__name__}'
                     f'(crop_size={(224, 224)}, groups={3})')
    assert repr(multi_group_crop) == expected_repr
def test_center_crop():
    """Check ``CenterCrop`` on 240x320 frames that carry keypoints and boxes.

    A float or string ``crop_size`` raises ``TypeError``. With
    ``crop_size=224`` the crop box must be ``[48, 8, 272, 232]``, the image
    shape ``(224, 224)`` and every keypoint coordinate 112.
    """
    # crop_size must be int or tuple of int
    for invalid_crop_size in (0.5, '224'):
        with pytest.raises(TypeError):
            CenterCrop(invalid_crop_size)

    # center crop with crop_size 224
    frames = list(np.random.rand(2, 240, 320, 3))
    # Both keypoints sit at (160, 120), the centre of a 320x240 frame.
    keypoints = np.array([[160, 120], [160, 120]]).reshape([1, 1, 2, 2])
    results = dict(
        imgs=frames,
        keypoint=keypoints,
        gt_bboxes=np.array([[0, 0, 320, 240]]),
        proposals=np.array([[0, 0, 320, 240]]))

    center_crop = CenterCrop(crop_size=224)
    center_crop_results = center_crop(results)

    expected_keys = ['imgs', 'crop_bbox', 'img_shape', 'keypoint']
    assert assert_dict_has_keys(center_crop_results, expected_keys)
    assert check_crop(frames, center_crop_results['imgs'],
                      center_crop_results['crop_bbox'])
    expected_bbox = np.array([48, 8, 272, 232])
    assert np.all(center_crop_results['crop_bbox'] == expected_bbox)
    assert center_crop_results['img_shape'] == (224, 224)
    assert np.all(center_crop_results['keypoint'] == 112)

    expected_repr = (f'{center_crop.__class__.__name__}'
                     f'(crop_size={(224, 224)}, lazy={False})')
    assert repr(center_crop) == expected_repr
def test_center_crop_lazy(self):
    """Check ``CenterCrop`` with ``lazy=True`` followed by ``Fuse``.

    A float, string or list ``crop_size`` raises ``TypeError``. After
    ``Fuse`` the crop box must be ``[48, 8, 272, 232]`` and the image shape
    ``(224, 224)``.
    """
    # crop_size must be int or tuple of int
    for invalid_crop_size in (0.5, '224', [224, 224]):
        with pytest.raises(TypeError):
            CenterCrop(invalid_crop_size)

    # center crop with crop_size 224
    frames = list(np.random.rand(2, 240, 320, 3))
    center_crop = CenterCrop(crop_size=224, lazy=True)
    center_crop_results = center_crop(dict(imgs=frames))

    expected_keys = ['imgs', 'crop_bbox', 'img_shape']
    assert assert_dict_has_keys(center_crop_results, expected_keys)

    center_crop_results_fuse = Fuse()(center_crop_results)
    assert check_crop(frames, center_crop_results_fuse['imgs'],
                      center_crop_results['crop_bbox'])
    expected_bbox = np.array([48, 8, 272, 232])
    assert np.all(center_crop_results_fuse['crop_bbox'] == expected_bbox)
    assert center_crop_results_fuse['img_shape'] == (224, 224)

    expected_repr = (f'{center_crop.__class__.__name__}'
                     f'(crop_size={(224, 224)}, lazy={True})')
    assert repr(center_crop) == expected_repr
def test_color_jitter():
    """Check ``ColorJitter`` defaults, output shape, value range and repr.

    The transform is applied to three random 112x112 uint8 RGB frames. The
    output must keep the ``(3, 112, 112, 3)`` shape and stay within
    ``[0, 255]``.
    """
    frames = list(
        np.random.randint(0, 255, size=(3, 112, 112, 3), dtype=np.uint8))
    results = dict(imgs=frames)

    color_jitter = ColorJitter()
    # Default parameter ranges exposed as attributes.
    scale_range = (0.5, 1.5)
    hue_range = (-0.1, 0.1)
    for attr_name in ('brightness', 'contrast', 'saturation'):
        assert getattr(color_jitter, attr_name) == scale_range
    assert color_jitter.hue == hue_range

    color_jitter_results = color_jitter(results)
    assert assert_dict_has_keys(color_jitter_results, ['imgs'])
    assert np.shape(color_jitter_results['imgs']) == (3, 112, 112, 3)
    for frame in color_jitter_results['imgs']:
        assert np.all(frame >= 0)
        assert np.all(frame <= 255)

    expected_repr = (f'{color_jitter.__class__.__name__}('
                     f'brightness={scale_range}, '
                     f'contrast={scale_range}, '
                     f'saturation={scale_range}, '
                     f'hue={hue_range})')
    assert repr(color_jitter) == expected_repr
def test_load_audio_feature(self):
    """Check ``LoadAudioFeature`` with the fixture path and a missing path.

    When ``audio_path`` is replaced by ``'foo/foo/bar.npy'`` the loader must
    still return ``audios``, with shape ``(640, 80)``.
    """
    expected_keys = ['audios']

    # Regular case: use the fixture inputs unchanged.
    fixture_inputs = copy.deepcopy(self.audio_feature_results)
    load_audio_feature = LoadAudioFeature()
    loaded = load_audio_feature(fixture_inputs)
    assert assert_dict_has_keys(loaded, expected_keys)

    # test when no audio feature file exists
    missing_inputs = copy.deepcopy(self.audio_feature_results)
    missing_inputs['audio_path'] = 'foo/foo/bar.npy'
    load_audio_feature = LoadAudioFeature()
    loaded = load_audio_feature(missing_inputs)
    assert loaded['audios'].shape == (640, 80)
    assert assert_dict_has_keys(loaded, expected_keys)

    expected_repr = (f'{load_audio_feature.__class__.__name__}('
                     'pad_method=zero)')
    assert repr(load_audio_feature) == expected_repr
def test_proposal_pipeline(self):
    """Check the keys of the first ``SSNDataset`` sample in three setups.

    The dataset is built with the training pipeline (default arguments and
    ``video_centric=False``) and with the test pipeline
    (``test_mode=True``). Sample 0 must contain the expected keys each time.
    """

    def build_dataset(pipeline, **extra_kwargs):
        # Every dataset shares the annotation file, both cfgs and the prefix.
        return SSNDataset(
            self.proposal_ann_file,
            pipeline,
            self.proposal_train_cfg,
            self.proposal_test_cfg,
            data_prefix=self.data_prefix,
            **extra_kwargs)

    train_keys = [
        'frame_dir', 'video_id', 'total_frames', 'gts', 'proposals',
        'filename_tmpl', 'modality', 'out_proposals', 'reg_targets',
        'proposal_scale_factor', 'proposal_labels', 'proposal_type',
        'start_index'
    ]

    # SSN Dataset not in test mode
    sample = build_dataset(self.proposal_pipeline)[0]
    assert assert_dict_has_keys(sample, train_keys)

    # SSN Dataset with random sampling proposals
    sample = build_dataset(self.proposal_pipeline, video_centric=False)[0]
    assert assert_dict_has_keys(sample, train_keys)

    test_keys = [
        'frame_dir', 'video_id', 'total_frames', 'gts', 'proposals',
        'filename_tmpl', 'modality', 'relative_proposal_list',
        'scale_factor_list', 'proposal_tick_list', 'reg_norm_consts',
        'start_index'
    ]

    # SSN Dataset in test mode
    sample = build_dataset(self.proposal_test_pipeline, test_mode=True)[0]
    assert assert_dict_has_keys(sample, test_keys)
def test_pyav_init(self):
    """Check ``PyAVInit`` output keys, frame count and repr.

    The transform is applied to a copy of ``self.video_results``. The result
    must contain ``video_reader`` and ``total_frames`` with a frame count of
    300.
    """
    expected_keys = ['video_reader', 'total_frames']
    video_result = copy.deepcopy(self.video_results)

    pyav_init = PyAVInit()
    pyav_init_result = pyav_init(video_result)

    assert assert_dict_has_keys(pyav_init_result, expected_keys)
    assert pyav_init_result['total_frames'] == 300

    expected_repr = pyav_init.__class__.__name__ + '(io_backend=disk)'
    assert repr(pyav_init) == expected_repr