import bisect
import math
import os.path as osp
import tempfile
from collections import defaultdict
from unittest.mock import MagicMock

import numpy as np

from mmdet.datasets import (ClassBalancedDataset, CocoDataset, ConcatDataset,
                            CustomDataset, MultiImageMixDataset,
                            RepeatDataset, build_dataset)

# The _create_dummy_coco_json, _create_dummy_custom_pkl and
# _create_dummy_results helpers used below are assumed to be defined
# elsewhere in this test module.


def test_dataset_wrapper():
    CustomDataset.load_annotations = MagicMock()
    CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: idx)
    dataset_a = CustomDataset(
        ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
    len_a = 10
    cat_ids_list_a = [
        np.random.randint(0, 80, num).tolist()
        for num in np.random.randint(1, 20, len_a)
    ]
    dataset_a.data_infos = MagicMock()
    dataset_a.data_infos.__len__.return_value = len_a
    dataset_a.get_cat_ids = MagicMock(
        side_effect=lambda idx: cat_ids_list_a[idx])

    dataset_b = CustomDataset(
        ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
    len_b = 20
    cat_ids_list_b = [
        np.random.randint(0, 80, num).tolist()
        for num in np.random.randint(1, 20, len_b)
    ]
    dataset_b.data_infos = MagicMock()
    dataset_b.data_infos.__len__.return_value = len_b
    dataset_b.get_cat_ids = MagicMock(
        side_effect=lambda idx: cat_ids_list_b[idx])

    # ConcatDataset: indices beyond len(dataset_a) fall through to dataset_b
    concat_dataset = ConcatDataset([dataset_a, dataset_b])
    assert concat_dataset[5] == 5
    assert concat_dataset[25] == 15
    assert concat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
    assert concat_dataset.get_cat_ids(25) == cat_ids_list_b[15]
    assert len(concat_dataset) == len(dataset_a) + len(dataset_b)

    # RepeatDataset: indices wrap modulo the original dataset length
    repeat_dataset = RepeatDataset(dataset_a, 10)
    assert repeat_dataset[5] == 5
    assert repeat_dataset[15] == 5
    assert repeat_dataset[27] == 7
    assert repeat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
    assert repeat_dataset.get_cat_ids(15) == cat_ids_list_a[5]
    assert repeat_dataset.get_cat_ids(27) == cat_ids_list_a[7]
    assert len(repeat_dataset) == 10 * len(dataset_a)

    # Re-derive the expected ClassBalancedDataset repeat factors:
    # f(c) is the fraction of images containing category c, and each image
    # is repeated ceil(max_{c in image} max(1, sqrt(thr / f(c)))) times.
    category_freq = defaultdict(int)
    for cat_ids in cat_ids_list_a:
        cat_ids = set(cat_ids)
        for cat_id in cat_ids:
            category_freq[cat_id] += 1
    for k, v in category_freq.items():
        category_freq[k] = v / len(cat_ids_list_a)

    mean_freq = np.mean(list(category_freq.values()))
    repeat_thr = mean_freq

    category_repeat = {
        cat_id: max(1.0, math.sqrt(repeat_thr / cat_freq))
        for cat_id, cat_freq in category_freq.items()
    }

    repeat_factors = []
    for cat_ids in cat_ids_list_a:
        cat_ids = set(cat_ids)
        repeat_factor = max({category_repeat[cat_id] for cat_id in cat_ids})
        repeat_factors.append(math.ceil(repeat_factor))
    repeat_factors_cumsum = np.cumsum(repeat_factors)

    repeat_factor_dataset = ClassBalancedDataset(dataset_a, repeat_thr)
    assert len(repeat_factor_dataset) == repeat_factors_cumsum[-1]
    for idx in np.random.randint(0, len(repeat_factor_dataset), 3):
        assert repeat_factor_dataset[idx] == bisect.bisect_right(
            repeat_factors_cumsum, idx)

    # MultiImageMixDataset with a YOLOX-style Mosaic/MixUp pipeline
    img_scale = (60, 60)
    dynamic_scale = (80, 80)
    pipeline = [
        dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
        dict(
            type='RandomAffine',
            scaling_ratio_range=(0.1, 2),
            border=(-img_scale[0] // 2, -img_scale[1] // 2)),
        dict(
            type='MixUp',
            img_scale=img_scale,
            ratio_range=(0.8, 1.6),
            pad_val=114.0),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Resize', keep_ratio=True),
        dict(type='Pad', pad_to_square=True, pad_val=114.0),
    ]

    CustomDataset.load_annotations = MagicMock()
    results = []
    for _ in range(2):
        height = np.random.randint(10, 30)
        width = np.random.randint(10, 30)
        img = np.ones((height, width, 3))
        gt_bboxes = np.concatenate([
            np.random.randint(1, 5, (2, 2)),
            np.random.randint(1, 5, (2, 2)) + 5
        ], axis=1)
        gt_labels = np.random.randint(0, 80, 2)
        results.append(dict(gt_bboxes=gt_bboxes, gt_labels=gt_labels, img=img))
    CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: results[idx])
    dataset_a = CustomDataset(
        ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
    len_a = 2
    cat_ids_list_a = [
        np.random.randint(0, 80, num).tolist()
        for num in np.random.randint(1, 20, len_a)
    ]
    dataset_a.data_infos = MagicMock()
    dataset_a.data_infos.__len__.return_value = len_a
    dataset_a.get_cat_ids = MagicMock(
        side_effect=lambda idx: cat_ids_list_a[idx])

    multi_image_mix_dataset = MultiImageMixDataset(dataset_a, pipeline,
                                                   dynamic_scale)
    for idx in range(len_a):
        results_ = multi_image_mix_dataset[idx]
        assert results_['img'].shape == (dynamic_scale[0], dynamic_scale[1], 3)

    # test skip_type_keys
    multi_image_mix_dataset = MultiImageMixDataset(
        dataset_a,
        pipeline,
        dynamic_scale,
        skip_type_keys=('MixUp', 'RandomFlip', 'Resize', 'Pad'))
    for idx in range(len_a):
        results_ = multi_image_mix_dataset[idx]
        assert results_['img'].shape == (img_scale[0], img_scale[1], 3)
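
# A minimal standalone sketch (not part of the original tests) of the
# repeat-factor rule that ClassBalancedDataset implements, worked on a
# hand-checkable toy dataset; `_toy_repeat_factors` is a name invented
# here for illustration.
def _toy_repeat_factors():
    # category ids per image: category 0 appears in every image,
    # category 1 in only one of three
    cat_ids_per_image = [{0}, {0, 1}, {0}]
    # f(c): fraction of images containing category c
    freq = {0: 3 / 3, 1: 1 / 3}
    repeat_thr = 0.5
    # per-category factor r(c) = max(1, sqrt(thr / f(c)))
    cat_repeat = {
        c: max(1.0, math.sqrt(repeat_thr / f))
        for c, f in freq.items()
    }
    # per-image factor: the largest r(c) over the categories in the image
    return [max(cat_repeat[c] for c in cats) for cats in cat_ids_per_image]


# _toy_repeat_factors() == [1.0, ~1.22, 1.0]: only the image holding the
# rare category gets oversampled.
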
def test_dataset_evaluation():
    tmp_dir = tempfile.TemporaryDirectory()
    # create dummy data
    fake_json_file = osp.join(tmp_dir.name, 'fake_data.json')
    _create_dummy_coco_json(fake_json_file)

    # test single coco dataset evaluation
    coco_dataset = CocoDataset(
        ann_file=fake_json_file, classes=('car', ), pipeline=[])
    fake_results = _create_dummy_results()
    eval_results = coco_dataset.evaluate(fake_results, classwise=True)
    assert eval_results['bbox_mAP'] == 1
    assert eval_results['bbox_mAP_50'] == 1
    assert eval_results['bbox_mAP_75'] == 1

    # test concat dataset evaluation
    fake_concat_results = _create_dummy_results() + _create_dummy_results()

    # build concat dataset through a list of config dicts
    coco_cfg = dict(
        type='CocoDataset',
        ann_file=fake_json_file,
        classes=('car', ),
        pipeline=[])
    concat_cfgs = [coco_cfg, coco_cfg]
    concat_dataset = build_dataset(concat_cfgs)
    eval_results = concat_dataset.evaluate(fake_concat_results)
    assert eval_results['0_bbox_mAP'] == 1
    assert eval_results['0_bbox_mAP_50'] == 1
    assert eval_results['0_bbox_mAP_75'] == 1
    assert eval_results['1_bbox_mAP'] == 1
    assert eval_results['1_bbox_mAP_50'] == 1
    assert eval_results['1_bbox_mAP_75'] == 1

    # build concat dataset through a concatenated ann_file
    coco_cfg = dict(
        type='CocoDataset',
        ann_file=[fake_json_file, fake_json_file],
        classes=('car', ),
        pipeline=[])
    concat_dataset = build_dataset(coco_cfg)
    eval_results = concat_dataset.evaluate(fake_concat_results)
    assert eval_results['0_bbox_mAP'] == 1
    assert eval_results['0_bbox_mAP_50'] == 1
    assert eval_results['0_bbox_mAP_75'] == 1
    assert eval_results['1_bbox_mAP'] == 1
    assert eval_results['1_bbox_mAP_50'] == 1
    assert eval_results['1_bbox_mAP_75'] == 1

    # create dummy data
    fake_pkl_file = osp.join(tmp_dir.name, 'fake_data.pkl')
    _create_dummy_custom_pkl(fake_pkl_file)

    # test single custom dataset evaluation
    custom_dataset = CustomDataset(
        ann_file=fake_pkl_file, classes=('car', ), pipeline=[])
    fake_results = _create_dummy_results()
    eval_results = custom_dataset.evaluate(fake_results)
    assert eval_results['mAP'] == 1

    # test concat dataset evaluation
    fake_concat_results = _create_dummy_results() + _create_dummy_results()

    # build concat dataset through a list of config dicts
    custom_cfg = dict(
        type='CustomDataset',
        ann_file=fake_pkl_file,
        classes=('car', ),
        pipeline=[])
    concat_cfgs = [custom_cfg, custom_cfg]
    concat_dataset = build_dataset(concat_cfgs)
    eval_results = concat_dataset.evaluate(fake_concat_results)
    assert eval_results['0_mAP'] == 1
    assert eval_results['1_mAP'] == 1

    # build concat dataset through a concatenated ann_file
    concat_cfg = dict(
        type='CustomDataset',
        ann_file=[fake_pkl_file, fake_pkl_file],
        classes=('car', ),
        pipeline=[])
    concat_dataset = build_dataset(concat_cfg)
    eval_results = concat_dataset.evaluate(fake_concat_results)
    assert eval_results['0_mAP'] == 1
    assert eval_results['1_mAP'] == 1

    # build concat dataset through an explicit ConcatDataset type;
    # separate_eval=False evaluates over the merged dataset instead of
    # reporting per-dataset, index-prefixed metrics
    concat_cfg = dict(
        type='ConcatDataset',
        datasets=[custom_cfg, custom_cfg],
        separate_eval=False)
    concat_dataset = build_dataset(concat_cfg)
    eval_results = concat_dataset.evaluate(fake_concat_results, metric='mAP')
    assert eval_results['mAP'] == 1
    assert len(concat_dataset.datasets[0].data_infos) == \
        len(concat_dataset.datasets[1].data_infos)
    assert len(concat_dataset.datasets[0].data_infos) == 1

    tmp_dir.cleanup()
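
# A hedged sketch (not from the original file) of the detection-result
# layout the evaluate calls above consume, and which the _create_dummy_results
# helper presumably produces: one entry per image, each a per-class list of
# (n, 5) float arrays holding [x1, y1, x2, y2, score].
def _fake_single_class_results(num_images=1):
    bboxes = np.array([[50.0, 60.0, 70.0, 80.0, 1.0]])  # one detection
    return [[bboxes] for _ in range(num_images)]  # [image][class] -> (n, 5)
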
# FBNet architecture-search training script. Besides `import argparse` and
# `import os`, it relies on project-local helpers assumed to be imported
# elsewhere: mmcv_config (presumably mmcv's Config), split_data, detection,
# fbnet_search, build_dataloader and _logger.
def main():
    parser = argparse.ArgumentParser(
        description='Train a model with data parallel for the base net '
        'and model parallel for the classify net.')
    # parser.add_argument('--batch-size', type=int, default=256,
    #                     help='training batch size of all devices.')
    # parser.add_argument('--epochs', type=int, default=1000,
    #                     help='number of training epochs.')
    # parser.add_argument('--log-frequence', type=int, default=10,
    #                     help='log frequency, default is 10')
    parser.add_argument('--gpus', type=str, default='0',
                        help='comma-separated gpu ids, default is 0')
    parser.add_argument('--fb_cfg', type=str, help='fbnet build config')
    parser.add_argument('--model_cfg', type=str, help='detector model config')
    parser.add_argument('--speed_txt', type=str,
                        help='per-block latency (speed) table')
    args = parser.parse_args()

    search_cfg = mmcv_config.fromfile(args.fb_cfg)
    _space = search_cfg.search_space
    model_cfg = mmcv_config.fromfile(args.model_cfg)

    # dataset settings
    classes = ['background', 'face']
    min_scale = 0
    w_data, t_data = split_data('./data/newlibraf_info/train_imglist',
                                './newlibraf_info/newlibraf_face', classes,
                                min_scale)
    img_norm_cfg = dict(
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True)
    w_data_cfg = dict(
        train=dict(
            # ann_file='/home1/zhaoyu/dataset/car_w.pkl',
            ann_file=w_data,
            img_prefix='./data/',
            img_scale=(1000, 216),
            img_norm_cfg=img_norm_cfg,
            size_divisor=32,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=True,
            with_label=True))
    t_data_cfg = dict(
        train=dict(
            # ann_file='/home1/zhaoyu/dataset/car_t.pkl',
            ann_file=t_data,
            img_prefix='./data/',
            img_scale=(1000, 216),
            img_norm_cfg=img_norm_cfg,
            size_divisor=32,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=True,
            with_label=True))

    gpus = [int(x) for x in args.gpus.split(',')]

    # one dataloader for updating the supernet weights (w) and another
    # for updating the architecture parameters (theta)
    w_dataset = CustomDataset(**w_data_cfg['train'])
    w_dataset = build_dataloader(
        w_dataset,
        imgs_per_gpu=16,
        workers_per_gpu=4,
        dist=False,
        num_gpus=len(gpus))
    t_dataset = CustomDataset(**t_data_cfg['train'])
    t_dataset = build_dataloader(
        t_dataset,
        imgs_per_gpu=16,
        workers_per_gpu=2,
        dist=False,
        num_gpus=len(gpus))

    det = detection(
        mmcv_config(model_cfg['model_cfg']),
        mmcv_config(model_cfg['train_cfg']),
        mmcv_config(model_cfg['test_cfg']),
        _space,
        speed_txt=args.speed_txt)
    print(det)

    # name the result dir after the two config files, stripping '.py'
    save_result_path = ('./theta/' + args.fb_cfg.split('/')[-1][:-3] + '_' +
                        args.model_cfg.split('/')[-1][:-3])
    if not os.path.exists(save_result_path):
        os.makedirs(save_result_path)

    searcher = fbnet_search(
        det,
        gpus,
        imgs_per_gpu=8,
        weight_opt_dict={
            'type': 'SGD',
            'lr': 0.01,
            'momentum': 0.9,
            'weight_decay': 0.001
        },
        theta_opt_dict={
            'type': 'Adam',
            'lr': 0.01,
            'betas': (0.9, 0.99),
            'weight_decay': 5e-4
        },
        weight_lr_sche={
            'logger': _logger,
            'T_max': 400,
            'alpha': 1e-4,
            'warmup_step': 1000,
            't_mul': 1.5,
            'lr_mul': 0.95,
        },
        alpha=0.1,
        save_result_path=save_result_path)
    searcher.search(
        train_w_ds=w_dataset,
        train_t_ds=t_dataset,
        epoch=100,
        start_w_epoch=5,
        log_frequence=10)
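
# A rough sketch (an assumption, not this project's code) of the FBNet-style
# alternation that searcher.search presumably runs: warm up the supernet
# weights w for start_w_epoch epochs, then alternate epochs of w updates on
# one data split with theta updates on the other. `weight_step` and
# `theta_step` are hypothetical stand-ins for a single optimizer update.
def _fbnet_alternation(w_loader, t_loader, epoch, start_w_epoch,
                       weight_step, theta_step):
    # train the shared weights alone before touching the architecture params
    for _ in range(start_w_epoch):
        for batch in w_loader:
            weight_step(batch)
    # then alternate weight epochs and theta epochs
    for _ in range(epoch):
        for batch in w_loader:
            weight_step(batch)
        for batch in t_loader:
            theta_step(batch)
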