def load_annotations(self, ann_file, LT_ann_file=None):
    """Load an LVIS annotation file and return its per-image records.

    Args:
        ann_file: Path of the annotation JSON handed to ``LVIS``.
        LT_ann_file: Optional path, or iterable of paths, of list files read
            with ``mmcv.list_from_file``. Their entries are converted to
            ``int`` and used as the image ids. When ``None`` every image id
            reported by the LVIS index is used.

    Returns:
        list[dict]: One record per image id. Each record gets an extra
        ``'filename'`` key holding the last 16 characters of ``'file_name'``.
    """
    self.lvis = LVIS(ann_file)
    self.cat_ids = self.lvis.get_cat_ids()
    if self.CLASSES is None:
        self.CLASSES = self.cat_ids
    # Labels are 1-based positions of the category ids.
    self.cat2label = {
        cat_id: i + 1
        for i, cat_id in enumerate(self.cat_ids)
    }
    self.categories = self.cat_ids

    if LT_ann_file is not None:
        # A bare string would otherwise be iterated character by character.
        if isinstance(LT_ann_file, str):
            list_files = [LT_ann_file]
        else:
            list_files = LT_ann_file
        raw_ids = []
        for list_file in list_files:
            raw_ids += mmcv.list_from_file(list_file)
        self.img_ids = [int(x) for x in raw_ids]
    else:
        self.img_ids = self.lvis.get_img_ids()

    img_infos = []
    for img_id in self.img_ids:
        info = self.lvis.load_imgs([img_id])[0]
        info['filename'] = info['file_name'][-16:]
        img_infos.append(info)
    return img_infos
def __init__(self, ann_file):
    """Index an LVIS annotation file and collect its image records.

    Args:
        ann_file: Path of the annotation JSON handed to ``LVIS``.
    """
    self.lvis = LVIS(ann_file)
    # use CLASSES[self.cat2label[35]] to find the class name
    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports_ball',
               'kite', 'baseball_bat', 'baseball_glove', 'skateboard',
               'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot_dog',
               'pizza', 'donut', 'cake', 'chair', 'couch', 'potted_plant',
               'bed', 'dining_table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell_phone', 'microwave', 'oven',
               'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')
    self.cat_ids = self.lvis.get_cat_ids()
    self.img_ids = self.lvis.get_img_ids()

    records = []
    for image_id in self.img_ids:
        record = self.lvis.load_imgs([image_id])[0]
        # Expose the file name under the key 'filename' as well.
        record['filename'] = record['file_name']
        records.append(record)
    self.img_infos = records

    # Images are read from the train2017 folder.
    self.img_prefix = './data/lvis/train2017'
    self.filter_to_keep_finetune_classes()
def __init__(self, lvis_gt, lvis_dt=None, img_dir=None, dpi=75):
    """Constructor for LVISVis.

    Args:
        lvis_gt (LVIS class instance, or str containing path of
            annotation file)
        lvis_dt (LVISResult class instance, or str containing path
            of result file, or list of dict)
        img_dir (str): path of folder containing all images. If None,
            the image to be displayed will be downloaded to the current
            working dir.
        dpi (int): dpi for figure size setup

    Raises:
        TypeError: if ``lvis_gt`` or ``lvis_dt`` has an unsupported type.
    """
    self.logger = logging.getLogger(__name__)

    if isinstance(lvis_gt, LVIS):
        self.lvis_gt = lvis_gt
    elif isinstance(lvis_gt, str):
        self.lvis_gt = LVIS(lvis_gt)
    else:
        raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))

    if lvis_dt is not None:
        if isinstance(lvis_dt, LVISResults):
            self.lvis_dt = lvis_dt
        elif isinstance(lvis_dt, (str, list)):
            self.lvis_dt = LVISResults(self.lvis_gt, lvis_dt)
        else:
            raise TypeError("Unsupported type {} of lvis_dt.".format(lvis_dt))
    else:
        self.lvis_dt = None

    self.dpi = dpi
    self.img_dir = img_dir if img_dir else '.'
    if self.img_dir == '.':
        # Logger.warn is a deprecated alias; warning() is the supported name.
        self.logger.warning("img_dir not specified. Images will be downloaded.")
def __init__(self, dataset_name, cfg, distributed, output_dir=None):
    """Set up the evaluator state for one registered dataset.

    Args:
        dataset_name: Name looked up in ``MetadataCatalog``; its metadata
            must provide ``json_file``.
        cfg: Config object; only ``cfg.MODEL.MASK_ON`` is read here.
        distributed: Stored as ``self._distributed``.
        output_dir: Stored as ``self._output_dir``.
    """
    self._distributed = distributed
    self._output_dir = output_dir

    self._cpu_device = torch.device("cpu")
    self._logger = logging.getLogger(__name__)

    self._metadata = MetadataCatalog.get(dataset_name)
    local_json = PathManager.get_local_path(self._metadata.json_file)
    self._oid_api = LVIS(local_json)

    # Test set json files do not contain annotations (evaluation must be
    # performed using the LVIS evaluation server), so only evaluate when
    # the file has at least one annotation id.
    ann_ids = self._oid_api.get_ann_ids()
    self._do_evaluation = len(ann_ids) > 0
    self._mask_on = cfg.MODEL.MASK_ON
def get_split2_wt():
    """Split category ids into two bins by training instance count.

    Categories with fewer than 300 training instances go to ``'(0, 300)'``,
    the rest to ``'[300, ~)'``. The resulting dict, together with the fixed
    ``'normal'``, ``'background'`` and ``'all'`` index arrays, is pickled to
    ``./data/lvis/2bins300/valsplit.pkl``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'

    # For training set
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    bin300 = []
    binover = []
    for cid, cate in train_catsinfo.items():
        if cate['instance_count'] < 300:
            bin300.append(cid)
        else:
            binover.append(cid)

    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24; the builtin yields the same dtype.
    splits = {
        '(0, 300)': np.array(bin300, dtype=int),
        '[300, ~)': np.array(binover, dtype=int),
        'normal': np.arange(1, 1231),
        'background': np.zeros((1, ), dtype=int),
        'all': np.arange(1231),
    }

    split_file_name = './data/lvis/2bins300/valsplit.pkl'
    with open(split_file_name, 'wb') as f:
        pickle.dump(splits, f)
def get_cate_weight():
    """Compute inverse-frequency class weights and save them as a tensor.

    The weight of each category is ``1 / instance_count``, divided by the
    mean over indices 1..1230 and limited to the range [0.1, 5]. Index 0 is
    fixed to 1. The result is written to ``./data/lvis/cls_weight.pt``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    # np.float was only an alias of the builtin float and no longer exists
    # in NumPy >= 1.24; the builtin yields the same float64 dtype.
    ins_count_all = np.zeros((1231, ), dtype=float)
    for cid, cate in train_catsinfo.items():
        ins_count_all[cid] = cate['instance_count']
    # Index 0 has no category entry; use 1 to avoid dividing by zero there.
    ins_count_all[0] = 1

    weight = np.ones_like(ins_count_all) / ins_count_all
    weight = weight / weight[1:].mean()
    weight[0] = 1

    # Limit the weights to [0.1, 5].
    weight = np.where(weight > 5, 5, weight)
    weight = np.where(weight < 0.1, 0.1, weight)

    savebin = torch.from_numpy(weight)
    save_path = './data/lvis/cls_weight.pt'
    torch.save(savebin, save_path)
def get_dense_det():
    """Write subsets of the val annotations restricted to dense images.

    Two files are produced from ``./data/lvis/lvis_v0.5_val.json``: one
    keeping only images with at least 20 annotations and one keeping only
    images with at least 40. ``info``, ``categories`` and ``licenses`` are
    copied unchanged.
    """
    train_ann_file = './data/lvis/lvis_v0.5_val.json'
    lvis_train = LVIS(train_ann_file)
    img_ann_map = lvis_train.img_ann_map

    # Image ids grouped by minimum number of annotations.
    set20 = {k for k, anns in img_ann_map.items() if len(anns) >= 20}
    set40 = {k for k, anns in img_ann_map.items() if len(anns) >= 40}

    with open(train_ann_file, 'r') as fin:
        data = json.load(fin)
    # data: ['images', 'info', 'annotations', 'categories', 'licenses']

    def _subset(keep_ids):
        """Return a copy of the dataset limited to the images in keep_ids."""
        return {
            'images': [im for im in data['images'] if im['id'] in keep_ids],
            'info': data['info'],
            'annotations': [
                ann for ann in data['annotations']
                if ann['image_id'] in keep_ids
            ],
            'categories': data['categories'],
            'licenses': data['licenses'],
        }

    outputs = (
        (set20, './data/lvis/lvis_v0.5_val_20.json'),
        (set40, './data/lvis/lvis_v0.5_val_40.json'),
    )
    for keep_ids, save_path in outputs:
        with open(save_path, 'w') as fout:
            json.dump(_subset(keep_ids), fout)
def del_tail():
    """Write a copy of the val annotations that keeps only head categories.

    A category is kept when its ``instance_count`` is above 100. Annotations
    of other categories are dropped, images without a remaining annotation
    are dropped, and the per-image ``not_exhaustive_category_ids`` and
    ``neg_category_ids`` lists are filtered to the kept categories. The
    result goes to ``./data/lvis/lvis_v0.5_val_headonly.json``.
    """
    src_path = './data/lvis/lvis_v0.5_val.json'

    head_ids = {
        cid
        for cid, cate in LVIS(src_path).cats.items()
        if cate['instance_count'] > 100
    }

    with open(src_path, 'r') as fin:
        dataset = json.load(fin)

    kept_anns = [
        ann for ann in dataset['annotations']
        if ann['category_id'] in head_ids
    ]
    kept_img_ids = {ann['image_id'] for ann in kept_anns}

    kept_images = []
    for img in dataset['images']:
        if img['id'] not in kept_img_ids:
            continue
        # The image records are edited in place before being collected.
        img['not_exhaustive_category_ids'] = [
            cc for cc in img['not_exhaustive_category_ids'] if cc in head_ids
        ]
        img['neg_category_ids'] = [
            cc for cc in img['neg_category_ids'] if cc in head_ids
        ]
        kept_images.append(img)

    kept_cats = [cat for cat in dataset['categories'] if cat['id'] in head_ids]

    payload = {
        'images': kept_images,
        'annotations': kept_anns,
        'categories': kept_cats,
        'info': dataset['info'],
        'licenses': dataset['licenses']
    }
    with open('./data/lvis/lvis_v0.5_val_headonly.json', 'w') as fout:
        json.dump(payload, fout)
def get_cate_gs8():
    """Assign every category to one of eight instance-count bins.

    Row 0 of ``label2binlabel`` marks all indices 1..1230 with label 1.
    Rows 1..8 correspond to the count ranges ``<5``, ``<10``, ``<50``,
    ``<100``, ``<500``, ``<1000``, ``<5000`` and the rest; inside its row a
    category receives a running 1-based label. ``pred_slice`` stores, per
    row, the start offset and the final counter value of that row. Both
    arrays are saved as tensors under ``./data/lvis/8bins/``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    # Exclusive upper bounds of rows 1..7; row 8 takes everything above.
    upper_bounds = (5, 10, 50, 100, 500, 1000, 5000)
    num_rows = len(upper_bounds) + 2

    binlabel_count = [1] * num_rows
    # np.int no longer exists in NumPy >= 1.24; builtin int is equivalent.
    label2binlabel = np.zeros((num_rows, 1231), dtype=int)

    label2binlabel[0, 1:] = binlabel_count[0]
    binlabel_count[0] += 1

    for cid, cate in train_catsinfo.items():
        ins_count = cate['instance_count']
        # Number of bounds reached selects the row (1 for < 5, 8 for >= 5000).
        row = 1 + sum(ins_count >= bound for bound in upper_bounds)
        label2binlabel[row, cid] = binlabel_count[row]
        binlabel_count[row] += 1

    savebin = torch.from_numpy(label2binlabel)
    save_path = './data/lvis/8bins/label2binlabel.pt'
    torch.save(savebin, save_path)

    # start and length
    pred_slice = np.zeros((num_rows, 2), dtype=int)
    start_idx = 0
    for i, bincount in enumerate(binlabel_count):
        pred_slice[i, 0] = start_idx
        pred_slice[i, 1] = bincount
        start_idx += bincount

    savebin = torch.from_numpy(pred_slice)
    save_path = './data/lvis/8bins/pred_slice_with0.pt'
    torch.save(savebin, save_path)
def get_draw_val_imgs():
    """Write a val annotation file limited to images showing rare classes.

    A category counts as rare when it has fewer than 20 training instances.
    Every val image with at least one annotation of such a category is kept;
    annotations, categories, info and licenses are copied unchanged. The
    result is written to ``./data/lvis/draw_val.json``.
    """
    train_path = './data/lvis/lvis_v0.5_train.json'
    val_path = './data/lvis/lvis_v0.5_val.json'

    # For training set
    train_index = LVIS(train_path)
    val_index = LVIS(val_path)
    anns_by_img = val_index.img_ann_map

    rare_ids = {
        cid
        for cid, cate in train_index.cats.items()
        if cate['instance_count'] < 20
    }
    print('bin100----------', rare_ids)
    print(len(rare_ids))

    with open(val_path, 'r') as fin:
        val_data = json.load(fin)

    draw_val = {
        key: val_data[key]
        for key in ('info', 'annotations', 'categories', 'licenses')
    }

    selected = []
    for im in val_data['images']:
        present = {ann['category_id'] for ann in anns_by_img[im['id']]}
        if present & rare_ids:
            selected.append(im)
    draw_val['images'] = selected
    print('-------------', len(selected))

    with open('./data/lvis/draw_val.json', 'w') as fout:
        json.dump(draw_val, fout)
def del_nondense_cls():
    """Restrict the dense train/val files to categories annotated in val.

    Train annotations, images and categories are filtered to the category
    ids that occur in the val annotations. The val file is rewritten with
    the same reduced category list. Output goes to
    ``./data/lvis/densenew/``.
    """
    train_ann_file = './data/lvis/dense/dense_lvis_v0.5_train.json'
    val_ann_file = './data/lvis/dense/dense_lvis_v0.5_val.json'

    with open(train_ann_file, 'r') as fin:
        train_old = json.load(fin)
    with open(val_ann_file, 'r') as fin:
        val = json.load(fin)

    # Category ids that have at least one val annotation.
    cid_set = {ann['category_id'] for ann in val['annotations']}

    ann_new_train = [
        ann for ann in train_old['annotations']
        if ann['category_id'] in cid_set
    ]
    img_set = {ann['image_id'] for ann in ann_new_train}
    img_new_train = [img for img in train_old['images'] if img['id'] in img_set]
    new_cats = [cat for cat in train_old['categories'] if cat['id'] in cid_set]

    save_train = {
        'images': img_new_train,
        'annotations': ann_new_train,
        'categories': new_cats,
        'info': train_old['info'],
        'licenses': train_old['licenses']
    }
    # The val data is reused as-is except for the category list.
    save_val = val
    save_val['categories'] = new_cats

    save_dir = './data/lvis/densenew/'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    save_path = os.path.join(save_dir, 'dense_lvis_v0.5_train.json')
    with open(save_path, 'w') as fout:
        json.dump(save_train, fout)

    save_path = os.path.join(save_dir, 'dense_lvis_v0.5_val.json')
    with open(save_path, 'w') as fout:
        json.dump(save_val, fout)
def ana_param():
    """Dump per-category instance counts and classifier weight statistics.

    Writes two pickles:

    * ``./data/lvis/cate2insnum.pkl``: dict with ``'train'`` and ``'val'``
      integer arrays of instance counts, each prefixed with a 0 entry.
    * ``./data/lvis/r50_param_ana.pkl``: list holding the per-row L2 norm of
      ``bbox_head.fc_cls.weight`` and the ``bbox_head.fc_cls.bias`` vector
      taken from the checkpoint.
    """
    cate2insnum_file = './data/lvis/cate2insnum.pkl'

    # The counts are always rebuilt; an existing pickle is never read back
    # (the original cache check was hard-wired to False).
    train_ann_file = './data/lvis/lvis_v0.5_train.json'
    val_ann_file = './data/lvis/lvis_v0.5_val.json'

    lvis_train = LVIS(train_ann_file)
    lvis_val = LVIS(val_ann_file)
    train_catsinfo = lvis_train.cats
    val_catsinfo = lvis_val.cats

    train_cat2ins = [0] + [v['instance_count'] for v in train_catsinfo.values()]
    val_cat2ins = [0] + [v['instance_count'] for v in val_catsinfo.values()]

    # np.int no longer exists in NumPy >= 1.24; builtin int is equivalent.
    cate2insnum = {
        'train': np.array(train_cat2ins, dtype=int),
        'val': np.array(val_cat2ins, dtype=int),
    }
    with open(cate2insnum_file, 'wb') as fout:
        pickle.dump(cate2insnum, fout)

    checkpoint_file = './work_dirs/faster_rcnn_r50_fpn_1x_lvis/latest.pth'
    checkpoint = torch.load(checkpoint_file, map_location='cpu')

    param = checkpoint['state_dict']
    cls_fc_weight = param['bbox_head.fc_cls.weight'].numpy()
    cls_fc_bias = param['bbox_head.fc_cls.bias'].numpy()
    cls_fc_weight_norm = np.linalg.norm(cls_fc_weight, axis=1)

    savelist = [cls_fc_weight_norm, cls_fc_bias]
    with open('./data/lvis/r50_param_ana.pkl', 'wb') as fout:
        pickle.dump(savelist, fout)
def __init__(self, lvis_gt, lvis_dt, iou_type="segm"):
    """Build an evaluator from ground truth and detections.

    Args:
        lvis_gt: ``LVIS`` instance, or ``str`` path of an annotation file.
        lvis_dt: ``LVISResults`` instance, ``str`` path of a result file,
            or ``list`` of detection dicts.
        iou_type (str): ``"segm"`` or ``"bbox"``.

    Raises:
        ValueError: if ``iou_type`` is neither ``"bbox"`` nor ``"segm"``.
        TypeError: if ``lvis_gt`` or ``lvis_dt`` has an unsupported type.
    """
    self.logger = logging.getLogger(__name__)

    supported = ["bbox", "segm"]
    if iou_type not in supported:
        raise ValueError("iou_type: {} is not supported.".format(iou_type))

    # Ground truth: take an index as-is, build one from a path.
    if not isinstance(lvis_gt, LVIS):
        if not isinstance(lvis_gt, str):
            raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))
        lvis_gt = LVIS(lvis_gt)
    self.lvis_gt = lvis_gt

    # Detections: take results as-is, build them from a path or a list.
    if not isinstance(lvis_dt, LVISResults):
        if not isinstance(lvis_dt, (str, list)):
            raise TypeError("Unsupported type {} of lvis_dt.".format(lvis_dt))
        lvis_dt = LVISResults(self.lvis_gt, lvis_dt)
    self.lvis_dt = lvis_dt

    self.eval_imgs = defaultdict(list)  # per-image per-category results
    self.eval = {}  # accumulated evaluation results
    self._gts = defaultdict(list)  # gt for evaluation
    self._dts = defaultdict(list)  # dt for evaluation
    self.params = Params(iou_type=iou_type)  # parameters
    self.results = OrderedDict()
    self.ious = {}  # ious between all gts and dts

    # Evaluate every image and category of the ground truth, in sorted order.
    gt_img_ids = self.lvis_gt.get_img_ids()
    self.params.img_ids = sorted(gt_img_ids)
    gt_cat_ids = self.lvis_gt.get_cat_ids()
    self.params.cat_ids = sorted(gt_cat_ids)
def lvis_eval(result_files,
              result_types,
              lvis,
              ann_file,
              max_dets=(100, 300, 1000),
              existing_json=None):
    """Evaluate detection results against LVIS annotations and print them.

    Args:
        result_files: For ``'proposal_fast'`` / ``'proposal_fast_percat'``
            this is passed straight to ``lvis_fast_eval_recall``; otherwise
            it is indexed by result type and must yield ``.json`` paths.
        result_types (list[str]): Any of ``'proposal'``, ``'proposal_fast'``,
            ``'proposal_fast_percat'``, ``'bbox'``, ``'segm'``,
            ``'keypoints'``.
        lvis: ``LVIS`` instance or a string from which one is built.
        ann_file: Ground-truth annotation path handed to ``LVISEval``.
        max_dets: Detection budgets at which recall is reported.
        existing_json: Name used in the output pickle of the per-category
            recall mode; required for ``['proposal_fast_percat']``.

    Returns:
        None. Results are printed; the per-category mode additionally
        pickles a ``{cat_id: recall array}`` dict to
        ``./<existing_json>_per_cat_recall.pt``.
    """
    ANNOTATION_PATH = ann_file
    print('gt: ', ANNOTATION_PATH)
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'proposal_fast_percat', 'bbox',
            'segm', 'keypoints'
        ]

    if mmcv.is_str(lvis):
        lvis = LVIS(lvis)
    assert isinstance(lvis, LVIS)

    if result_types == ['proposal_fast']:
        ar = lvis_fast_eval_recall(result_files, lvis, np.array(max_dets))
        for i, num in enumerate(max_dets):
            print('AR@{}\t= {:.4f}'.format(num, ar[i]))
        return
    elif result_types == ['proposal_fast_percat']:
        assert existing_json is not None
        per_cat_recall = {}
        for cat_id in range(1, 1231):
            ar = lvis_fast_eval_recall(result_files,
                                       lvis,
                                       np.array(max_dets),
                                       category_id=cat_id)
            # One entry per category; no need to re-store it per budget.
            per_cat_recall[cat_id] = ar
            for i, num in enumerate(max_dets):
                print('cat{} AR@{}\t= {:.4f}'.format(cat_id, num, ar[i]))
        # Context manager guarantees the pickle is flushed and closed.
        out_path = './{}_per_cat_recall.pt'.format(existing_json)
        with open(out_path, 'wb') as fout:
            pickle.dump(per_cat_recall, fout)
        return

    for res_type in result_types:
        result_file = result_files[res_type]
        assert result_file.endswith('.json')

        # Proposals are scored as boxes with categories switched off.
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        lvisEval = LVISEval(ANNOTATION_PATH, result_file, iou_type)
        if res_type == 'proposal':
            lvisEval.params.use_cats = 0
            lvisEval.params.max_dets = list(max_dets)

        lvisEval.run()
        lvisEval.print_results()
def load_annotations(self, ann_file):
    """Index an LVIS annotation file and return its image records.

    Besides the returned list this sets ``self.lvis``, the category id lists
    (``full_cat_ids`` / ``cat_ids``), their 1-based label maps, ``CLASSES``
    (category names in dataset order) and ``img_ids``.

    Args:
        ann_file: Path of the annotation JSON handed to ``LVIS``.

    Returns:
        list[dict]: Image records; each gets a ``'filename'`` key holding
        the part of ``'file_name'`` after its last underscore.
    """
    self.lvis = LVIS(ann_file)

    def _label_map(ids):
        # 1-based position of every category id.
        return {cat_id: pos + 1 for pos, cat_id in enumerate(ids)}

    self.full_cat_ids = self.lvis.get_cat_ids()
    self.full_cat2label = _label_map(self.full_cat_ids)

    names = [item['name'] for item in self.lvis.dataset['categories']]
    self.CLASSES = tuple(names)

    self.cat_ids = self.lvis.get_cat_ids()
    self.cat2label = _label_map(self.cat_ids)

    self.img_ids = self.lvis.get_img_ids()
    records = []
    for image_id in self.img_ids:
        record = self.lvis.load_imgs([image_id])[0]
        record['filename'] = record['file_name'].split('_')[-1]
        records.append(record)
    return records
def get_hist():
    """Pickle a histogram of annotations per image for the val set.

    The saved dict maps "number of annotations in an image" to "number of
    images with that many annotations" and is written to
    ``tempdata/lvis_hist_val.pkl``.
    """
    ann_path = './data/lvis/lvis_v0.5_val.json'
    anns_per_img = LVIS(ann_path).img_ann_map

    hist = dict()
    for anns in anns_per_img.values():
        size = len(anns)
        hist[size] = hist.get(size, 0) + 1

    with open('tempdata/lvis_hist_val.pkl', 'wb') as fout:
        pickle.dump(hist, fout)
def get_mask():
    """Save a 0/1 mask marking categories with fewer than 100 instances.

    The mask has 1231 entries indexed by category id and is stored as a
    tensor in ``./data/lvis/mask.pt``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'

    # For training set
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    # np.int no longer exists in NumPy >= 1.24; builtin int is equivalent.
    mask = np.zeros((1231, ), dtype=int)
    for cid, cate in train_catsinfo.items():
        if cate['instance_count'] < 100:
            mask[cid] = 1

    mask_torch = torch.from_numpy(mask)
    torch.save(mask_torch, './data/lvis/mask.pt')
def count_ins():
    """Pickle the per-category instance counts of the dense train split.

    The ``{category_id: instance_count}`` dict is written to
    ``tempdata/dense_train_catins.pkl``.
    """
    train_ann_file = './data/lvis/dense_v3/train.json'

    # For training set
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    counts = {
        cid: cate['instance_count']
        for cid, cate in train_catsinfo.items()
    }

    with open('tempdata/dense_train_catins.pkl', 'wb') as fout:
        pickle.dump(counts, fout)
def get_cate_gs2_wt():
    """Assign every category to one of two instance-count bins.

    Row 0 of ``label2binlabel`` marks all indices 1..1230 with label 1.
    Row 1 holds categories with fewer than 300 instances, row 2 the rest;
    inside its row a category receives a running 1-based label.
    ``pred_slice`` stores, per row, the start offset and the final counter
    value of that row. Both arrays are saved as tensors under
    ``./data/lvis/2bins300/``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    binlabel_count = [1, 1, 1]
    # np.int no longer exists in NumPy >= 1.24; builtin int is equivalent.
    label2binlabel = np.zeros((3, 1231), dtype=int)

    label2binlabel[0, 1:] = binlabel_count[0]
    binlabel_count[0] += 1

    for cid, cate in train_catsinfo.items():
        row = 1 if cate['instance_count'] < 300 else 2
        label2binlabel[row, cid] = binlabel_count[row]
        binlabel_count[row] += 1

    savebin = torch.from_numpy(label2binlabel)
    save_path = './data/lvis/2bins300/label2binlabel.pt'
    torch.save(savebin, save_path)

    # start and length
    pred_slice = np.zeros((3, 2), dtype=int)
    start_idx = 0
    for i, bincount in enumerate(binlabel_count):
        pred_slice[i, 0] = start_idx
        pred_slice[i, 1] = bincount
        start_idx += bincount

    savebin = torch.from_numpy(pred_slice)
    save_path = './data/lvis/2bins300/pred_slice_with0.pt'
    torch.save(savebin, save_path)
def get_cate_weight_bours():
    """Compute class weights ``(1 - beta) / (1 - beta ** n)`` and save them.

    ``n`` is the training instance count of a category and ``beta`` is
    0.999. The weights of indices 1..1230 are divided by their mean, index 0
    is set to 1, and everything is limited to [0.1, 5] before being written
    to ``./data/lvis/cls_weight_bours.pt``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    beta = 0.999

    # np.float no longer exists in NumPy >= 1.24; builtin float is the same
    # float64 dtype.
    ins_count_all = np.zeros((1231, ), dtype=float)
    for cid, cate in train_catsinfo.items():
        ins_count_all[cid] = cate['instance_count']

    # Index 0 is excluded from the weighting and fixed to 1 below.
    ins_count = ins_count_all[1:]
    tmp = np.ones_like(ins_count)
    weight = (tmp - beta) / (tmp - np.power(beta, ins_count))
    weight = weight / np.mean(weight)

    weight_all = np.ones((1231, ), dtype=float)
    weight_all[1:] = weight
    weight = weight_all

    # Limit the weights to [0.1, 5].
    weight = np.where(weight > 5, 5, weight)
    weight = np.where(weight < 0.1, 0.1, weight)

    savebin = torch.from_numpy(weight)
    save_path = './data/lvis/cls_weight_bours.pt'
    torch.save(savebin, save_path)
def test_node_map():
    """Split category ids into four bins by training instance count.

    The bins are ``(0, 10)``, ``[10, 100)``, ``[100, 1000)`` and
    ``[1000, ~)``. Together with the fixed ``'normal'``, ``'background'``
    and ``'all'`` index arrays they are pickled to
    ``./data/lvis/valsplit.pkl``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'

    # For training set
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    # Exclusive upper bounds of the first three bins; the last bin is open.
    upper_bounds = (10, 100, 1000)
    bins = [[] for _ in range(len(upper_bounds) + 1)]
    for cid, cate in train_catsinfo.items():
        ins_count = cate['instance_count']
        # Number of bounds reached is the index of the bin.
        bins[sum(ins_count >= bound for bound in upper_bounds)].append(cid)
    bin10, bin100, bin1000, binover = bins

    # np.int no longer exists in NumPy >= 1.24; builtin int is equivalent.
    splits = {
        '(0, 10)': np.array(bin10, dtype=int),
        '[10, 100)': np.array(bin100, dtype=int),
        '[100, 1000)': np.array(bin1000, dtype=int),
        '[1000, ~)': np.array(binover, dtype=int),
        'normal': np.arange(1, 1231),
        'background': np.zeros((1, ), dtype=int),
        'all': np.arange(1231),
    }

    split_file_name = './data/lvis/valsplit.pkl'
    with open(split_file_name, 'wb') as f:
        pickle.dump(splits, f)
def get_bin_weight():
    """Compute inverse-frequency class weights separately for every bin.

    For each row ``i >= 1`` of the stored ``label2binlabel`` table the
    weights ``1 / instance_count`` of the categories with a positive entry
    in that row are divided by their mean, limited to [0.1, 5] and prefixed
    with a leading 1. The list of per-row arrays is pickled to
    ``./data/lvis/bins_cls_weight.pkl``.
    """
    train_ann_file = './data/lvis/lvis_v0.5_train.json'
    lvis_train = LVIS(train_ann_file)
    train_catsinfo = lvis_train.cats

    # np.float no longer exists in NumPy >= 1.24; builtin float is the same
    # float64 dtype.
    ins_count_all = np.zeros((1231, ), dtype=float)
    for cid, cate in train_catsinfo.items():
        ins_count_all[cid] = cate['instance_count']
    # Index 0 has no category entry; use 1 to avoid dividing by zero there.
    ins_count_all[0] = 1

    weight = np.ones_like(ins_count_all) / ins_count_all

    label2binlabel = torch.load('./data/lvis/label2binlabel.pt').cpu().numpy()

    allws = []
    for i in range(1, label2binlabel.shape[0]):
        bin_row = label2binlabel[i]
        idx = np.where(bin_row > 0)
        binw = weight[idx]
        binw = binw / binw.mean()
        binw = np.where(binw > 5, 5, binw)
        binw = np.where(binw < 0.1, 0.1, binw)
        # Leading 1 is prepended ahead of the per-category weights.
        binw = np.hstack((np.ones(1, ), binw))
        allws.append(binw)

    with open('./data/lvis/bins_cls_weight.pkl', 'wb') as fout:
        pickle.dump(allws, fout)
class LVISEval:
    """Evaluate detections against LVIS ground truth (bbox or segm IoU).

    Typical use is ``run()`` followed by ``print_results()`` or
    ``get_results()``.
    """

    def __init__(self, lvis_gt, lvis_dt, iou_type="segm"):
        """Constructor for LVISEval.

        Args:
            lvis_gt (LVIS class instance, or str containing path of
                annotation file)
            lvis_dt (LVISResult class instance, or str containing path of
                result file, or list of dict)
            iou_type (str): segm or bbox evaluation

        Raises:
            ValueError: if ``iou_type`` is neither ``"bbox"`` nor ``"segm"``.
            TypeError: if ``lvis_gt`` or ``lvis_dt`` has an unsupported type.
        """
        # Imported here so the class works without a module-level import;
        # get_results() needs the logger.
        import logging

        self.logger = logging.getLogger(__name__)

        if iou_type not in ["bbox", "segm"]:
            raise ValueError("iou_type: {} is not supported.".format(iou_type))

        if isinstance(lvis_gt, LVIS):
            self.lvis_gt = lvis_gt
        elif isinstance(lvis_gt, str):
            self.lvis_gt = LVIS(lvis_gt)
        else:
            raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))

        if isinstance(lvis_dt, LVISResults):
            self.lvis_dt = lvis_dt
        elif isinstance(lvis_dt, (str, list)):
            self.lvis_dt = LVISResults(self.lvis_gt, lvis_dt)
        else:
            raise TypeError("Unsupported type {} of lvis_dt.".format(lvis_dt))

        # per-image per-category evaluation results
        self.eval_imgs = defaultdict(list)
        self.eval = {}  # accumulated evaluation results
        self._gts = defaultdict(list)  # gt for evaluation
        self._dts = defaultdict(list)  # dt for evaluation
        self.params = Params(iou_type=iou_type)  # parameters
        self.results = OrderedDict()
        self.ious = {}  # ious between all gts and dts

        self.params.img_ids = sorted(self.lvis_gt.get_img_ids())
        self.params.cat_ids = sorted(self.lvis_gt.get_cat_ids())

    def _to_mask(self, anns, lvis):
        """Replace each annotation's ``"segmentation"`` by its RLE, in place."""
        for ann in anns:
            rle = lvis.ann_to_rle(ann)
            ann["segmentation"] = rle

    def _prepare(self):
        """Prepare self._gts and self._dts for evaluation based on params."""
        cat_ids = self.params.cat_ids if self.params.cat_ids else None

        gts = self.lvis_gt.load_anns(
            self.lvis_gt.get_ann_ids(img_ids=self.params.img_ids,
                                     cat_ids=cat_ids))
        dts = self.lvis_dt.load_anns(
            self.lvis_dt.get_ann_ids(img_ids=self.params.img_ids,
                                     cat_ids=cat_ids))
        # convert ground truth to mask if iou_type == 'segm'
        if self.params.iou_type == "segm":
            self._to_mask(gts, self.lvis_gt)
            self._to_mask(dts, self.lvis_dt)

        # set ignore flag
        for gt in gts:
            if "ignore" not in gt:
                gt["ignore"] = 0

        for gt in gts:
            self._gts[gt["image_id"], gt["category_id"]].append(gt)

        # For federated dataset evaluation we will filter out all dt for an
        # image which belong to categories not present in gt and not present
        # in the negative list for an image. In other words the detector is
        # not penalized for categories about which we don't have gt
        # information about their presence or absence in an image.
        img_data = self.lvis_gt.load_imgs(ids=self.params.img_ids)
        # per image map of categories not present in image
        img_nl = {d["id"]: d["neg_category_ids"] for d in img_data}
        # per image list of categories present in image
        img_pl = defaultdict(set)
        for ann in gts:
            img_pl[ann["image_id"]].add(ann["category_id"])
        # per image map of categories which have missing gt. For these
        # categories we don't penalize the detector for false positives.
        self.img_nel = {
            d["id"]: d["not_exhaustive_category_ids"]
            for d in img_data
        }

        for dt in dts:
            img_id, cat_id = dt["image_id"], dt["category_id"]
            if cat_id not in img_nl[img_id] and cat_id not in img_pl[img_id]:
                continue
            self._dts[img_id, cat_id].append(dt)

        self.freq_groups = self._prepare_freq_group()

    def _prepare_freq_group(self):
        """Group category positions by their ``"frequency"`` label.

        Returns:
            list[list[int]]: One list per entry of ``params.img_count_lbl``
            holding the positions (within ``params.cat_ids``) of the
            categories with that frequency label.
        """
        freq_groups = [[] for _ in self.params.img_count_lbl]
        cat_data = self.lvis_gt.load_cats(self.params.cat_ids)
        for idx, _cat_data in enumerate(cat_data):
            frequency = _cat_data["frequency"]
            freq_groups[self.params.img_count_lbl.index(frequency)].append(idx)
        return freq_groups

    def evaluate(self):
        """
        Run per image evaluation on given images and store results
        (a list of dict) in self.eval_imgs.
        """
        print("Running per image evaluation.")
        print("Evaluate annotation type *{}*".format(self.params.iou_type))

        self.params.img_ids = list(np.unique(self.params.img_ids))

        if self.params.use_cats:
            cat_ids = self.params.cat_ids
        else:
            cat_ids = [-1]

        self._prepare()

        self.ious = {(img_id, cat_id): self.compute_iou(img_id, cat_id)
                     for img_id in self.params.img_ids for cat_id in cat_ids}

        # loop through images, area range, max detection number
        # NOTE: accumulate() relies on this exact nesting order
        # (category, then area range, then image) to index the flat list.
        self.eval_imgs = [
            self.evaluate_img(img_id, cat_id, area_rng) for cat_id in cat_ids
            for area_rng in self.params.area_rng
            for img_id in self.params.img_ids
        ]

    def _get_gt_dt(self, img_id, cat_id):
        """Create gt, dt which are list of anns/dets. If use_cats is true
        only anns/dets corresponding to tuple (img_id, cat_id) will be
        used. Else, all anns/dets in image are used and cat_id is not used.
        """
        if self.params.use_cats:
            gt = self._gts[img_id, cat_id]
            dt = self._dts[img_id, cat_id]
        else:
            # Gather over every category of the image. The lookup key must
            # be the loop variable, not the (placeholder) cat_id argument.
            gt = [
                _ann for _cat_id in self.params.cat_ids
                for _ann in self._gts[img_id, _cat_id]
            ]
            dt = [
                _ann for _cat_id in self.params.cat_ids
                for _ann in self._dts[img_id, _cat_id]
            ]
        return gt, dt

    def compute_iou(self, img_id, cat_id):
        """Return the IoU matrix between detections and gts of one cell.

        Detections are ordered by decreasing score. An empty list is
        returned when the cell has neither gt nor dt.

        Raises:
            ValueError: if ``params.iou_type`` is not ``segm`` or ``bbox``.
        """
        gt, dt = self._get_gt_dt(img_id, cat_id)

        if len(gt) == 0 and len(dt) == 0:
            return []

        # Sort detections in decreasing order of score.
        idx = np.argsort([-d["score"] for d in dt], kind="mergesort")
        dt = [dt[i] for i in idx]

        iscrowd = [int(False)] * len(gt)

        if self.params.iou_type == "segm":
            ann_type = "segmentation"
        elif self.params.iou_type == "bbox":
            ann_type = "bbox"
        else:
            raise ValueError("Unknown iou_type for iou computation.")
        gt = [g[ann_type] for g in gt]
        dt = [d[ann_type] for d in dt]

        # compute iou between each dt and gt region
        # will return array of shape len(dt), len(gt)
        ious = mask_utils.iou(dt, gt, iscrowd)
        return ious

    def evaluate_img(self, img_id, cat_id, area_rng):
        """Perform evaluation for single category and image.

        Returns:
            dict with the match/ignore arrays for this cell, or ``None``
            when the cell has neither gt nor dt.
        """
        gt, dt = self._get_gt_dt(img_id, cat_id)

        if len(gt) == 0 and len(dt) == 0:
            return None

        # Add another field _ignore to only consider anns based on area range.
        for g in gt:
            if g["ignore"] or (g["area"] < area_rng[0]
                               or g["area"] > area_rng[1]):
                g["_ignore"] = 1
            else:
                g["_ignore"] = 0

        # Sort gt ignore last
        gt_order = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
        gt = [gt[i] for i in gt_order]

        # Sort dt highest score first
        dt_order = np.argsort([-d["score"] for d in dt], kind="mergesort")
        dt = [dt[i] for i in dt_order]

        # load computed ious (columns follow the reordered gts)
        ious = (self.ious[img_id, cat_id][:, gt_order]
                if len(self.ious[img_id, cat_id]) > 0 else
                self.ious[img_id, cat_id])

        num_thrs = len(self.params.iou_thrs)
        num_gt = len(gt)
        num_dt = len(dt)

        # Array to store the "id" of the matched dt/gt
        gt_m = np.zeros((num_thrs, num_gt))
        dt_m = np.zeros((num_thrs, num_dt))

        gt_ig = np.array([g["_ignore"] for g in gt])
        dt_ig = np.zeros((num_thrs, num_dt))

        for iou_thr_idx, iou_thr in enumerate(self.params.iou_thrs):
            if len(ious) == 0:
                break

            for dt_idx, _dt in enumerate(dt):
                iou = min([iou_thr, 1 - 1e-10])
                # information about best match so far (m=-1 -> unmatched)
                # store the gt_idx which matched for _dt
                m = -1
                for gt_idx, _ in enumerate(gt):
                    # if this gt already matched continue
                    if gt_m[iou_thr_idx, gt_idx] > 0:
                        continue
                    # if _dt matched to reg gt, and on ignore gt, stop
                    if m > -1 and gt_ig[m] == 0 and gt_ig[gt_idx] == 1:
                        break
                    # continue to next gt unless better match made
                    if ious[dt_idx, gt_idx] < iou:
                        continue
                    # if match successful and best so far, store appropriately
                    iou = ious[dt_idx, gt_idx]
                    m = gt_idx

                # No match found for _dt, go to next _dt
                if m == -1:
                    continue

                # if gt to ignore for some reason update dt_ig.
                # Should not be used in evaluation.
                dt_ig[iou_thr_idx, dt_idx] = gt_ig[m]
                # _dt match found, update gt_m, and dt_m with "id"
                dt_m[iou_thr_idx, dt_idx] = gt[m]["id"]
                gt_m[iou_thr_idx, m] = _dt["id"]

        # For LVIS we will ignore any unmatched detection if that category was
        # not exhaustively annotated in gt.
        dt_ig_mask = [
            d["area"] < area_rng[0] or d["area"] > area_rng[1]
            or d["category_id"] in self.img_nel[d["image_id"]] for d in dt
        ]
        dt_ig_mask = np.array(dt_ig_mask).reshape((1, num_dt))  # 1 X num_dt
        dt_ig_mask = np.repeat(dt_ig_mask, num_thrs, 0)  # num_thrs X num_dt
        # Based on dt_ig_mask ignore any unmatched detection by updating dt_ig
        dt_ig = np.logical_or(dt_ig, np.logical_and(dt_m == 0, dt_ig_mask))
        # store results for given image and category
        return {
            "image_id": img_id,
            "category_id": cat_id,
            "area_rng": area_rng,
            "dt_ids": [d["id"] for d in dt],
            "gt_ids": [g["id"] for g in gt],
            "dt_matches": dt_m,
            "gt_matches": gt_m,
            "dt_scores": [d["score"] for d in dt],
            "gt_ignore": gt_ig,
            "dt_ignore": dt_ig,
        }

    def accumulate(self):
        """Accumulate per image evaluation results and store the result in
        self.eval.
        """
        print("Accumulating evaluation results.")

        if not self.eval_imgs:
            print("Please run evaluate first.")

        if self.params.use_cats:
            cat_ids = self.params.cat_ids
        else:
            cat_ids = [-1]

        num_thrs = len(self.params.iou_thrs)
        num_recalls = len(self.params.rec_thrs)
        num_cats = len(cat_ids)
        num_area_rngs = len(self.params.area_rng)
        num_imgs = len(self.params.img_ids)

        # -1 for absent categories
        precision = -np.ones((num_thrs, num_recalls, num_cats, num_area_rngs))
        recall = -np.ones((num_thrs, num_cats, num_area_rngs))

        # Initialize dt_pointers
        dt_pointers = {}
        for cat_idx in range(num_cats):
            dt_pointers[cat_idx] = {}
            for area_idx in range(num_area_rngs):
                dt_pointers[cat_idx][area_idx] = {}

        # Per category evaluation
        for cat_idx in range(num_cats):
            Nk = cat_idx * num_area_rngs * num_imgs
            for area_idx in range(num_area_rngs):
                Na = area_idx * num_imgs
                E = [
                    self.eval_imgs[Nk + Na + img_idx]
                    for img_idx in range(num_imgs)
                ]
                # Remove elements which are None
                E = [e for e in E if e is not None]
                if len(E) == 0:
                    continue

                # Append all scores: shape (N,)
                dt_scores = np.concatenate([e["dt_scores"] for e in E], axis=0)
                dt_ids = np.concatenate([e["dt_ids"] for e in E], axis=0)

                dt_idx = np.argsort(-dt_scores, kind="mergesort")
                dt_scores = dt_scores[dt_idx]
                dt_ids = dt_ids[dt_idx]

                dt_m = np.concatenate([e["dt_matches"] for e in E],
                                      axis=1)[:, dt_idx]
                dt_ig = np.concatenate([e["dt_ignore"] for e in E],
                                       axis=1)[:, dt_idx]

                gt_ig = np.concatenate([e["gt_ignore"] for e in E])
                # num gt anns to consider
                num_gt = np.count_nonzero(gt_ig == 0)

                if num_gt == 0:
                    continue

                tps = np.logical_and(dt_m, np.logical_not(dt_ig))
                fps = np.logical_and(np.logical_not(dt_m),
                                     np.logical_not(dt_ig))

                # np.float no longer exists in NumPy >= 1.24; builtin float
                # is the same float64 dtype.
                tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)

                dt_pointers[cat_idx][area_idx] = {
                    "dt_ids": dt_ids,
                    "tps": tps,
                    "fps": fps,
                }

                for iou_thr_idx, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                    tp = np.array(tp)
                    fp = np.array(fp)
                    num_tp = len(tp)
                    rc = tp / num_gt
                    if num_tp:
                        recall[iou_thr_idx, cat_idx, area_idx] = rc[-1]
                    else:
                        recall[iou_thr_idx, cat_idx, area_idx] = 0

                    # np.spacing(1) ~= eps
                    pr = tp / (fp + tp + np.spacing(1))
                    pr = pr.tolist()

                    # Replace each precision value with the maximum precision
                    # value to the right of that recall level. This ensures
                    # that the calculated AP value will be less susceptible
                    # to small variations in the ranking.
                    for i in range(num_tp - 1, 0, -1):
                        if pr[i] > pr[i - 1]:
                            pr[i - 1] = pr[i]

                    rec_thrs_insert_idx = np.searchsorted(rc,
                                                          self.params.rec_thrs,
                                                          side="left")

                    pr_at_recall = [0.0] * num_recalls

                    # Recall thresholds that are never reached index one
                    # past the end of pr; those entries stay 0.
                    try:
                        for _idx, pr_idx in enumerate(rec_thrs_insert_idx):
                            pr_at_recall[_idx] = pr[pr_idx]
                    except IndexError:
                        pass
                    precision[iou_thr_idx, :, cat_idx,
                              area_idx] = np.array(pr_at_recall)

        self.eval = {
            "params": self.params,
            "counts": [num_thrs, num_recalls, num_cats, num_area_rngs],
            "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "precision": precision,
            "recall": recall,
            "dt_pointers": dt_pointers,
        }

    def _summarize(self,
                   summary_type,
                   iou_thr=None,
                   area_rng="all",
                   freq_group_idx=None):
        """Average precision or recall over the selected slice.

        Args:
            summary_type: ``'ap'`` selects precision, anything else recall.
            iou_thr: Restrict to this IoU threshold when given.
            area_rng: Label of the area range to select.
            freq_group_idx: Restrict precision to this frequency group.

        Returns:
            Mean of the selected entries greater than -1, or -1 when there
            are none.
        """
        aidx = [
            idx for idx, _area_rng in enumerate(self.params.area_rng_lbl)
            if _area_rng == area_rng
        ]

        if summary_type == 'ap':
            s = self.eval["precision"]
            if iou_thr is not None:
                tidx = np.where(iou_thr == self.params.iou_thrs)[0]
                s = s[tidx]
            if freq_group_idx is not None:
                s = s[:, :, self.freq_groups[freq_group_idx], aidx]
            else:
                s = s[:, :, :, aidx]
        else:
            s = self.eval["recall"]
            if iou_thr is not None:
                tidx = np.where(iou_thr == self.params.iou_thrs)[0]
                s = s[tidx]
            s = s[:, :, aidx]

        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])
        return mean_s

    def summarize(self):
        """Compute and display summary metrics for evaluation results.

        Raises:
            RuntimeError: if ``accumulate()`` has not produced ``self.eval``.
        """
        if not self.eval:
            raise RuntimeError("Please run accumulate() first.")

        max_dets = self.params.max_dets

        self.results["AP"] = self._summarize('ap')
        self.results["AP50"] = self._summarize('ap', iou_thr=0.50)
        self.results["AP75"] = self._summarize('ap', iou_thr=0.75)
        self.results["APs"] = self._summarize('ap', area_rng="small")
        self.results["APm"] = self._summarize('ap', area_rng="medium")
        self.results["APl"] = self._summarize('ap', area_rng="large")
        self.results["APr"] = self._summarize('ap', freq_group_idx=0)
        self.results["APc"] = self._summarize('ap', freq_group_idx=1)
        self.results["APf"] = self._summarize('ap', freq_group_idx=2)

        key = "AR@{}".format(max_dets)
        self.results[key] = self._summarize('ar')

        for area_rng in ["small", "medium", "large"]:
            key = "AR{}@{}".format(area_rng[0], max_dets)
            self.results[key] = self._summarize('ar', area_rng=area_rng)

    def run(self):
        """Wrapper function which calculates the results."""
        self.evaluate()
        self.accumulate()
        self.summarize()

    def print_results(self):
        """Print one formatted line per entry of ``self.results``.

        IoU, area and category-group labels are derived from the third
        character of the result key.
        """
        template = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} catIds={:>3s}] = {:0.3f}"

        for key, value in self.results.items():
            max_dets = self.params.max_dets
            if "AP" in key:
                title = "Average Precision"
                _type = "(AP)"
            else:
                title = "Average Recall"
                _type = "(AR)"

            if len(key) > 2 and key[2].isdigit():
                iou_thr = (float(key[2:]) / 100)
                iou = "{:0.2f}".format(iou_thr)
            else:
                iou = "{:0.2f}:{:0.2f}".format(self.params.iou_thrs[0],
                                               self.params.iou_thrs[-1])

            if len(key) > 2 and key[2] in ["r", "c", "f"]:
                cat_group_name = key[2]
            else:
                cat_group_name = "all"

            if len(key) > 2 and key[2] in ["s", "m", "l"]:
                area_rng = key[2]
            else:
                area_rng = "all"

            print(
                template.format(title, _type, iou, area_rng, max_dets,
                                cat_group_name, value))

    def get_results(self):
        """Return ``self.results``, logging a warning when it is empty."""
        if not self.results:
            self.logger.warning("results is empty. Call run().")
        return self.results
def __init__(
    self,
    lvis_gt,
    lvis_dt,
    iou_type="bbox",
    expand_pred_label=False,
    oid_hierarchy_path='./datasets/oid/annotations/challenge-2019-label500-hierarchy.json'
):
    """Constructor for OIDEval.

    Args:
        lvis_gt (LVIS class instance, or str containing path of annotation
            file)
        lvis_dt (LVISResult class instance, or str containing path of
            result file, or list of dict)
        iou_type (str): segm or bbox evaluation
        expand_pred_label (bool): when True, every prediction is copied
            once for its own category and once for each ancestor category
            found in the label hierarchy, and the detection index is
            rebuilt.
        oid_hierarchy_path (str): path of the JSON label hierarchy. Only
            read when ``expand_pred_label`` is True.

    Raises:
        ValueError: if ``iou_type`` is neither "bbox" nor "segm".
        TypeError: if ``lvis_gt`` or ``lvis_dt`` has an unsupported type.
    """
    self.logger = logging.getLogger(__name__)

    if iou_type not in ["bbox", "segm"]:
        raise ValueError("iou_type: {} is not supported.".format(iou_type))

    if isinstance(lvis_gt, LVIS):
        self.lvis_gt = lvis_gt
    elif isinstance(lvis_gt, str):
        self.lvis_gt = LVIS(lvis_gt)
    else:
        raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))

    if isinstance(lvis_dt, LVISResults):
        self.lvis_dt = lvis_dt
    elif isinstance(lvis_dt, (str, list)):
        self.lvis_dt = LVISResults(self.lvis_gt, lvis_dt, max_dets=-1)
    else:
        raise TypeError("Unsupported type {} of lvis_dt.".format(lvis_dt))

    if expand_pred_label:
        # Use a context manager so the hierarchy file is always closed.
        with open(oid_hierarchy_path, 'r') as fin:
            oid_hierarchy = json.load(fin)
        cat_info = self.lvis_gt.dataset['categories']
        freebase2id = {x['freebase_id']: x['id'] for x in cat_info}

        # fas[c] accumulates the ids of every ancestor of category c.
        fas = defaultdict(set)

        def dfs(hierarchy, cur_id):
            """Return cur_id plus all of its descendants, filling ``fas``."""
            all_childs = set()
            if 'Subcategory' in hierarchy:
                for x in hierarchy['Subcategory']:
                    childs = dfs(x, freebase2id[x['LabelName']])
                    all_childs.update(childs)
            # -1 marks the synthetic root, which is not a real category.
            if cur_id != -1:
                for c in all_childs:
                    fas[c].add(cur_id)
            all_childs.add(cur_id)
            return all_childs

        dfs(oid_hierarchy, -1)

        expanded_pred = []
        id_count = 0
        for d in self.lvis_dt.dataset['annotations']:
            cur_id = d['category_id']
            ids = [cur_id] + list(fas[cur_id])
            for cat_id in ids:
                new_box = copy.deepcopy(d)
                id_count = id_count + 1
                new_box['id'] = id_count
                new_box['category_id'] = cat_id
                expanded_pred.append(new_box)

        print('Expanding original {} preds to {} preds'.format(
            len(self.lvis_dt.dataset['annotations']),
            len(expanded_pred)))
        self.lvis_dt.dataset['annotations'] = expanded_pred
        self.lvis_dt._create_index()

    # per-image per-category evaluation results
    self.eval_imgs = defaultdict(list)
    self.eval = {}  # accumulated evaluation results
    self._gts = defaultdict(list)  # gt for evaluation
    self._dts = defaultdict(list)  # dt for evaluation
    self.params = Params(iou_type=iou_type)  # parameters
    self.results = OrderedDict()
    self.ious = {}  # ious between all gts and dts

    self.params.img_ids = sorted(self.lvis_gt.get_img_ids())
    self.params.cat_ids = sorted(self.lvis_gt.get_cat_ids())
class LvisDataset(CustomDataset):
    """LVIS dataset that yields one multi-hot category vector per image."""

    def load_annotations(self, ann_file, LT_ann_file=None):
        """Load the LVIS annotation file and return the image info list.

        Args:
            ann_file (str): path of the LVIS annotation file.
            LT_ann_file (str | list[str] | None): one path, or several
                paths, of text files listing image ids (one per line, read
                with ``mmcv.list_from_file``). When given, only those
                images are used; otherwise every image of ``ann_file`` is.

        Returns:
            list[dict]: image info dicts, each with an added ``filename``.
        """
        self.lvis = LVIS(ann_file)
        self.cat_ids = self.lvis.get_cat_ids()
        if self.CLASSES is None:
            self.CLASSES = self.cat_ids
        # Labels are 1-based.
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.categories = self.cat_ids

        if LT_ann_file is not None:
            # A bare string used to be iterated character by character;
            # treat it as a single list file instead.
            if isinstance(LT_ann_file, str):
                list_files = [LT_ann_file]
            else:
                list_files = LT_ann_file
            img_ids = []
            for list_file in list_files:
                img_ids += mmcv.list_from_file(list_file)
            self.img_ids = [int(x) for x in img_ids]
        else:
            self.img_ids = self.lvis.get_img_ids()

        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            # Keep only the last 16 characters of the stored name
            # (presumably the '<12 digits>.jpg' part - TODO confirm).
            info['filename'] = info['file_name'][-16:]
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        """Return the parsed label annotation of the idx-th image.

        When ``self.see_only`` is truthy, the label vector is restricted
        to the positions listed in it.
        """
        img_id = self.img_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        ann = self._parse_ann_info(ann_info)
        if self.see_only:
            ann['labels'] = ann['labels'][list(self.see_only)]
        return ann

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        ids_with_ann = {ann['image_id'] for ann in self.lvis.anns.values()}
        valid_inds = []
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info):
        """Parse label annotation.

        Args:
            ann_info (list[dict]): annotations of one image.

        Returns:
            dict: ``labels`` holds an int64 vector with one entry per
                category, set to 1 for every category that has at least
                one valid annotation.
        """
        gt_labels = np.zeros((len(self.categories),), dtype=np.int64)
        for ann in ann_info:
            if ann.get('ignore', False):
                continue
            _, _, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            # cat2label is 1-based, the vector is 0-based.
            gt_labels[self.cat2label[ann['category_id']] - 1] = 1
        return dict(labels=gt_labels)
class TaoDataset(CustomDataset):
    """Detection dataset backed by an LVIS-format annotation file."""

    def load_annotations(self, ann_file):
        """Load the annotation file and return the image info list.

        Args:
            ann_file (str): path of the LVIS-format annotation file.

        Returns:
            list[dict]: image info dicts, each with ``filename`` set to
                the stored ``file_name``.
        """
        self.lvis = LVIS(ann_file)
        self.full_cat_ids = self.lvis.get_cat_ids()
        self.full_cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.full_cat_ids)
        }

        self.CLASSES = tuple(
            [item['name'] for item in self.lvis.dataset['categories']])
        self.cat_ids = self.lvis.get_cat_ids()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }

        self.img_ids = self.lvis.get_img_ids()
        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        """Return the parsed annotation dict of the idx-th image."""
        img_id = self.img_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return self._parse_ann_info(self.img_infos[idx], ann_info)

    def get_ann_info_withoutparse(self, idx):
        """Return the raw annotation dicts of the idx-th image."""
        img_id = self.img_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return ann_info

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        ids_with_ann = {ann['image_id'] for ann in self.lvis.anns.values()}
        valid_inds = []
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, img_info, ann_info):
        """Parse bbox and mask annotation.

        Args:
            img_info (dict): image info; only ``filename`` is read.
            ann_info (list[dict]): Annotation info of an image.

        Returns:
            dict: A dict containing the following keys: bboxes,
                bboxes_ignore, labels, masks, seg_map.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # One mask per kept box, as produced by ``self.lvis.ann_to_mask``.
        gt_masks = []

        for ann in ann_info:
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            # Crowd regions go to the ignore list; everything else is a
            # ground truth. A missing 'iscrowd' key counts as "not crowd",
            # and an explicit iscrowd == 0 is no longer silently dropped.
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                gt_masks.append(self.lvis.ann_to_mask(ann))

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        # NOTE(review): replaces every 'jpg' substring of the path, not
        # only the extension - confirm this is acceptable for all names.
        seg_map = img_info['filename'].replace('jpg', 'png')

        return dict(bboxes=gt_bboxes,
                    labels=gt_labels,
                    bboxes_ignore=gt_bboxes_ignore,
                    masks=gt_masks,
                    seg_map=seg_map)
class LvisGtAnnVis():
    """Display ground-truth LVIS annotations on top of their images."""

    # Class names, exposed on the class so they can actually be indexed
    # (previously a local of __init__ that was discarded immediately).
    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports_ball',
               'kite', 'baseball_bat', 'baseball_glove', 'skateboard',
               'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot_dog',
               'pizza', 'donut', 'cake', 'chair', 'couch', 'potted_plant',
               'bed', 'dining_table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell_phone', 'microwave', 'oven',
               'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')

    def __init__(self, ann_file):
        """Load ``ann_file`` and keep the images that pass _filter_imgs.

        Args:
            ann_file (str): path of the LVIS annotation file.
        """
        self.lvis = LVIS(ann_file)
        self.cat_ids = self.lvis.get_cat_ids()
        self.img_ids = self.lvis.get_img_ids()

        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        self.img_infos = img_infos

        # Use './data/lvis/val2017' instead to browse the val split.
        self.img_prefix = './data/lvis/train2017'

        valid_inds = self._filter_imgs()
        self.img_infos = [self.img_infos[i] for i in valid_inds]

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths.

        The image id is read from each info dict rather than from
        ``self.img_ids``, so the result stays correct after ``img_infos``
        has been filtered and no longer lines up with ``img_ids``.
        """
        ids_with_ann = {ann['image_id'] for ann in self.lvis.anns.values()}
        valid_inds = []
        for i, img_info in enumerate(self.img_infos):
            if img_info['id'] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def show(self, idx):
        """Plot the idx-th kept image with its annotations overlaid.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_info = self.img_infos[idx]
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_info['id']])
        ann_info = self.lvis.load_anns(ann_ids)

        filename = img_info['filename']
        if filename.startswith('COCO'):
            # Val-set names: drop the first 13 characters (presumably the
            # 'COCO_val2014_' prefix - TODO confirm).
            filename = filename[13:]
        img_path = osp.join(self.img_prefix, filename)

        imdata = cv2.imread(img_path)
        if imdata is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError(
                'Cannot read image: {}'.format(img_path))
        imdata = cv2.cvtColor(imdata, cv2.COLOR_BGR2RGB)

        plt.imshow(imdata)
        plt.axis('off')
        self.lvis.showanns(ann_info)
        plt.show()
class LvisGtAnnVis():
    """Display ground-truth LVIS annotations of the finetune classes."""

    # Class names, exposed on the class so they can actually be indexed
    # (previously a local of __init__ that was discarded immediately).
    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports_ball',
               'kite', 'baseball_bat', 'baseball_glove', 'skateboard',
               'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot_dog',
               'pizza', 'donut', 'cake', 'chair', 'couch', 'potted_plant',
               'bed', 'dining_table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell_phone', 'microwave', 'oven',
               'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')

    def __init__(self, ann_file):
        """Load ``ann_file`` and keep images that still have annotations
        after the finetune-class filter.

        Args:
            ann_file (str): path of the LVIS annotation file.
        """
        self.lvis = LVIS(ann_file)
        self.cat_ids = self.lvis.get_cat_ids()
        self.img_ids = self.lvis.get_img_ids()

        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        self.img_infos = img_infos

        # Use './data/lvis/val2017' instead to browse the val split.
        self.img_prefix = './data/lvis/train2017'

        self.filter_to_keep_finetune_classes()

    def filter_to_keep_finetune_classes(self):
        """Keep only the images that still own at least one annotation.

        First lets ``self.lvis`` filter its annotations down to the
        finetune classes, then drops every image whose annotation ids are
        all gone from ``self.lvis.anns``. Image order is preserved.
        """
        self.lvis._filter_anns_finetune_classes()  # first filter anns
        remaining_anns = self.lvis.anns
        # Then filter imgs; any() avoids the former quadratic list lookup.
        self.img_infos = [
            info for info in self.img_infos
            if any(ann_id in remaining_anns
                   for ann_id in self.lvis.get_ann_ids(img_ids=[info['id']]))
        ]

    def show(self, idx):
        """Plot the idx-th kept image with its annotations overlaid.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_info = self.img_infos[idx]
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_info['id']])
        ann_info = self.lvis.load_anns(ann_ids)

        filename = img_info['filename']
        if filename.startswith('COCO'):
            # Val-set names: drop the first 13 characters (presumably the
            # 'COCO_val2014_' prefix - TODO confirm).
            filename = filename[13:]
        img_path = osp.join(self.img_prefix, filename)

        imdata = cv2.imread(img_path)
        if imdata is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError(
                'Cannot read image: {}'.format(img_path))
        imdata = cv2.cvtColor(imdata, cv2.COLOR_BGR2RGB)

        plt.imshow(imdata)
        plt.axis('off')
        self.lvis.showanns(ann_info)
        plt.show()
def _split_by_instance_count(img_ann_map, low=20, mid=40, high=300):
    """Bucket image ids into (<low or >high, [low, mid), [mid, high])."""
    outside, lower, upper = [], [], []
    for img_id, anns in img_ann_map.items():
        ins_num = len(anns)
        if ins_num < low or ins_num > high:
            outside.append(img_id)
        elif ins_num < mid:
            lower.append(img_id)
        else:
            upper.append(img_id)
    return outside, lower, upper


def _sample_half_per_category(img_ids, img_ann_map):
    """Randomly pick image ids until each category has about half chosen."""
    cat2img = {}
    for im in img_ids:
        for ann in img_ann_map[im]:
            cat2img.setdefault(ann['category_id'], set()).add(im)

    chosen = set()
    for imgs in cat2img.values():
        rest_img = imgs - chosen
        # Images already chosen through another category count toward
        # this category's quota.
        already_got_num = len(imgs) - len(rest_img)
        sample_num = int(len(imgs) / 2) - already_got_num
        if sample_num <= 0:
            continue
        rest_img = list(rest_img)
        picks = np.random.choice(len(rest_img), sample_num, replace=False)
        chosen.update(rest_img[int(i)] for i in picks)
    return chosen


def construct_data(
        train_ann_file='./data/lvis/lvis_v0.5_train_headonly.json',
        val_ann_file='./data/lvis/lvis_v0.5_val_headonly.json',
        save_dir='./data/lvis/dense/'):
    """Re-split two LVIS annotation files by per-image instance count.

    Images with fewer than 20 or more than 300 annotations all go to the
    new train set. From the train images with 20-39 and with 40-300
    annotations, about half per category are sampled (with
    ``np.random.choice``, so the result depends on NumPy's global RNG
    state) and moved to the new val set, joining the val images that
    already fall in those two ranges. The remainder stays in train.

    Writes ``dense_lvis_v0.5_train.json`` and ``dense_lvis_v0.5_val.json``
    into ``save_dir``. Images or annotations belonging to neither set are
    reported with a printed message.

    Args:
        train_ann_file (str): path of the source train annotation file.
        val_ann_file (str): path of the source val annotation file.
        save_dir (str): output directory, created if missing.
    """
    img_ann_map_train = LVIS(train_ann_file).img_ann_map
    img_ann_map_val = LVIS(val_ann_file).img_ann_map

    train_train, train_20, train_40 = _split_by_instance_count(
        img_ann_map_train)
    val_train, val_20, val_40 = _split_by_instance_count(img_ann_map_val)

    # Sample the 20-39 bucket before the 40-300 bucket so the sequence of
    # random draws matches the original implementation.
    train_val_20 = _sample_half_per_category(train_20, img_ann_map_train)
    train_val_40 = _sample_half_per_category(train_40, img_ann_map_train)

    val_new = set(val_20) | train_val_20 | set(val_40) | train_val_40
    train_new = (set(train_train) | set(val_train)
                 | (set(train_20) - train_val_20)
                 | (set(train_40) - train_val_40))

    with open(train_ann_file, 'r') as fin:
        traindata = json.load(fin)
    with open(val_ann_file, 'r') as fin:
        valdata = json.load(fin)

    train_img = []
    val_img = []
    for img in (traindata['images'] + valdata['images']):
        if img['id'] in train_new:
            train_img.append(img)
        elif img['id'] in val_new:
            val_img.append(img)
        else:
            print('NO SET ERROR! {}'.format(img))

    train_ann = []
    val_ann = []
    for ann in (traindata['annotations'] + valdata['annotations']):
        if ann['image_id'] in train_new:
            train_ann.append(ann)
        elif ann['image_id'] in val_new:
            val_ann.append(ann)
        else:
            print('ANN NO SET ERROR! {}'.format(ann))

    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(save_dir, exist_ok=True)

    save_train = {
        'images': train_img,
        'annotations': train_ann,
        'categories': traindata['categories'],
        'info': traindata['info'],
        'licenses': traindata['licenses']
    }
    save_path = os.path.join(save_dir, 'dense_lvis_v0.5_train.json')
    with open(save_path, 'w') as fout:
        json.dump(save_train, fout)

    save_val = {
        'images': val_img,
        'annotations': val_ann,
        'categories': valdata['categories'],
        'info': valdata['info'],
        'licenses': valdata['licenses']
    }
    save_path = os.path.join(save_dir, 'dense_lvis_v0.5_val.json')
    with open(save_path, 'w') as fout:
        json.dump(save_val, fout)
class LVISVis:
    """Draw LVIS ground-truth annotations or detection results on images."""

    def __init__(self, lvis_gt, lvis_dt=None, img_dir=None, dpi=75):
        """Constructor for LVISVis.

        Args:
            lvis_gt (LVIS class instance, or str containing path of
                annotation file)
            lvis_dt (LVISResult class instance, or str containing path of
                result file, or list of dict)
            img_dir (str): path of folder containing all images. If None,
                the image to be displayed will be downloaded to the
                current working dir.
            dpi (int): dpi for figure size setup

        Raises:
            TypeError: if ``lvis_gt`` or ``lvis_dt`` has an unsupported
                type.
        """
        self.logger = logging.getLogger(__name__)

        if isinstance(lvis_gt, LVIS):
            self.lvis_gt = lvis_gt
        elif isinstance(lvis_gt, str):
            self.lvis_gt = LVIS(lvis_gt)
        else:
            raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))

        if lvis_dt is not None:
            if isinstance(lvis_dt, LVISResults):
                self.lvis_dt = lvis_dt
            elif isinstance(lvis_dt, (str, list)):
                self.lvis_dt = LVISResults(self.lvis_gt, lvis_dt)
            else:
                raise TypeError(
                    "Unsupported type {} of lvis_dt.".format(lvis_dt))
        else:
            self.lvis_dt = None

        self.dpi = dpi
        self.img_dir = img_dir if img_dir else '.'
        if self.img_dir == '.':
            self.logger.warning(
                "img_dir not specified. Images will be downloaded.")

    def coco_segm_to_poly(self, _list):
        """Turn a flat [x0, y0, x1, y1, ...] list into an Nx2 array."""
        x = _list[0::2]
        y = _list[1::2]
        points = np.asarray([x, y])
        return np.transpose(points)

    def get_synset(self, idx):
        """Return the label text of category ``idx``.

        The text is the first dot-separated field of the category's
        synset joined with its last field converted to int.
        """
        synset = self.lvis_gt.load_cats(ids=[idx])[0]["synset"]
        text = synset.split(".")
        text = "{}.{}".format(text[0], int(text[-1]))
        return text

    def setup_figure(self, img, title="", dpi=75):
        """Create a frameless figure sized to ``img`` and show the image.

        Returns:
            tuple: the figure and its single axes.
        """
        fig = plt.figure(frameon=False)
        fig.set_size_inches(img.shape[1] / dpi, img.shape[0] / dpi)
        ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
        ax.set_title(title)
        ax.axis("off")
        fig.add_axes(ax)
        ax.imshow(img)
        return fig, ax

    def vis_bbox(self, ax, bbox, box_alpha=0.5, edgecolor="g", linestyle="--"):
        """Draw an unfilled rectangle for ``bbox`` on ``ax``."""
        # bbox should be of the form x, y, w, h
        ax.add_patch(
            plt.Rectangle(
                (bbox[0], bbox[1]),
                bbox[2],
                bbox[3],
                fill=False,
                edgecolor=edgecolor,
                linewidth=2.5,
                alpha=box_alpha,
                linestyle=linestyle,
            )
        )

    def vis_text(self, ax, bbox, text, color="w"):
        """Write ``text`` just above the top-left corner of ``bbox``."""
        ax.text(
            bbox[0],
            bbox[1] - 2,
            text,
            fontsize=15,
            family="serif",
            bbox=dict(facecolor="none", alpha=0.4, pad=0, edgecolor="none"),
            color=color,
            zorder=10,
        )

    def vis_mask(self, ax, segm, color):
        """Draw a half-transparent filled polygon on ``ax``."""
        # segm is numpy array of shape Nx2
        polygon = Polygon(
            segm, fill=True, facecolor=color, edgecolor=color, linewidth=3,
            alpha=0.5
        )
        ax.add_patch(polygon)

    def get_color(self, idx):
        """Return the colormap entry for ``idx``, cycling through the map."""
        color_list = colormap(rgb=True) / 255
        return color_list[idx % len(color_list), 0:3]

    def load_img(self, img_id):
        """Read image ``img_id`` from ``img_dir`` with R and B swapped.

        The image is downloaded through ``self.lvis_gt.download`` first
        when the file is not present in ``img_dir``.
        """
        img = self.lvis_gt.load_imgs([img_id])[0]
        img_path = os.path.join(self.img_dir, img["file_name"])
        if not os.path.exists(img_path):
            self.lvis_gt.download(self.img_dir, img_ids=[img_id])
        img = cv2.imread(img_path)
        b, g, r = cv2.split(img)
        return cv2.merge([r, g, b])

    def _draw_annotations(
        self, img_id, boxes, segms, classes, scores,
        show_boxes, show_segms, show_classes, cat_ids_to_show
    ):
        """Draw boxes, labels and masks, largest box first.

        ``scores`` is None when no score should be appended to the label.
        """
        boxes = np.asarray(boxes)
        areas = boxes[:, 2] * boxes[:, 3]
        sorted_inds = np.argsort(-areas)

        # Honour the dpi given to the constructor (it used to be ignored).
        fig, ax = self.setup_figure(self.load_img(img_id), dpi=self.dpi)

        for idx in sorted_inds:
            if (cat_ids_to_show is not None
                    and classes[idx] not in cat_ids_to_show):
                continue
            color = self.get_color(idx)
            if show_boxes:
                self.vis_bbox(ax, boxes[idx], edgecolor=color)
            if show_classes:
                text = self.get_synset(classes[idx])
                if scores is not None:
                    text = "{}: {:.2f}".format(text, scores[idx])
                self.vis_text(ax, boxes[idx], text)
            if show_segms:
                for segm in segms[idx]:
                    self.vis_mask(ax, self.coco_segm_to_poly(segm), color)

    def vis_img(
        self, img_id, show_boxes=False, show_segms=True, show_classes=False,
        cat_ids_to_show=None
    ):
        """Draw the ground-truth annotations of ``img_id``.

        Logs a warning and draws nothing when the image has no
        annotation.
        """
        ann_ids = self.lvis_gt.get_ann_ids(img_ids=[img_id])
        anns = self.lvis_gt.load_anns(ids=ann_ids)
        boxes, segms, classes = [], [], []
        for ann in anns:
            boxes.append(ann["bbox"])
            segms.append(ann["segmentation"])
            classes.append(ann["category_id"])

        if len(boxes) == 0:
            self.logger.warning(
                "No gt anno found for img_id: {}".format(img_id))
            return

        self._draw_annotations(
            img_id, boxes, segms, classes, None,
            show_boxes, show_segms, show_classes, cat_ids_to_show)

    def vis_result(
        self, img_id, show_boxes=False, show_segms=True, show_classes=False,
        cat_ids_to_show=None, score_thrs=0.0, show_scores=True
    ):
        """Draw the detections of ``img_id`` returned by
        ``self.lvis_dt.get_top_results(img_id, score_thrs)``.

        Logs a warning and draws nothing when there is no detection.
        Requires ``lvis_dt`` to have been given to the constructor.
        """
        assert self.lvis_dt is not None, "lvis_dt was not specified."
        anns = self.lvis_dt.get_top_results(img_id, score_thrs)
        boxes, segms, classes, scores = [], [], [], []
        for ann in anns:
            boxes.append(ann["bbox"])
            segms.append(ann["segmentation"])
            classes.append(ann["category_id"])
            scores.append(ann["score"])

        if len(boxes) == 0:
            # These are detections, not ground truths.
            self.logger.warning(
                "No dt anno found for img_id: {}".format(img_id))
            return

        self._draw_annotations(
            img_id, boxes, segms, classes,
            scores if show_scores else None,
            show_boxes, show_segms, show_classes, cat_ids_to_show)