# Merge every REFER split with its COCO annotations into a single JSON file.
import json

from pycocotools.coco import COCO
from refer import REFER


def main():
    ref_data_root = '<path/to/refer/data/folder>'
    all_refs = []
    for dataset, split_bys in [
            ('refcoco', ['google', 'unc']),
            ('refcoco+', ['unc']),
            ('refcocog', ['google', 'umd'])]:
        for split_by in split_bys:
            refer = REFER(ref_data_root, dataset, split_by)
            for ref_id in refer.getRefIds():
                ref = refer.Refs[ref_id]
                ann = refer.refToAnn[ref_id]
                ref['ann'] = ann
                ref['dataset'] = dataset
                ref['dataset_partition'] = split_by
                all_refs.append(ref)

    coco_annotations_file = '<path/to/instances_train2014.json>'
    coco = COCO(coco_annotations_file)
    ref_image_ids = set(x['image_id'] for x in all_refs)
    coco_anns = {
        image_id: {'info': coco.imgs[image_id], 'anns': coco.imgToAnns[image_id]}
        for image_id in ref_image_ids
    }

    out_file = '<path/to/refcoco.json>'
    with open(out_file, 'w') as f:
        json.dump({'ref': all_refs, 'coco_anns': coco_anns}, f)
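
# Usage sketch (not part of the original script): load the merged file back
# and group the refs by dataset. The key names match what main() writes above.
def load_merged_refs(path='<path/to/refcoco.json>'):
    with open(path) as f:
        merged = json.load(f)
    by_dataset = {}
    for ref in merged['ref']:
        by_dataset.setdefault(ref['dataset'], []).append(ref)
    return by_dataset, merged['coco_anns']
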
import os
import random
import time

import numpy as np
import torch
from torch.utils.data import Dataset

from refer import REFER


class RefDataset(Dataset):
    def __init__(self, split):
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds(split=split)
        self.image_embeds = np.load(
            os.path.join("data", "embeddings", "FINALImageEmbeddings.npy"))
        self.image_ids = list(
            np.load(os.path.join("data", "embeddings", "FINALImageIDs.npy")))
        before_text_embeds = time.time()
        self.text_embeds = np.concatenate(
            (np.load(os.path.join("data", "embeddings", "FINALTextEmbeddings1of2.npy")),
             np.load(os.path.join("data", "embeddings", "FINALTextEmbeddings2of2.npy"))),
            axis=0)
        after_text_embeds = time.time()
        print("Text Embedding Time: ", after_text_embeds - before_text_embeds)
        # Sanity checks on the precomputed embedding files.
        assert len(self.text_embeds) == 141564
        assert self.text_embeds[0].shape[1] == 3072
        print('Found {} referred objects in {} split.'.format(
            len(self.ref_ids), split))

    def __len__(self):
        return len(self.ref_ids)

    def __getitem__(self, i):
        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]
        image_id = ref['image_id']
        image = self.refer.Imgs[image_id]
        image_idx = self.image_ids.index(image_id)
        image_embed = self.image_embeds[image_idx, :, :, :]
        height = image['height']
        width = image['width']
        # Normalize the [x, y, w, h] box by the image dimensions.
        bound_box = torch.Tensor(self.refer.getRefBox(ref_id))
        bound_box[0] /= width
        bound_box[1] /= height
        bound_box[2] /= width
        bound_box[3] /= height
        # Sample one referring expression for this object; sent_id indexes
        # directly into the precomputed text embeddings.
        sent = random.choice(ref['sentences'])
        text_idx = sent['sent_id']
        text_embed = torch.from_numpy(self.text_embeds[text_idx])
        return image_embed, text_embed, bound_box
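
# Usage sketch (assumes the embedding .npy files above exist on disk): wrap
# the dataset in a DataLoader and pull one batch.
from torch.utils.data import DataLoader

train_set = RefDataset(split='train')
loader = DataLoader(train_set, batch_size=32, shuffle=True)
image_embed, text_embed, bound_box = next(iter(loader))
print(image_embed.shape, text_embed.shape, bound_box.shape)  # boxes are (32, 4)
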
import copy
import json
import os
import os.path as osp

from refer import REFER


def create_cache(**kwargs):
    data_root = kwargs.get('data_root')
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    splits = kwargs.get('splits')
    refer = REFER(data_root, dataset, splitBy)

    # print stats about the given dataset
    print('dataset [%s_%s] contains: ' % (dataset, splitBy))
    ref_ids = refer.getRefIds()
    image_ids = refer.getImgIds()
    print('%s expressions for %s refs in %s images.' %
          (len(refer.Sents), len(ref_ids), len(image_ids)))

    checkpoint_dir = osp.join('cache', 'prepro', dataset + "_" + splitBy)
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    for split in splits + ['train']:
        ref_ids = refer.getRefIds(split=split)
        print('%s refs are in split [%s].' % (len(ref_ids), split))
        # Flatten each ref into one entry per sentence, carrying the candidate
        # COCO boxes and the ground-truth box along with it.
        data = []
        for ref_id in ref_ids:
            ref = refer.Refs[ref_id]
            image_id = ref['image_id']
            ref['image_info'] = refer.Imgs[image_id]
            sentences = ref.pop('sentences')
            ref.pop('sent_ids')
            coco_boxes_info = refer.imgToAnns[image_id]
            coco_boxes = [box_ann['bbox'] for box_ann in coco_boxes_info]
            gtbox = refer.refToAnn[ref_id]['bbox']
            for sentence in sentences:
                entnew = copy.deepcopy(ref)
                entnew['boxes'] = coco_boxes
                entnew['sentence'] = sentence
                entnew['gtbox'] = gtbox
                data.append(entnew)
        data_json = osp.join('cache/prepro', dataset + "_" + splitBy, split + '.json')
        with open(data_json, 'w') as f:
            json.dump(data, f)
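
# Example invocation (paths are placeholders): build the per-split JSON caches
# for RefCOCO+ with the UNC partition; 'train' is appended internally.
create_cache(
    data_root='<path/to/refer/data>',
    dataset='refcoco+',
    splitBy='unc',
    splits=['val', 'testA', 'testB'],
)
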
from torch.utils.data import Dataset

from refer import REFER


class RefDataset(Dataset):
    def __init__(self):
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds()

    def __len__(self):
        return len(self.ref_ids)

    def __getitem__(self, i):
        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]
        # Only the first referring expression of each ref is used.
        sent = ref['sentences'][0]
        return sent['raw'], sent['sent_id']
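
# Usage sketch: dump every first sentence with its sent_id, e.g. to feed a
# text encoder offline (the batch size here is illustrative).
from torch.utils.data import DataLoader

dataset = RefDataset()
loader = DataLoader(dataset, batch_size=64)
for sents, sent_ids in loader:
    pass  # encode `sents` here and store the embeddings keyed by `sent_ids`
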
import os

import cv2

from refer import REFER


def gen_ref_coco_data():
    dataroot = "/projectnb/llamagrp/shawnlin/ref-exp-gen/dataset/refer2/refer/data"
    dataset = "refcoco"
    refer = REFER(dataroot, dataset, "google")
    ref_ids = refer.getRefIds(split="test")
    print("total ref ids:", len(ref_ids))
    for ref_id in ref_ids:
        ref = refer.Refs[ref_id]
        img_id = ref["image_id"]
        ann_id = ref["ann_id"]
        img_path = os.path.join(refer.IMAGE_DIR, refer.Imgs[img_id]["file_name"])
        img = cv2.imread(img_path)  # BGR; use cv2.cvtColor if RGB is needed
        # Join all referring expressions for this object into one string.
        ref_expr = "\n".join([s["raw"] for s in ref["sentences"]])
        yield (img, ref_expr, img_id, ann_id, ref_id)
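
# Usage sketch: consume the generator and report images that failed to load
# (cv2.imread returns None on a bad path).
for img, ref_expr, img_id, ann_id, ref_id in gen_ref_coco_data():
    if img is None:
        print("missing image for ref", ref_id)
        continue
    print(img_id, ann_id, ref_expr.splitlines()[0])
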
if __name__ == '__main__':
    import os.path as osp
    import sys
    ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
    sys.path.insert(0, osp.join(ROOT_DIR, 'lib', 'datasets'))
    from refer import REFER

    # load refer of dataset
    dataset = 'refcoco'
    refer = REFER(dataset, splitBy='google')

    # mimic some Res
    val_refIds = refer.getRefIds(split='test')
    ref_id = 49767
    print("GT: %s" % refer.Refs[ref_id]['sentences'])
    Res = [{'ref_id': ref_id, 'sent': 'left bottle'}]

    # evaluate some refer expressions
    refEval = RefEvaluation(refer, Res)
    refEval.evaluate()

    # print output evaluation scores
    for metric, score in refEval.eval.items():
        print('%s: %.3f' % (metric, score))

    # demo how to use evalImgs to retrieve low score result
    # evals = [eva for eva in refEval.evalRefs if eva['CIDEr'] < 30]
    # print('ground truth sents')
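
    # Sanity-check sketch (not in the original demo): scoring a ground-truth
    # sentence against itself should land near each metric's ceiling.
    gt_sent = refer.Refs[ref_id]['sentences'][0]['sent']
    refEval_gt = RefEvaluation(refer, [{'ref_id': ref_id, 'sent': gt_sent}])
    refEval_gt.evaluate()
    for metric, score in refEval_gt.eval.items():
        print('%s: %.3f' % (metric, score))
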
import os.path as osp
import cv2
import argparse

from refer import REFER

parser = argparse.ArgumentParser(description='Data preparation')
# data_root contains refclef, refcoco, refcoco+, refcocog and images
parser.add_argument('--data_root', type=str)
parser.add_argument('--output_dir', type=str)
parser.add_argument('--dataset', type=str,
                    choices=['refcoco', 'refcoco+', 'refcocog', 'refclef'],
                    default='refcoco')
parser.add_argument('--split', type=str, default='umd')
parser.add_argument('--generate_mask', action='store_true')
args = parser.parse_args()

refer = REFER(args.data_root, args.dataset, args.split)

print('dataset [%s_%s] contains: ' % (args.dataset, args.split))
ref_ids = refer.getRefIds()
image_ids = refer.getImgIds()
print('%s expressions for %s refs in %s images.' %
      (len(refer.Sents), len(ref_ids), len(image_ids)))

print('\nAmong them:')
if args.dataset == 'refclef':
    if args.split == 'unc':
        splits = ['train', 'val', 'testA', 'testB', 'testC']
    else:
        splits = ['train', 'val', 'test']
elif args.dataset == 'refcoco':
    splits = ['train', 'val', 'testA', 'testB']
elif args.dataset == 'refcoco+':
    splits = ['train', 'val', 'testA', 'testB']
elif args.dataset == 'refcocog':
    splits = ['train', 'val', 'test']  # no dedicated test split for refcocog right now
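
# Continuation sketch (not in the original snippet): print per-split counts,
# mirroring how REFER stats are usually reported.
for split in splits:
    split_ref_ids = refer.getRefIds(split=split)
    print('%d refs are in split [%s].' % (len(split_ref_ids), split))
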
import sys
from pathlib import Path

import cv2
import numpy as np
from torch.utils.data import Dataset


class RefCOCODataset(Dataset):
    def __init__(self, refcoco_dir, refcoco_images_dir, coco_dir, split='val'):
        self.image_dir = refcoco_images_dir
        # coco_train_annFile = coco_dir.joinpath('annotations/instances_train2014.json')
        # self.coco = COCO(coco_train_annFile)

        assert split in ['train', 'val', 'test']

        workspace_dir = Path(__file__).resolve().parent.parent
        refcoco_util_dir = workspace_dir.joinpath('refcoco_utils')
        sys.path.append(str(refcoco_util_dir))
        from refer import REFER

        self.refer = REFER('refcocog', 'umd')
        ref_ids = self.refer.getRefIds(split=split)

        id2dets = {}
        img_ids = []
        image_fns = []
        for ref_id in ref_ids:
            ref = self.refer.Refs[ref_id]
            img_id = ref['image_id']
            if img_id not in img_ids:
                img_ids.append(img_id)
                # Strip the annotation id from the ref file name, e.g.
                # COCO_train2014_000000419645_398406.jpg -> COCO_train2014_000000419645.jpg
                fn_ann = ref['file_name']
                suffix = fn_ann.split('.')[-1]
                fname = '_'.join(fn_ann.split('_')[:-1]) + '.' + suffix
                image_fns.append(fname)
                detections = self.refer.imgToAnns[img_id]
                id2dets[img_id] = detections

        self.image_ids = img_ids
        self.image_fns = image_fns
        self.id2dets = id2dets

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_fn = self.image_fns[idx]
        image_path = self.image_dir.joinpath(image_fn)
        assert Path(image_path).exists(), image_path
        img = cv2.imread(str(image_path))
        H, W, C = img.shape

        dets = self.id2dets[image_id]
        # cat_names = [det['category_name'] for det in dets]
        boxes = []
        for i, region in enumerate([det['bbox'] for det in dets]):
            # COCO boxes are [x, y, w, h]; convert to (x1, y1, x2, y2).
            x, y, w, h = region[:4]
            x1, y1, x2, y2 = x, y, x + w, y + h
            assert x2 <= W, (image_id, i, region)
            assert y2 <= H, (image_id, i, region)
            boxes.append([x1, y1, x2, y2])
        boxes = np.array(boxes)

        return {
            'img_id': str(image_id),
            'img_fn': image_fn,
            'img': img,
            'boxes': boxes,
            # 'captions': cat_names
        }
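
# Usage sketch (paths are placeholders): iterate the dataset with an identity
# collate so variable-size images and box arrays are not stacked.
from torch.utils.data import DataLoader

dataset = RefCOCODataset(Path('<refcoco_dir>'), Path('<images_dir>'),
                         Path('<coco_dir>'), split='val')
loader = DataLoader(dataset, batch_size=1, collate_fn=lambda batch: batch)
for batch in loader:
    item = batch[0]
    print(item['img_id'], item['img'].shape, item['boxes'].shape)
    break
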
import json
import os

import cv2
from torch.utils.data import Dataset

from refer import REFER
# BoxList comes from a maskrcnn-benchmark-style codebase; adjust the import
# path to wherever bounding_box.py lives in your repo.
from maskrcnn_benchmark.structures.bounding_box import BoxList


class RefCOCO(Dataset):
    def __init__(self,
                 data_dir="/projectnb/llamagrp/shawnlin/ref-exp-gen/dataset/refer2/refer/data",
                 dataset="refcoco",
                 split="train",
                 splitBy="google",
                 transforms=None):
        assert split in ["train", "val", "test"]
        assert os.path.exists(data_dir), \
            "cannot find folder {}, please download refcoco data into this folder".format(data_dir)
        self.data_dir = data_dir
        self.dataset = dataset
        self.transforms = transforms
        self.split = split
        self.splitBy = splitBy
        self.refer = REFER(self.data_dir, self.dataset, self.splitBy)
        self.ref_ids = self.refer.getRefIds(split=self.split)

        # Class/predicate vocabularies from the scene-graph dictionary file.
        self.info = json.load(open(
            "/projectnb/llamagrp/shawnlin/ref-exp-gen/graph-rcnn.pytorch/datasets/vg_bm/VG-SGG-dicts.json",
            'r'))
        # add background class
        self.info['label_to_idx']['__background__'] = 0
        self.class_to_ind = self.info['label_to_idx']
        self.ind_to_classes = sorted(self.class_to_ind,
                                     key=lambda k: self.class_to_ind[k])
        self.predicate_to_ind = self.info['predicate_to_idx']
        self.predicate_to_ind['__background__'] = 0
        self.ind_to_predicates = sorted(self.predicate_to_ind,
                                        key=lambda k: self.predicate_to_ind[k])

    def __len__(self):
        return len(self.ref_ids)

    def __getitem__(self, index):
        """get dataset item"""
        # get image
        ref = self.refer.Refs[self.ref_ids[index]]
        img_id = ref["image_id"]
        ann_id = ref["ann_id"]
        img_path = os.path.join(self.refer.IMAGE_DIR,
                                self.refer.Imgs[img_id]["file_name"])
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # cv2 images are (height, width, channels).
        height, width = img.shape[0], img.shape[1]

        # Target bbox of the referent. The COCO annotation box is [x, y, w, h],
        # so build the BoxList in "xywh" mode and convert to "xyxy".
        referent_box = [self.refer.Anns[ann_id]["bbox"]]
        target_raw = BoxList(referent_box, (width, height), mode="xywh").convert("xyxy")
        if self.transforms is not None:
            img, target = self.transforms(img, target_raw)
        else:
            img, target = img, target_raw
        target.add_field("ref_sents", [s["raw"] for s in ref["sentences"]])
        target.add_field("label", self.refer.Anns[ann_id]["category_id"])
        target = target.clip_to_image(remove_empty=False)
        info = {"img_id": img_id,
                "ann_id": ann_id,
                "ref_id": self.ref_ids[index],
                "ref_sents": [s["raw"] for s in ref["sentences"]]}
        return img, target, index, info

    def get_img_info(self, index):
        ref = self.refer.Refs[self.ref_ids[index]]
        img_id = ref["image_id"]
        w, h = self.refer.Imgs[img_id]["width"], self.refer.Imgs[img_id]["height"]
        return {"height": h, "width": w}
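
# Usage sketch: instantiate with the defaults (refcoco, google split) and
# inspect one sample; the hard-coded paths above are cluster-specific and
# will likely need editing.
dataset = RefCOCO(split="val")
img, target, index, info = dataset[0]
print(info["ref_sents"])
print(target.bbox)  # a (1, 4) tensor of xyxy coordinates
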
import json
import sys
from pathlib import Path

import cv2
import numpy as np
from torch.utils.data import Dataset


class RefCOCODataset(Dataset):
    def __init__(self, refcoco_dir, refcoco_images_dir, split='val'):
        self.image_dir = refcoco_images_dir

        # Precomputed MAttNet Mask R-CNN detections, grouped by image id.
        mattnet_maskrcnn_detections_path = refcoco_dir.joinpath(
            'detections/refcocog_umd/res101_coco_minus_refer_notime_dets.json')
        with open(mattnet_maskrcnn_detections_path) as f:
            mattnet_maskrcnn_detections = json.load(f)
        id2dets = {}
        for det in mattnet_maskrcnn_detections:
            image_id = det['image_id']
            if image_id not in id2dets:
                id2dets[image_id] = []
            id2dets[image_id].append(det)
        self.id2dets = id2dets
        print('Load mattnet detections from', mattnet_maskrcnn_detections_path)

        assert split in ['train', 'val', 'test']

        workspace_dir = Path(__file__).resolve().parent.parent
        refcoco_util_dir = workspace_dir.joinpath('refcoco_utils')
        sys.path.append(str(refcoco_util_dir))
        from refer import REFER

        self.refer = REFER('refcocog', 'umd')
        ref_ids = self.refer.getRefIds(split=split)

        img_ids = []
        image_fns = []
        for ref_id in ref_ids:
            ref = self.refer.Refs[ref_id]
            img_id = ref['image_id']
            if img_id not in img_ids:
                img_ids.append(img_id)
                # Strip the annotation id from the ref file name, e.g.
                # COCO_train2014_000000419645_398406.jpg -> COCO_train2014_000000419645.jpg
                fn_ann = ref['file_name']
                suffix = fn_ann.split('.')[-1]
                fname = '_'.join(fn_ann.split('_')[:-1]) + '.' + suffix
                image_fns.append(fname)
        self.image_ids = img_ids
        self.image_fns = image_fns

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_fn = self.image_fns[idx]
        image_path = self.image_dir.joinpath(image_fn)
        assert Path(image_path).exists(), image_path
        img = cv2.imread(str(image_path))
        H, W, C = img.shape

        dets = self.id2dets[image_id]
        cat_names = [det['category_name'] for det in dets]
        boxes = []
        for i, region in enumerate([det['box'] for det in dets]):
            # Detection boxes are [x, y, w, h]; convert to (x1, y1, x2, y2).
            x, y, w, h = region[:4]
            x1, y1, x2, y2 = x, y, x + w, y + h
            assert x2 <= W, (image_id, i, region)
            assert y2 <= H, (image_id, i, region)
            boxes.append([x1, y1, x2, y2])
        boxes = np.array(boxes)

        return {
            'img_id': str(image_id),
            'img_fn': image_fn,
            'img': img,
            'boxes': boxes,
            'captions': cat_names
        }
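
# Visualization sketch: draw the MAttNet detection boxes on one image and
# save the result (the output path is illustrative).
item = RefCOCODataset(Path('<refcoco_dir>'), Path('<images_dir>'), split='val')[0]
vis = item['img'].copy()
for (x1, y1, x2, y2), name in zip(item['boxes'], item['captions']):
    cv2.rectangle(vis, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.putText(vis, name, (int(x1), max(int(y1) - 3, 10)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite('vis_%s.jpg' % item['img_id'], vis)
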
import scipy.misc
from tqdm import tqdm

from refer import REFER


def bb_intersection_over_union(boxA, boxB):
    # boxes are (x1, y1, x2, y2); find the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # compute the area of the intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou


data_root = '../refer/data'
dataset = 'refcoco'
splitBy = 'unc'
refer = REFER(data_root, dataset, splitBy)
ref_ids = refer.getRefIds(split='testB')
images_dir = '/root/refer/data/images/mscoco/images/train2014/'

hyp = open("hyp.txt", "w")
ref1 = open("ref1.txt", "w")
ref2 = open("ref2.txt", "w")
ref3 = open("ref3.txt", "w")
ref4 = open("ref4.txt", "w")

for ref_id in tqdm(ref_ids):
    ref = refer.Refs[ref_id]
    x, y, w, h = refer.getRefBox(ref_id)  # [x, y, w, h]
    x1, y1, x2, y2 = x, y, x + w, y + h
    image_path = images_dir + refer.Imgs[ref['image_id']]['file_name']
    # scipy.misc.imread was removed in SciPy >= 1.2; imageio.imread is a
    # drop-in replacement on newer environments.
    image = scipy.misc.imread(image_path)
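
# Example: score a hypothetical predicted box against the last ground-truth
# ref box from the loop above (pred_box is illustrative).
pred_box = [x1 + 5, y1 + 5, x2 - 5, y2 - 5]
print(bb_intersection_over_union(pred_box, [x1, y1, x2, y2]))
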
import os
import os.path as osp
import sys
from time import time

import torch

from mrcn import inference  # MAttNet-style Mask R-CNN wrapper (assumed module path)
from refer import REFER


def main(args):
    # Image Directory
    params = vars(args)
    dataset_splitBy = params['dataset'] + '_' + params['splitBy']
    if 'coco' in dataset_splitBy or 'combined' in dataset_splitBy:
        IMAGE_DIR = 'data/images/mscoco/images/train2014'
    elif 'clef' in dataset_splitBy:
        IMAGE_DIR = 'data/images/saiapr_tc-12'
    else:
        print('No image directory prepared for ', args.dataset)
        sys.exit(0)

    # make save dir
    save_dir = osp.join('cache/detections', dataset_splitBy)
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    print(save_dir)

    # get mrcn instance
    mrcn = inference.Inference(args)
    imdb = mrcn.imdb

    # import refer
    data_root, dataset, splitBy = params['data_root'], params['dataset'], params['splitBy']
    refer = REFER(data_root, dataset, splitBy)
    cat_name_to_cat_ix = {category_name: category_id
                          for category_id, category_name in refer.Cats.items()}

    # detect and prepare dets.json
    proposals = []
    det_id = 0
    cnt = 0

    # Restrict detection to images that appear in the val split.
    val_image_ids = {refer.Refs[ref_id]['image_id']
                     for ref_id in refer.getRefIds(split='val')
                     if refer.Refs[ref_id]['split'] == 'val'}
    print('val image num:', len(val_image_ids))

    start = time()
    for image_id in val_image_ids:
        image = refer.Imgs[image_id]
        file_name = image['file_name']
        img_path = osp.join(IMAGE_DIR, file_name)
        # predict
        scores, boxes = mrcn.predict(img_path)
        rois = mrcn.net._predictions['rois'].data.cpu().numpy()[:, 1:] / mrcn._scale
        cnt += 1
        print('%s/%s done.' % (cnt, len(val_image_ids)))
        # info = {
        #     'image_id': image_id,
        #     'rois': rois,
        #     'scores': scores,
        #     'boxes': boxes,
        #     'roi_scores': mrcn.net._predictions['__roi_scores'].data.cpu().numpy()
        # }
        torch.cuda.empty_cache()
        # proposals.append(info)
    total_t = time() - start
    avg_t = total_t / len(val_image_ids)
    print('time: %.6f / %.6f = %.6f' % (total_t, len(val_image_ids), avg_t))
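
# Invocation sketch: the script reads these fields from `args`; any extra
# Mask R-CNN options required by inference.Inference are repo-specific and
# omitted here.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_root', type=str, default='data')
    parser.add_argument('--dataset', type=str, default='refcoco')
    parser.add_argument('--splitBy', type=str, default='unc')
    main(parser.parse_args())
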