class RefDataset(Dataset):
    """RefCOCO+ (splitBy='unc') referring-expression dataset backed by
    precomputed image and text embeddings stored under data/embeddings/.

    Each item is a tuple ``(image_embed, text_embed, bound_box)`` where
    ``bound_box`` is the referred object's [x, y, w, h] box normalized by
    image width/height.
    """

    # Expected totals for the precomputed text-embedding files; validated
    # eagerly in __init__ so a bad/partial download fails fast.
    _NUM_TEXT_EMBEDS = 141564
    _TEXT_EMBED_DIM = 3072

    def __init__(self, split):
        """Load REFER annotations for *split* plus the precomputed embeddings.

        Args:
            split: dataset split name passed through to ``REFER.getRefIds``
                (e.g. 'train', 'val', 'testA', 'testB').

        Raises:
            ValueError: if the text-embedding files do not have the expected
                count or embedding dimension.
        """
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds(split=split)

        emb_dir = os.path.join("data", "embeddings")
        self.image_embeds = np.load(
            os.path.join(emb_dir, "FINALImageEmbeddings.npy"))
        self.image_ids = list(
            np.load(os.path.join(emb_dir, "FINALImageIDs.npy")))
        # O(1) image-id -> embedding-row lookup. The original code called
        # self.image_ids.index(image_id) inside __getitem__, an O(n) list
        # scan on every item fetch.
        self._image_pos = {
            img_id: row for row, img_id in enumerate(self.image_ids)}

        before_text_embeds = time.time()
        self.text_embeds = np.concatenate(
            (np.load(
                os.path.join(emb_dir, "FINALTextEmbeddings1of2.npy")),
             np.load(
                 os.path.join(emb_dir, "FINALTextEmbeddings2of2.npy"))),
            axis=0)
        after_text_embeds = time.time()
        print("Text Embedding Time: ", after_text_embeds - before_text_embeds)

        # Explicit checks instead of `assert`, which is stripped under -O.
        if len(self.text_embeds) != self._NUM_TEXT_EMBEDS:
            raise ValueError(
                "expected {} text embeddings, got {}".format(
                    self._NUM_TEXT_EMBEDS, len(self.text_embeds)))
        # NOTE(review): indexing shape[1] of a single row implies each row of
        # text_embeds is itself 2-D (e.g. (seq_len, 3072)) — confirm against
        # the embedding-export script.
        if self.text_embeds[0].shape[1] != self._TEXT_EMBED_DIM:
            raise ValueError(
                "expected text embedding dim {}, got {}".format(
                    self._TEXT_EMBED_DIM, self.text_embeds[0].shape[1]))

        print('Found {} referred objects in {} split.'.format(
            len(self.ref_ids), split))

    def __len__(self):
        """Number of referred objects in this split."""
        return len(self.ref_ids)

    def __getitem__(self, i):
        """Return (image_embed, text_embed, bound_box) for the i-th ref.

        The text embedding is taken for a *randomly chosen* sentence of the
        ref, so repeated fetches of the same index may differ.
        """
        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]

        image_id = ref['image_id']
        image = self.refer.Imgs[image_id]
        image_idx = self._image_pos[image_id]
        image_embed = self.image_embeds[image_idx, :, :, :]

        height = image['height']
        width = image['width']
        # Normalize [x, y, w, h] (pixels) to fractions of the image size.
        bound_box = torch.Tensor(self.refer.getRefBox(ref_id))
        bound_box[0] /= width
        bound_box[1] /= height
        bound_box[2] /= width
        bound_box[3] /= height

        sent = random.choice(ref['sentences'])
        # NOTE(review): sent_id is used directly as a row index into
        # text_embeds — assumes sent_ids are contiguous 0..N-1 in the same
        # order the embeddings were exported; confirm against the exporter.
        text_embed = torch.from_numpy(self.text_embeds[sent['sent_id']])

        return image_embed, text_embed, bound_box
class RefDataset(Dataset):
    """Minimal RefCOCO+ (splitBy='unc') dataset over all referred objects.

    Each item is ``(raw_sentence, sent_id)`` for the *first* sentence of the
    referred object, or ``None`` if the ref has no sentences.

    NOTE(review): this redefinition shadows the embedding-based RefDataset
    declared earlier in the file — only this variant is visible to importers.
    """

    def __init__(self):
        # Loads the full annotation set; no split filtering here.
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds()

    def __len__(self):
        """Number of referred objects across the whole dataset."""
        return len(self.ref_ids)

    def __getitem__(self, i):
        """Return (raw, sent_id) of the first sentence, or None if empty.

        Mirrors the original loop-and-return: only the first sentence is
        ever produced; an empty sentence list falls through to None.
        """
        ref = self.refer.loadRefs(self.ref_ids[i])[0]
        sentences = ref['sentences']
        if sentences:
            first = sentences[0]
            return first['raw'], first['sent_id']
        return None