Example #1
    def __getitem__(self, idx):
        ent = self.data[idx]
        sent_id = ent['sentence']['sent_id']
        file_name = ent['image_info']['file_name']
        img_id = ent['image_id']
        ans = ent['category_id']
        W = ent['image_info']['width']
        H = ent['image_info']['height']
        que = ent['sentence']['sent']
        gtbox = ent['gtbox']
        gtbox = torch.tensor(gtbox)
        # boxes from refcoco are in xywh format
        gtboxorig = convert_xywh_x1y1x2y2(gtbox.unsqueeze(0)).squeeze(0)
        box_coords = ent['boxes']
        box_coords = torch.tensor(box_coords)

        L, W, H, box_feats, box_coords_6d, box_coordsorig = self._load_image_coco(
            img_id)
        box_coords_6d = torch.from_numpy(box_coords_6d)

        # boxes in the h5 files are in x1 y1 x2 y2 format
        iou = getIOU(gtboxorig.unsqueeze(0), torch.from_numpy(box_coordsorig))
        correct = iou > 0.5
        _, idx = torch.max(iou, dim=0)
        gtboxiou = box_coordsorig[idx]
        gtboxiou = torch.from_numpy(gtboxiou)

        tokens = tokenize_ques(self.dictionary, que)
        qfeat = torch.from_numpy(tokens).long()
        Lvec = torch.zeros(100).long()  # marks which of the 100 padded proposal slots hold real boxes
        Lvec[:L] = 1
        return (sent_id, ans, box_feats, box_coordsorig, box_coords_6d.float(),
                gtboxorig.float(), qfeat, Lvec, idx, correct.view(-1))
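
The helpers convert_xywh_x1y1x2y2 and getIOU are used throughout these examples but never defined in them. A minimal sketch consistent with the call sites (both take (..., 4) tensors in the formats their names suggest; the exact signatures are assumptions, not taken from the VQD codebase):

import torch

def convert_xywh_x1y1x2y2(boxes):
    # boxes: (..., 4) tensor in (x, y, w, h); returns (x1, y1, x2, y2)
    return torch.cat([boxes[..., :2], boxes[..., :2] + boxes[..., 2:]], dim=-1)

def getIOU(boxes_a, boxes_b):
    # Elementwise IOU with broadcasting over the leading dimensions;
    # both inputs are (..., 4) tensors in (x1, y1, x2, y2) format.
    x1 = torch.max(boxes_a[..., 0], boxes_b[..., 0])
    y1 = torch.max(boxes_a[..., 1], boxes_b[..., 1])
    x2 = torch.min(boxes_a[..., 2], boxes_b[..., 2])
    y2 = torch.min(boxes_a[..., 3], boxes_b[..., 3])
    inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area_a = (boxes_a[..., 2] - boxes_a[..., 0]) * (boxes_a[..., 3] - boxes_a[..., 1])
    area_b = (boxes_b[..., 2] - boxes_b[..., 0]) * (boxes_b[..., 3] - boxes_b[..., 1])
    return inter / (area_a + area_b - inter)

Broadcasting makes both usages above work: a (1, 4) GT box against (N, 4) proposals yields an (N,) IOU vector in Example #1, and a (15, 1, 4) GT stack against (N, 4) proposals yields a (15, N) matrix in Example #2.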
Example #2
    def __getitem__(self, idx):
        ent = self.data[idx]
        sent_id = ent['question_id']
        img_id = ent['image_id']
        ans = 0
        W = ent['width']
        H = ent['height']
        que = ent['question']
        # 0 to N ground-truth boxes per question
        gtbox = ent['gtbox']
        L_gtboxes = len(gtbox)
        Max_box = 15  # the maximum number of GT boxes in VQD
        if len(gtbox[0]) == 0:
            # no GT boxes: fill all slots with dummy boxes
            gtbox = [[0, 0, 1, 1.0]] * Max_box
        else:
            # pad the GT list to a fixed length with dummy boxes
            gtbox = gtbox + [[0, 0, 1, 1.0]] * (Max_box - L_gtboxes)

        gtbox = torch.tensor(gtbox).float()
        # boxes from refcoco are in xywh format
        gtboxorig = convert_xywh_x1y1x2y2(gtbox)

        L, W, H, box_feats, box_coords_6d, box_coordsorig = self._load_image_coco(
            img_id)
        box_coords_6d = torch.from_numpy(box_coords_6d)

        # boxes in the h5 files are in x1 y1 x2 y2 format
        iou = getIOU(gtboxorig.unsqueeze(1),
                     torch.from_numpy(box_coordsorig)).squeeze(-1)
        correct = iou > 0.5
        correct = correct.sum(dim=0).clamp(max=1)

        _, idxall = torch.max(iou, dim=1)
        # more than one index may achieve the max IOU; random sampling
        # (np.random.choice(idxall)) is an option, but the first index is used
        idx = torch.tensor([int(idxall[0])])

        gtboxiou = box_coordsorig[idx]
        gtboxiou = torch.from_numpy(gtboxiou)

        tokens = tokenize_ques(self.dictionary, que)
        qfeat = torch.from_numpy(tokens).long()
        # total number of proposal entries
        N = box_coordsorig.shape[0]
        Lvec = torch.zeros(N).long()
        Lvec[:L] = 1
        return (sent_id, ans, box_feats, box_coordsorig, box_coords_6d.float(),
                gtboxorig[0].float(), qfeat, Lvec, idx, correct.view(-1))
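
Both loaders also call tokenize_ques to turn the question string into a fixed-length id vector that torch.from_numpy can consume. The real dictionary API and padding length are not shown anywhere in these snippets, so the following is only a plausible sketch (the max_len of 14, the plain-dict lookup, and 0 as the pad/unknown id are all assumptions):

import numpy as np

def tokenize_ques(dictionary, question, max_len=14):
    # dictionary is assumed to map lowercase words to integer ids,
    # with 0 reserved for padding / unknown words (an assumption)
    words = question.lower().replace('?', ' ').split()
    tokens = np.zeros(max_len, dtype=np.int64)
    for i, word in enumerate(words[:max_len]):
        tokens[i] = dictionary.get(word, 0)
    return tokens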
Example #3
File: plot_f.py Project: hyzcn/VQD
def saveimage(ent, boxes):

    image = ent['image_info']['file_name']
    if "train2014" in image:
        image = os.path.join('/home/manoj/train2014', image)
    else:
        image = os.path.join('/home/manoj/val2014', image)

    image_id = ent['image_id']
    sent_id = ent['sentence']['sent_id']
    npimg = Image.open(image)
    plt.figure()
    plt.imshow(npimg)
    ansidx = ent['gtnms']

    scores = ent['scores']
    classify = ent['cls']
    clspred = ent['pred']

    cocogtbox = torch.tensor(ent['gtbox'])
    cocogtbox = convert_xywh_x1y1x2y2(cocogtbox.unsqueeze(0)).squeeze(0)
    predbox = torch.tensor(boxes[clspred]).unsqueeze(0)
    bottomupgtbox = torch.tensor(boxes[ansidx])

    iou_cocogt = getIOU(predbox, cocogtbox).item()
    iou_bottomupgt = getIOU(predbox, bottomupgtbox).item()
    
    for i in range(ent['L']):
        xmin, ymin, xmax, ymax = boxes[i]
        rect = retbox([xmin, ymin, xmax, ymax])
        alpha = np.abs(scores[i]) / np.max(np.abs(scores))
        if i == ansidx:
            # bottom-up ground-truth box in solid green
            plt.plot(rect[:, 0], rect[:, 1], 'g', linewidth=3.0)
            loc = (xmin, 0.5 * (ymin + ymax))
            plt.text(*loc, "{:.2f}, {:d}".format(scores[i], classify[i]),
                     color='g', fontsize=8)
        elif i == clspred:
            # predicted box in dash-dotted red
            plt.plot(rect[:, 0], rect[:, 1], 'r-.', linewidth=2.0)
            loc = (0.5 * (xmin + xmax), 0.5 * (ymin + ymax))
            plt.text(*loc, "{:.2f}, {:d}".format(scores[i], classify[i]),
                     color='r', fontsize=8)
        else:
            # remaining proposals in yellow, faded by relative score
            plt.plot(rect[:, 0], rect[:, 1], 'y', alpha=alpha, linewidth=1.0)
            plt.text(xmin, ymin, "{:.2f}[{:d}]".format(scores[i], classify[i]),
                     color='c', fontsize=7, alpha=alpha)
       
       
    cocogt = retbox(ent['gtbox'], format='xywh')
    plt.plot(cocogt[:, 0], cocogt[:, 1], 'k', linewidth=3.0)

    question = ent['sentence']['raw']
    imglast = image.split("/")[-1]
    plt.title("Pred index: {} (G = bottom-up GT, K = COCO GT, R = prediction)".format(clspred))
    plt.xlabel("{}".format(question))
    plt.ylabel("IOU COCO: {:.2f}, BUP: {:.2f}".format(iou_cocogt, iou_bottomupgt))
    path = os.path.join(DIR, "ann_{}_{}".format(sent_id, imglast))
    # only save figures where the prediction matches the COCO GT box
    if iou_cocogt >= 0.5:
        plt.savefig(path, dpi=150)
    plt.close()
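
retbox is the one plotting helper this function depends on: it evidently turns a box into a closed corner polygon whose columns feed plt.plot. A minimal version consistent with both call sites above (the format keyword handling is inferred from the code, not copied from the project):

import numpy as np

def retbox(box, format='x1y1x2y2'):
    # Return a closed 5x2 corner polygon for plt.plot(rect[:, 0], rect[:, 1]).
    if format == 'xywh':
        x, y, w, h = box
        x1, y1, x2, y2 = x, y, x + w, y + h
    else:
        x1, y1, x2, y2 = box
    return np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2], [x1, y1]])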
Example #4
    def __getitem__(self, idx):
        ent = self.data[idx]
        sent_id = ent['sentence']['sent_id']
        file_name = ent['image_info']['file_name']
        img_id = ent['image_id']
        ans = ent['category_id']  
        W = ent['image_info']['width']
        H = ent['image_info']['height']
        que = ent['sentence']['sent']                  
        gtbox = ent['gtbox']
        gtbox = torch.tensor(gtbox)
        # boxes from refcoco are in xywh format
        gtboxorig = convert_xywh_x1y1x2y2(gtbox.unsqueeze(0)).squeeze(0)
        box_coords = ent['boxes']
        box_coords = torch.tensor(box_coords)
        
        
        if 'train' in file_name:
            pk = pickle.load(open(osp.join("feats", "train2014", file_name + ".pkl"), "rb"))
        elif 'val' in file_name:
            pk = pickle.load(open(osp.join("feats", "val2014", file_name + ".pkl"), "rb"))

        L = len(pk) - 1  # number of box entries; the last pickle entry holds whole-image info

        if L <= 0:
            L = 1

        N = 20  # maximum number of boxes to use
        L = 20  # comment out this line to keep a variable L
        if L > N:
            L = N
            
        box_feats = np.zeros((N, 2048), dtype=np.float32)
        box_coords = np.zeros((N, 4))
        # box_ent avoids shadowing the outer dataset entry `ent`
        for i, box_ent in enumerate(pk[:-1]):
            if i == N:
                break
            box_feats[i, :] = box_ent['feat'].flatten()
            box_coords[i, :] = np.array(box_ent['coords'])

        lastent = pk[-1]
        wholefeat = lastent['image']
        W = lastent['w']
        H = lastent['h']

        box_feats = torch.from_numpy(box_feats)
        box_coordsorig = box_coords
        box_coords_6d = self._process_boxes(box_coords, W, H)
        box_coords_6d = torch.from_numpy(box_coords_6d)
        
        # boxes in the h5 files are in x1 y1 x2 y2 format
        iou = getIOU(gtboxorig.unsqueeze(0), torch.from_numpy(box_coordsorig).float())
        correct = iou > 0.5
        _, idx = torch.max(iou, dim=0)
        gtboxiou = box_coordsorig[idx]
        gtboxiou = torch.from_numpy(gtboxiou)
        
        tokens = tokenize_ques(self.dictionary, que)
        qfeat = torch.from_numpy(tokens).long()
        Lvec = torch.zeros(N).long()
        Lvec[:L] = 1
        return (sent_id, ans, box_feats, box_coordsorig, box_coords_6d.float(),
                gtboxorig.float(), qfeat, Lvec, idx, correct.view(-1))
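
_process_boxes is what produces the 6-d coordinate features returned above. Its exact encoding is not shown in these snippets; a common choice, and only a guess here, is the four corners normalized by image size plus normalized width and height:

import numpy as np

def _process_boxes(self, box_coords, W, H):
    # Method on the dataset class (sketch, not the project's implementation).
    # box_coords: (N, 4) array in (x1, y1, x2, y2) pixel coordinates.
    # Returns (N, 6): normalized corners plus normalized width and height.
    # This 6-d layout is an assumption, not taken from the VQD codebase.
    boxes = np.asarray(box_coords, dtype=np.float32)
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    return np.stack([x1 / W, y1 / H, x2 / W, y2 / H,
                     (x2 - x1) / W, (y2 - y1) / H], axis=1)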
Example #5
    print(kid, len(qid2ent[kid]))

#%%


# box functions
def xywh_to_xyxy(boxes):
    """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
    return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))


def xyxy_to_xywh(boxes):
    """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
    return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))


acc = 0
L = len(testds.data)
for ent in testds.data:
    imgid = ent['image_id']
    gtbox_xywh = np.array([ent['gtbox']])
    boxes_xywh = np.array([b['box'] for b in qid2ent[imgid]])
    gtbox_xyxy = torch.from_numpy(xywh_to_xyxy(gtbox_xywh))
    boxes_xyxy = torch.from_numpy(xywh_to_xyxy(boxes_xywh))
    # count proposals whose IOU with the GT box exceeds 0.5
    hits = eval_extra.getIOU(gtbox_xyxy, boxes_xyxy) > 0.5
    if hits.sum().item() >= 1:
        acc += 1.0

print("\nAccuracy using Mattnet Boxes {:.2f}%".format(100 * acc / L))