def boxes_to_masks(self, img_path, boxes, labels):
    """
    Predict one binary instance mask (plus its RLE encoding) per given box.

    Arguments:
    - img_path: path of the image file, read with cv2.imread
    - boxes   : ndarray [[xyxy]] (n, 4) in original image coordinates
    - labels  : ndarray (n, ) class label of each box
    Return:
    - masks   : (n, ih, iw) uint8 [0,1]
    - rles    : list of rle instances, one per mask, in the same order
    """
    image = cv2.imread(img_path)

    # Build the network input; the first entry of im_scales is the factor
    # applied to the image, and it is reused below to rescale the boxes.
    blobs, im_scales = self._get_blobs(image)
    data_blob = blobs['data']  # (1, iH, iW, 3)
    blobs['im_info'] = np.array(
        [[data_blob.shape[1], data_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    # The forward pass populates self.net._predictions, from which the
    # head feature map ('net_conv') is fetched.
    self.net.test_image(blobs['data'], blobs['im_info'])
    head_feats = self.net._predictions['net_conv']

    # The mask branch is fed boxes expressed in the scaled image.
    scaled_boxes = boxes * im_scales[0]
    probs = self.net._predict_masks_from_boxes_and_labels(
        head_feats, scaled_boxes, labels)
    probs = probs.data.cpu().numpy()

    # Paste the predictions back into the original image frame
    # -> (N, ih, iw) uint8 [0-255], then binarize -> uint8 [0,1].
    full_size = recover_masks(probs, boxes, image.shape[0], image.shape[1])
    masks = (full_size > 122.).astype(np.uint8)

    # COCO's RLE encoder is handed Fortran-ordered arrays.
    rles = [COCOmask.encode(np.asfortranarray(mask)) for mask in masks]
    return masks, rles
def forward_image(self, img_path, nms_thresh=.3, conf_thresh=.65):
    """
    Detect objects in an image, predict their masks and compute the
    per-detection features.

    Arguments:
    - img_path   : path to image
    - nms_thresh : nms threshold
    - conf_thresh: confidence threshold [0,1]; lowered in steps of 0.1
                   while no detection passes it
    Return "data" is a dict of
    - det_ids: list of det_ids, order consistent with dets and masks
    - dets   : [{det_id, box, category_name, category_id, score}], box is
               [xywh] and category_id is coco_cat_id
    - masks  : ndarray (n, im_h, im_w) uint8 [0,1]
    - Feats  :
      - pool5      : Variable cuda (n, 1024, 7, 7)
      - fc7        : Variable cuda (n, 2048, 7, 7)
      - lfeats     : Variable cuda (n, 5)
      - dif_lfeats : Variable cuda (n, 5*topK)
      - cxt_fc7    : Variable cuda (n, topK, 2048)
      - cxt_lfeats : Variable cuda (n, topK, 5)
    - cxt_det_ids : list of [surrounding_det_ids] for each det_id
    Raises:
    - RuntimeError: if no detection is found even after the confidence
                    threshold has been lowered below zero
    """
    # read image
    im = imread(img_path)

    # 1st step: detect objects
    scores, boxes = self.mrcn.predict(img_path)

    # get head feats, i.e., net_conv
    # Variable cuda (1, 1024, h, w)
    net_conv = self.mrcn.net._predictions['net_conv']
    im_info = self.mrcn.net._im_info  # [[H, W, im_scale]]

    # get cls_to_dets, class_name -> [xyxys] which is (n, 5)
    cls_to_dets, num_dets = self.cls_to_detections(
        scores, boxes, nms_thresh, conf_thresh)

    # make sure num_dets > 0: relax the confidence threshold step by step.
    # The loop is bounded: once a negative threshold has been tried there
    # is nothing left to relax (assuming scores are confidences in [0, 1],
    # as the conf_thresh contract suggests), so fail loudly rather than
    # spin forever.
    thresh = conf_thresh
    while num_dets == 0:
        if thresh < 0:
            raise RuntimeError(
                'no detections found for %s even with a negative '
                'confidence threshold' % img_path)
        thresh -= 0.1
        cls_to_dets, num_dets = self.cls_to_detections(
            scores, boxes, nms_thresh, thresh)

    # add to dets; det_id is the running index over all categories
    dets = []
    for category_name, detections in cls_to_dets.items():
        # detections: list of (n, 5), [xyxyc]
        for detection in detections:
            x1, y1, x2, y2, sc = detection
            dets.append({
                'det_id': len(dets),
                'box': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],  # xyxy -> xywh
                'category_name': category_name,
                'category_id':
                    self.imdb._class_to_coco_cat_id[category_name],
                'score': sc
            })
    Dets = {det['det_id']: det for det in dets}
    det_ids = [det['det_id'] for det in dets]

    # 2nd step: get masks
    boxes = xywh_to_xyxy(
        np.array([det['box'] for det in dets]))  # xyxy (n, 4) ndarray
    labels = np.array(
        [self.imdb._class_to_ind[det['category_name']] for det in dets])
    # the mask branch is fed boxes scaled by im_scale (im_info[0][2])
    mask_prob = self.mrcn.net._predict_masks_from_boxes_and_labels(
        net_conv, boxes * im_info[0][2], labels)
    mask_prob = mask_prob.data.cpu().numpy()
    # (N, ih, iw) uint8 [0-255]
    masks = recover_masks(mask_prob, boxes, im.shape[0], im.shape[1])
    masks = (masks > 122.).astype(np.uint8)  # (N, ih, iw) uint8 [0,1]

    # 3rd step: compute features
    pool5, fc7 = self.mrcn.box_to_spatial_fc7(
        net_conv, im_info, boxes)  # (n, 1024, 7, 7), (n, 2048, 7, 7)
    lfeats = self.compute_lfeats(det_ids, Dets, im)
    dif_lfeats = self.compute_dif_lfeats(det_ids, Dets)
    cxt_fc7, cxt_lfeats, cxt_det_ids = self.fetch_cxt_feats(
        det_ids, Dets, fc7, self.model_opt)

    # move to Variable cuda
    lfeats = Variable(torch.from_numpy(lfeats).cuda())
    dif_lfeats = Variable(torch.from_numpy(dif_lfeats).cuda())
    cxt_lfeats = Variable(torch.from_numpy(cxt_lfeats).cuda())

    # return
    data = {
        'det_ids': det_ids,
        'dets': dets,
        'masks': masks,
        'cxt_det_ids': cxt_det_ids,
        'Feats': {
            'pool5': pool5,
            'fc7': fc7,
            'lfeats': lfeats,
            'dif_lfeats': dif_lfeats,
            'cxt_fc7': cxt_fc7,
            'cxt_lfeats': cxt_lfeats
        }
    }
    return data
def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    For every image of `imdb` this runs detection, per-class NMS, an
    optional cap on the number of detections per image, and the mask
    branch on the surviving boxes. The boxes are pickled to
    `detections.pkl` in the output directory and boxes + RLE masks are
    handed to `imdb.evaluate_detections`.

    Arguments:
    - net              : network passed to im_detect; must also provide
                         _predict_masks_from_boxes_and_labels
    - imdb             : image database (image_index, num_classes,
                         image_path_at, evaluate_detections are used)
    - weights_filename : forwarded to get_output_dir to pick the output dir
    - max_per_image    : max detections kept per image over all classes;
                         values <= 0 disable the cap
    - thresh           : a detection is kept only if its score is > thresh
    """
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    num_classes = imdb.num_classes
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    # all_rles[cls][image] = [rle] array of N rles
    all_rles = [[[] for _ in range(num_images)]
                for _ in range(num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        # (n, 81), (n, 81*4), (n, 1024, H, W), float
        scores, boxes, net_conv, im_scale = im_detect(net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()

        # skip j = 0, because it's the background class
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            # nms is only invoked on non-empty input; an empty `keep`
            # yields a (0, 5) array below
            keep = nms(torch.from_numpy(cls_dets),
                       cfg.TEST.NMS).numpy() if cls_dets.size > 0 else []
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        # run mask branch on all_boxes[:][i]
        box_chunks = []
        accumulated_labels = []
        for j in range(1, num_classes):
            num_cls_dets = all_boxes[j][i].shape[0]
            if num_cls_dets > 0:
                box_chunks.append(all_boxes[j][i][:, :4])
                accumulated_labels += [j] * num_cls_dets

        if box_chunks:
            # accumulate (at most max_per_image) boxes [xyxy]
            accumulated_boxes = np.vstack(box_chunks)
            # n category labels
            accumulated_labels = np.array(accumulated_labels, dtype=np.uint8)
            mask_prob = net._predict_masks_from_boxes_and_labels(
                net_conv,
                accumulated_boxes * im_scale,  # scaled boxes [xyxy]
                accumulated_labels)  # (n, 14, 14)
            mask_prob = mask_prob.data.cpu().numpy()  # convert to numpy
            # presumably (n, ih, iw) uint8 [0-255], hence the 122 cut-off
            masks = recover_masks(mask_prob, accumulated_boxes,
                                  im.shape[0], im.shape[1])
            # (n, ih, iw) uint8 [0,1] original size
            masks = (masks > 122.).astype(np.uint8)
            rles = [COCOmask.encode(np.asfortranarray(m)) for m in masks]
        else:
            # No detection survived for this image: np.vstack would raise
            # on an empty list, and there is nothing to segment anyway.
            rles = []

        # add to all_rles: rles are ordered class by class, exactly like
        # the boxes were accumulated above
        ri = 0
        for j in range(1, num_classes):
            ri_next = ri + all_boxes[j][i].shape[0]
            all_rles[j][i] = rles[ri:ri_next]
            assert len(all_rles[j][i]) == all_boxes[j][i].shape[0]
            ri = ri_next

        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time(),
                      _t['misc'].average_time()))

    # only the boxes are pickled; the rles are passed to the evaluation only
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, all_rles, output_dir)
def _top_foreground_detection(scores, boxes):
    """Return (pred_box, pred_class) of the best non-background score."""
    # Search the foreground columns only, so that a background score that
    # happens to equal the foreground maximum can never be selected.
    fg_scores = scores[:, 1:]
    pred_roi, fg_col = np.unravel_index(np.argmax(fg_scores), fg_scores.shape)
    pred_class = fg_col + 1  # undo the shift caused by dropping column 0
    pred_box = boxes[pred_roi, pred_class * 4:(pred_class + 1) * 4]
    return pred_box, pred_class


def eval_split(loader, model, crit, split, opt, max_per_image=100, thresh=0.):
    """Evaluate box and mask predictions for every sentence of a split.

    For each sentence of each test batch, the single highest-scoring
    non-background detection is taken as the prediction; its box is
    compared with the ground-truth box (hit if IoU >= 0.5) and its mask
    with the ground-truth mask.

    Arguments:
    - loader : provides getTestBatch(split), returning a dict with keys
               'data', 'im_info', 'gt_boxes', 'gt_masks', 'labels',
               'file_name' and 'bounds'
    - model  : network; switched to eval mode, passed to im_detect and
               used for _predict_masks_from_boxes_and_labels
    - crit   : unused
    - split  : name of the split to evaluate
    - opt    : dict with optional 'verbose' (default True) and
               'num_sents' (default -1; values <= 0 mean no limit)
    - max_per_image, thresh : unused, kept for interface compatibility
    Return a tuple of
    - box accuracy (fraction of sentences with box IoU >= 0.5;
      0.0 if no sentence was evaluated)
    - eval_seg_iou_list : the mask IoU thresholds [.5, .6, .7, .8, .9]
    - seg_correct       : int32 array, #sentences whose mask IoU reaches
                          each threshold
    - seg_total         : number of sentences with an evaluated mask
    - cum_I, cum_U      : accumulated intersection / union over sentences
    - num_sent          : number of evaluated sentences
    """
    verbose = opt.get('verbose', True)
    num_sents = opt.get('num_sents', -1)

    # set mode
    model.eval()

    # initialize
    loss_evals = 0
    acc = 0
    num_sent = 0
    finish_flag = False

    cum_I, cum_U = 0, 0
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0

    while True:
        data = loader.getTestBatch(split)
        image = data['data']
        im_info = data['im_info']
        gt_boxes = data['gt_boxes']  # scaled
        gt_masks = data['gt_masks']
        labels = data['labels']
        file_name = data['file_name']
        bounds = data['bounds']

        blobs = {}
        blobs['data'] = image
        blobs['im_info'] = im_info
        blobs['file_name'] = file_name
        blobs['bounds'] = bounds

        # one forward pass per sentence of the batch
        for i in range(labels.shape[0]):
            blobs['gt_boxes'] = gt_boxes[i:i+1, :]
            blobs['gt_masks'] = gt_masks[i:i+1, :, :]
            label = labels[i:i+1, :]
            # keep only the first max_len tokens, max_len being the number
            # of non-zero entries
            max_len = (label != 0).sum().data[0]
            blobs['labels'] = label[:, :max_len]  # (1, max_len)
            blobs['sent_id'] = i

            # (n, 81), (n, 81*4), (n, 1024, H, W), float
            scores, boxes, net_conv, im_scale = im_detect(model, blobs)

            pred_box, pred_class = _top_foreground_detection(scores, boxes)

            # gt boxes are scaled, predicted boxes are not
            gt_box = blobs['gt_boxes'][0, :4] / im_scale
            iou = computeIoU_box(pred_box, gt_box)  # both original size
            if iou >= 0.5:
                acc += 1
            loss_evals += 1

            # run mask branch on the single predicted box
            accumulated_boxes = np.array([pred_box])
            accumulated_labels = np.array([pred_class])
            mask_prob = model._predict_masks_from_boxes_and_labels(
                net_conv,
                accumulated_boxes * im_scale,  # scaled boxes [xyxy]
                accumulated_labels)  # (n, 14, 14)
            mask_prob = mask_prob.data.cpu().numpy()  # convert to numpy

            # original image size recovered from the scaled size in im_info
            orig_h = int(round(blobs['im_info'][0][0]/im_scale))
            orig_w = int(round(blobs['im_info'][0][1]/im_scale))
            # presumably (1, ih, iw) uint8 [0-255], hence the 122 cut-off
            pred_mask = recover_masks(mask_prob, accumulated_boxes,
                                      orig_h, orig_w)
            # (ih, iw) uint8 [0,1] original size
            pred_mask = np.squeeze((pred_mask > 122.).astype(np.uint8),
                                   axis=0)

            gt_mask = imresize(np.squeeze(blobs['gt_masks'], axis=0),
                               size=pred_mask.shape, interp='nearest')

            # compute iou
            I, U = computeIoU_seg(pred_mask, gt_mask)
            cum_I += I
            cum_U += U
            # an empty union counts as IoU 0 instead of dividing by zero
            seg_iou = I * 1.0 / U if U else 0.0
            for k, seg_iou_thresh in enumerate(eval_seg_iou_list):
                seg_correct[k] += (seg_iou >= seg_iou_thresh)
            seg_total += 1

            num_sent += 1

            # if used up
            if num_sents > 0 and loss_evals >= num_sents:
                finish_flag = True
                break

            torch.cuda.empty_cache()

        # print
        ix0 = bounds['it_pos_now']
        ix1 = bounds['it_max']
        if verbose:
            # guard the ratios: a batch may carry no sentence at all
            det_acc_pct = acc * 100.0 / loss_evals if loss_evals else 0.0
            seg_acc_pct = \
                seg_correct[0] * 100.0 / seg_total if seg_total else 0.0
            seg_iou_pct = cum_I * 100.0 / cum_U if cum_U else 0.0
            print('evaluating [%s] ... image[%d/%d]\'s sents, det acc=%.2f%%, seg acc=%.2f%%, seg IoU=%.2f%%' % \
                  (split, ix0, ix1, det_acc_pct, seg_acc_pct, seg_iou_pct))

        # if we wrapped around the split
        if finish_flag or bounds['wrapped']:
            break

    # explicit float division, consistent with the other ratios above
    det_acc = acc * 1.0 / loss_evals if loss_evals else 0.0
    return det_acc, eval_seg_iou_list, seg_correct, seg_total, cum_I, cum_U, num_sent