def caption(self, im_path, gt_objects=None, gt_regions=None, thr=0.0, nms=False, top_N=100, clip=True, use_beam_search=False):
    """Run the region-captioning branch on the image stored at `im_path`.

    Returns a tuple (region_caption, logprobs, pred_boxes) of numpy arrays;
    the predicted boxes are regressed from the region ROIs and expressed in
    original-image coordinates (the input scale is divided back out).
    NOTE(review): `thr`, `nms` and `top_N` are currently unused here.
    """
    img = cv2.imread(im_path)
    im_data, im_scales = self.get_image_blob_noscale(img)
    scale = im_scales[0]
    # Bring any supplied ground-truth boxes into the resized-image frame.
    # This mutates the caller's arrays in place.
    if gt_objects is not None:
        gt_objects[:, :4] = gt_objects[:, :4] * scale
    if gt_regions is not None:
        gt_regions[:, :4] = gt_regions[:, :4] * scale
    im_info = np.array(
        [[im_data.shape[1], im_data.shape[2], scale]],
        dtype=np.float32)
    # Forward pass; element [2] of the model output is the region branch.
    region_caption, bbox_pred, region_rois, logprobs = self(
        im_data, im_info, gt_objects,
        gt_regions=gt_regions, use_beam_search=use_beam_search)[2][:]
    # Map ROIs back to original-image coordinates, then apply regression deltas.
    roi_boxes = region_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = bbox_transform_inv_hdn(roi_boxes, deltas)
    if clip:
        pred_boxes = clip_boxes(pred_boxes, img.shape)
    return (region_caption.numpy(), logprobs.numpy(), pred_boxes)
def describe(self, im_path, top_N=10):
    """Run full scene description (objects + predicates + region captions) on
    the image stored at `im_path`.

    Returns a 10-tuple: region captions/logprobs/boxes followed by the
    interpreted object detections and subject/object/predicate assignments.
    NOTE(review): `top_N` is currently unused here.
    """
    img = cv2.imread(im_path)
    im_data, im_scales = self.get_image_blob_noscale(img)
    im_info = np.array(
        [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
        dtype=np.float32)
    object_result, predicate_result, region_result = self(im_data, im_info)
    # Decode the object/predicate branch outputs into final detections.
    (object_boxes, object_scores, object_inds, sub_assignment,
     obj_assignment, predicate_inds, region_assignment) = self.interpret_result(
        object_result[0], object_result[1], object_result[2],
        predicate_result[0], predicate_result[1], im_info, img.shape)
    # Decode the region branch: regress caption boxes back to image coords.
    region_caption, bbox_pred, region_rois, logprobs = region_result[:]
    roi_boxes = region_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = clip_boxes(bbox_transform_inv_hdn(roi_boxes, deltas), img.shape)
    return (region_caption.numpy(), logprobs.numpy(), pred_boxes,
            object_boxes, object_inds, object_scores, sub_assignment,
            obj_assignment, predicate_inds, region_assignment)
def map_eval(self, cls_prob, bbox_pred, rois, gt_boxes, im_info, max_per_image=100, score_thresh=0.05, overlap_thresh=0.5, nms=True, nms_thresh=0.6):
    """Per-image mAP bookkeeping: evaluate every foreground class against
    the ground-truth boxes.

    Returns (classes_scores, classes_tf, classes_gt_num), one entry per
    foreground class (indices 1..n_classes_obj-1): the kept detection
    scores, their true-positive flags, and the ground-truth count.

    Fix: the original recomputed `scores`, `boxes`, `box_deltas` and the
    full `bbox_transform_inv_hdn` + `clip_boxes` pipeline inside the class
    loop even though none of it depends on `j`; it is now hoisted out,
    giving identical results with one pass instead of n_classes_obj-1.
    """
    # Loop-invariant work: identical for every class, so do it once.
    scores = cls_prob.data.cpu().numpy()
    boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    # Apply bounding-box regression deltas, then clip to the image extent
    # expressed in original-image coordinates.
    box_deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])

    classes_scores = []
    classes_tf = []
    classes_gt_num = []
    image_scores = np.array([])
    for j in range(1, self.n_classes_obj):
        # NOTE(original): one ROI could be matched to several gt boxes, and
        # the score threshold choice is delicate — behavior kept as-is.
        cls_scores, cls_tp, cls_gt_num = \
            image_cls_eval(scores[:, j], pred_boxes[:, j*4:(j+1)*4],
                           gt_boxes, j,
                           score_thresh=score_thresh,
                           overlap_thresh=overlap_thresh,
                           nms=nms, nms_thresh=nms_thresh)
        classes_scores.append(cls_scores)
        classes_tf.append(cls_tp)
        classes_gt_num.append(cls_gt_num)
        image_scores = np.append(image_scores, cls_scores)

    # Limit to max_per_image detections *over all classes*: keep only
    # detections scoring at or above the max_per_image-th best score.
    if image_scores.size > max_per_image:
        image_thresh = np.sort(image_scores)[-max_per_image]
        for k in range(self.n_classes_obj - 1):
            keep = np.where(classes_scores[k] >= image_thresh)
            classes_scores[k] = classes_scores[k][keep]
            classes_tf[k] = classes_tf[k][keep]
    return classes_scores, classes_tf, classes_gt_num
def describe(self, im_data, im_info, top_N=[50]):
    """Describe a pre-blobbed image tensor: interpret object/predicate
    outputs and regress region-caption boxes.

    Takes `im_data` already in network blob layout (the commented-out code
    below shows the previous path-based preprocessing) and returns a
    12-tuple of captions, region/class boxes, scores and relationship lists.

    NOTE(review): `top_N=[50]` is a mutable default argument; it is only
    read (`top_N[0]`) here, so it is harmless in practice, but a tuple or
    None-sentinel would be safer — confirm before changing the signature.
    """
    # image = cv2.imread(im_path)
    # im_data, im_scales = self.get_image_blob_noscale(image)
    # im_data = torch.from_numpy(im_data)
    # im_info = np.array(
    #     [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
    #     dtype=np.float32)
    # (H, W, C) view of the NCHW blob, used for clipping boxes below.
    image_shape = np.array(
        [im_data.shape[2], im_data.shape[3], im_data.shape[1]])
    print 'im_data.shape', im_data.shape, image_shape
    # print 'im_info ', im_info
    end = time.time()
    object_result, predicate_result, region_result = self(im_data, im_info)
    print('Time taken for prediction: ', time.time() - end)
    # class_pred_boxes, class_scores, class_inds, subject_inds, object_inds, predicate_inds, region_list, predicate_scores\
    # , subject_scores, object_scores, relationship_scores = self.interpret_result(object_result[0], object_result[1], object_result[2], \
    #     predicate_result[0], predicate_result[1], \
    #     im_info, image.shape, top_N = top_N[0])
    # Decode object/predicate outputs into detections + relationship lists
    # (expects the 9-value top_N variant of interpret_result).
    class_pred_boxes, class_scores, class_inds, sub_list, obj_list, pred_list, predicate_inds, \
        predicate_scores, region_list = self.interpret_result(object_result[0], object_result[1], object_result[2], \
            predicate_result[0], predicate_result[1], \
            im_info, image_shape, top_N = top_N[0])
    # Region branch: undo input scaling, apply deltas, clip to the image.
    region_caption, bbox_pred, region_rois, logprobs = region_result[:]
    boxes = region_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    box_deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
    region_pred_boxes = clip_boxes(pred_boxes, image_shape)
    # result = {
    #     'region_caption' : region_caption.numpy(),
    #     'region_list' : region_list,
    #     'region_box' : region_pred_boxes,
    #     'region_logprobs' : logprobs.numpy(),
    #     'classes': {'bbox': class_pred_boxes, 'scores': class_scores, 'class': class_inds},
    #     'relationships': {'Subject_indexes': subject_inds, 'Object_indexes': object_inds}
    # }
    return region_caption.numpy(), region_list, region_pred_boxes, logprobs.numpy(), class_pred_boxes, \
        class_scores, class_inds, sub_list, obj_list, pred_list, predicate_inds, predicate_scores
def evaluate(self, im_data, im_info, gt_regions, thr=0.5, nms=False, top_Ns=[100], use_gt_boxes=False, use_gt_regions=False, only_predicate=False):
    """Evaluate region-proposal recall against the ground-truth regions.

    Returns (rel_cnt, rel_correct_cnt) from check_recall over the top-50
    region ROIs. The caption/box outputs are decoded but (per the original
    open question in the comments) not yet scored here; most parameters
    (`thr`, `nms`, `top_Ns`, `use_gt_*`, `only_predicate`) are currently
    unused and kept for interface compatibility.
    """
    # Forward pass in graph-generation mode; only the region branch is used.
    region_caption, bbox_pred, region_rois, logprobs = self(
        im_data, im_info, gt_regions=gt_regions, graph_generation=True)[:]
    # Regress predicted boxes back to original-image coordinates.
    # NOTE(review): pred_boxes is computed but not consumed below — the
    # caption-level evaluation it was meant for is still an open TODO.
    roi_boxes = region_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = bbox_transform_inv_hdn(roi_boxes, deltas)
    # Recall of the top-50 region proposals against ground truth.
    rel_correct_cnt, rel_cnt = check_recall(region_rois, gt_regions, 50)
    return rel_cnt, rel_correct_cnt
def object_detection(self, image_path, gt_boxes=None, min_score=1 / 150.):
    """Detect objects in the image stored at `image_path`.

    Returns (cls_id, object_score, regressed_boxes): the foreground class
    index, softmax score and clipped regressed box of every (roi, class)
    pair whose probability exceeds `min_score`.

    Fixes/improvements over the original:
    - `min_score` (previously hard-coded to 1/150, i.e. uniform over the
      150-way classifier) is now a keyword parameter with the same default.
    - `dtype=np.float` (alias removed in NumPy >= 1.24) replaced by the
      equivalent explicit `np.float64`.
    - The per-detection Python loops gathering deltas and scores are
      replaced by vectorized fancy indexing with identical results.
    """
    image = cv2.imread(image_path)
    im_data, im_scales = self.get_image_blob_noscale(image)
    if gt_boxes is not None:
        # Scale gt boxes into network coordinates (mutates caller's array).
        gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]
    im_info = np.array(
        [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
        dtype=np.float32)
    object_result = self(im_data, im_info)[0]
    cls_prob_object, bbox_object, object_rois = object_result[:]
    prob = F.softmax(cls_prob_object).cpu().data.numpy()
    boxes = object_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    # All (roi, class) pairs above threshold; drop background (class 0).
    box_id, cls_id = np.where(prob > min_score)
    fg = cls_id > 0
    box_id, cls_id = box_id[fg], cls_id[fg]
    box_deltas = bbox_object.data.cpu().numpy()
    # Gather each kept class's 4 regression deltas in one fancy-indexed
    # shot (shape (K, 4)); float64 matches the original np.float output.
    new_box_delta = box_deltas[
        box_id[:, None], (cls_id * 4)[:, None] + np.arange(4)
    ].astype(np.float64)
    regressed_boxes = bbox_transform_inv_hdn(boxes[box_id], new_box_delta)
    regressed_boxes = clip_boxes(regressed_boxes, image.shape)
    object_score = prob[box_id, cls_id].astype(np.float64)
    return (cls_id, object_score, regressed_boxes)
def interpret_result(self, cls_prob, bbox_pred, rois, cls_prob_predicate, mat_phrase, im_info, im_shape, nms=True, clip=True, min_score=0.01, use_gt_boxes=False): scores, inds = cls_prob[:, 0:].data.max(1) # inds += 1 scores, inds = scores.cpu().numpy(), inds.cpu().numpy() predicate_scores, predicate_inds = cls_prob_predicate[:, 0:].data.max(1) # predicate_inds += 1 predicate_scores, predicate_inds = predicate_scores.cpu().numpy( ), predicate_inds.cpu().numpy() keep = np.where((inds > 0) & (scores >= min_score)) scores, inds = scores[keep], inds[keep] # Apply bounding-box regression deltas keep = keep[0] box_deltas = bbox_pred.data.cpu().numpy()[keep] box_deltas = np.asarray([ box_deltas[i, (inds[i] * 4):(inds[i] * 4 + 4)] for i in range(len(inds)) ], dtype=np.float) boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2] if use_gt_boxes: nms = False clip = False pred_boxes = boxes else: pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas) if clip: pred_boxes = clip_boxes(pred_boxes, im_shape) # nms if nms and pred_boxes.shape[0] > 0: pred_boxes, scores, inds, keep_keep = nms_detections(pred_boxes, scores, 0.3, inds=inds) keep = keep[keep_keep] sub_list = np.array([], dtype=int) obj_list = np.array([], dtype=int) pred_list = np.array([], dtype=int) # print 'keep', keep # print 'mat_phrase', mat_phrase for i in range(mat_phrase.shape[0]): sub_id = np.where(keep == mat_phrase[i, 0])[0] obj_id = np.where(keep == mat_phrase[i, 1])[0] if len(sub_id) > 0 and len(obj_id) > 0: sub_list = np.append(sub_list, sub_id[0]) obj_list = np.append(obj_list, obj_id[0]) pred_list = np.append(pred_list, i) predicate_scores = predicate_scores.squeeze()[pred_list] final_list = predicate_scores.argsort()[::-1] predicate_inds = predicate_inds.squeeze()[pred_list[final_list]] sub_list = sub_list[final_list] obj_list = obj_list[final_list] region_list = mat_phrase[pred_list[final_list], 2:] return pred_boxes, scores, inds, sub_list, obj_list, predicate_inds, region_list
def interpret_result(self, cls_prob, bbox_pred, rois, cls_prob_predicate, mat_phrase, im_info, im_shape, nms=True, clip=True, min_score=0.5, top_N=50, use_gt_boxes=False):
    """Decode raw object/predicate network outputs into final detections
    and relationship index lists (high-threshold variant).

    Returns (pred_boxes, scores, inds, sub_list, obj_list, pred_list,
    predicate_inds, predicate_scores, region_list). Unlike the sibling
    variant, relationships are returned unsorted and the predicate
    score/index arrays are returned in full (index with `pred_list`);
    the dead relationship-ranking code below was left commented out, so
    `top_N` is currently unused.
    """
    # Per-roi best foreground class; +1 restores absolute class index
    # after excluding the background column.
    scores, inds = cls_prob[:, 1:].data.max(1)
    inds += 1
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()
    # Per-phrase best foreground predicate, same offset trick.
    predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.max(1)
    predicate_inds += 1
    predicate_scores, predicate_inds = predicate_scores.cpu().numpy(
    ), predicate_inds.cpu().numpy()
    # Keep rois above the (fairly strict, 0.5) score threshold.
    keep = np.where((inds > 0) & (scores >= min_score))
    scores, inds = scores[keep], inds[keep]
    # Apply bounding-box regression deltas for each roi's predicted class.
    keep = keep[0]
    box_deltas = bbox_pred.data.cpu().numpy()[keep]
    box_deltas = np.asarray([
        box_deltas[i, (inds[i] * 4):(inds[i] * 4 + 4)]
        for i in range(len(inds))
    ], dtype=np.float)
    boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
    if use_gt_boxes:
        # Ground-truth boxes are final: no regression, no clip, no nms.
        nms = False
        clip = False
        pred_boxes = boxes
    else:
        pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
    if clip:
        # print 'clipping pred boxes', im_shape
        pred_boxes = clip_boxes(pred_boxes, im_shape)
    # nms (IoU 0.25); keep_keep maps survivors back into `keep`.
    if nms and pred_boxes.shape[0] > 0:
        pred_boxes, scores, inds, keep_keep = nms_detections(pred_boxes, scores, 0.25, inds=inds)
        keep = keep[keep_keep]
    sub_list = np.array([], dtype=int)
    obj_list = np.array([], dtype=int)
    pred_list = np.array([], dtype=int)
    # Keep only phrases whose subject AND object rois both survived.
    for i in range(mat_phrase.shape[0]):
        sub_id = np.where(keep == mat_phrase[i, 0])[0]
        obj_id = np.where(keep == mat_phrase[i, 1])[0]
        if len(sub_id) > 0 and len(obj_id) > 0:
            sub_list = np.append(sub_list, sub_id[0])
            obj_list = np.append(obj_list, obj_id[0])
            pred_list = np.append(pred_list, i)
    region_list = mat_phrase[pred_list, 2:]
    # Dead code (kept for reference): ranked relationships by
    # predicate * subject * object score and truncated to top_N, e.g.
    #   relationship_scores = predicate_scores.squeeze()[pred_list] \
    #       * scores[sub_list].squeeze() * scores[obj_list].squeeze()
    #   final_list = relationship_scores.argsort()[::-1][:top_N]
    # followed by reindexing predicate_inds / sub_list / obj_list etc.
    # print ('Pred_list : ', pred_list.shape, ' Total Scores: ', scores.shape, ' Keep_size: ', keep.shape)
    return pred_boxes, scores, inds, sub_list, obj_list, pred_list, predicate_inds, predicate_scores, region_list
def interpret_HDN(self, cls_prob, bbox_pred, rois, cls_prob_predicate, mat_phrase, im_info, nms=True, clip=True, min_score=0.0, top_N=100, use_gt_boxes=False):
    """Decode HDN outputs into detections and the top-N relationships
    ranked by joint subject * object * predicate score.

    Returns (pred_boxes, scores, inds, subject_inds, object_inds,
    subject_boxes, object_boxes, predicate_inds, total_scores). The
    *_inds/*_boxes outputs are truncated to `top_N`, while `total_scores`
    is returned unsorted and untruncated — index it with the same ranking
    if needed.
    """
    # Per-roi best foreground class; +1 restores the absolute class index
    # after excluding the background column.
    scores, inds = cls_prob[:, 1:].data.max(1)
    inds += 1
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()
    # print 'cls_prob_predicate : ', cls_prob_predicate
    # Per-phrase best foreground predicate, same offset trick.
    predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.max(1)
    predicate_inds += 1
    predicate_scores, predicate_inds = predicate_scores.cpu().numpy(
    ), predicate_inds.cpu().numpy()
    # Keep rois above the score threshold (default 0.0 keeps everything).
    keep = np.where((inds > 0) & (scores >= min_score))
    scores, inds = scores[keep], inds[keep]
    # Apply bounding-box regression deltas for each roi's predicted class.
    keep = keep[0]
    box_deltas = bbox_pred.data.cpu().numpy()[keep]
    box_deltas = np.asarray([
        box_deltas[i, (inds[i] * 4):(inds[i] * 4 + 4)]
        for i in range(len(inds))
    ], dtype=np.float)
    boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
    if use_gt_boxes:
        # Ground-truth boxes are final: no regression, no clip, no nms.
        nms = False
        clip = False
        pred_boxes = boxes
    else:
        pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
    if clip:
        # print 'clipping pred boxes', im_info[0][:2] / im_info[0][2]
        # Clip to the original-image extent (network extent / scale).
        pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])
    # nms (IoU 0.3); keep_keep maps survivors back into `keep`.
    if nms and pred_boxes.shape[0] > 0:
        pred_boxes, scores, inds, keep_keep = nms_detections(pred_boxes, scores, 0.3, inds=inds)
        keep = keep[keep_keep]
    sub_list = np.array([], dtype=int)
    obj_list = np.array([], dtype=int)
    pred_list = np.array([], dtype=int)
    # Keep only phrases whose subject AND object rois both survived.
    for i in range(mat_phrase.shape[0]):
        sub_id = np.where(keep == mat_phrase[i, 0])[0]
        obj_id = np.where(keep == mat_phrase[i, 1])[0]
        if len(sub_id) > 0 and len(obj_id) > 0:
            # print ('Sub id : ', sub_id, ', obj id: ', obj_id, ', Predicate: ', i)
            sub_list = np.append(sub_list, sub_id[0])
            obj_list = np.append(obj_list, obj_id[0])
            pred_list = np.append(pred_list, i)
    # Joint relationship score, then rank and keep the top N.
    total_scores = predicate_scores.squeeze()[pred_list] \
        * scores[sub_list].squeeze() * scores[obj_list].squeeze()
    top_N_list = total_scores.argsort()[::-1][:top_N]
    predicate_inds = predicate_inds.squeeze()[pred_list[top_N_list]]
    subject_boxes = pred_boxes[sub_list[top_N_list]]
    object_boxes = pred_boxes[obj_list[top_N_list]]
    predicate_scores = predicate_scores.squeeze()[pred_list]
    subject_inds = inds[sub_list[top_N_list]]
    object_inds = inds[obj_list[top_N_list]]
    return pred_boxes, scores, inds, subject_inds, object_inds, subject_boxes, object_boxes, predicate_inds, total_scores
def interpret_RMRPN(self, cls_prob_object, bbox_pred_object, rois_object, cls_prob_predicate, bbox_pred_predicate, rois_predicate, mat_phrase, rpn_scores_object, im_info, nms=True, clip=True, min_score=0.0, top_N=100, use_gt_boxes=False, use_rpn_scores=False):
    """Decode RMRPN outputs: object detections plus regressed predicate
    (union) boxes, with the top-N relationships ranked by joint score.

    Returns (pred_boxes, scores, inds, subject_inds, object_inds,
    subject_boxes, object_boxes, predicate_inds, pred_boxes_predicate);
    the relationship outputs are truncated to `top_N`. When
    `use_rpn_scores` is set, the RPN objectness of subject and object is
    folded into the ranking score as well.
    """
    # Per-roi best foreground class; +1 restores the absolute class index
    # after excluding the background column.
    scores, inds = cls_prob_object[:, 1:].data.max(1)
    inds += 1
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()
    # Per-phrase best foreground predicate, same offset trick.
    predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.max(1)
    predicate_inds += 1
    predicate_scores, predicate_inds = predicate_scores.cpu().numpy(), predicate_inds.cpu().numpy()
    # Keep rois above the score threshold (default 0.0 keeps everything).
    keep = np.where((inds > 0) & (scores >= min_score))
    scores, inds = scores[keep], inds[keep]
    # Apply bounding-box regression deltas for each roi's predicted class.
    keep = keep[0]
    box_deltas = bbox_pred_object.data.cpu().numpy()[keep]
    box_deltas = np.asarray([
        box_deltas[i, (inds[i] * 4): (inds[i] * 4 + 4)]
        for i in range(len(inds))
    ], dtype=np.float)
    boxes = rois_object.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
    if use_gt_boxes:
        # Ground-truth boxes are final: no regression, no clip, no nms.
        nms = False
        clip = False
        pred_boxes = boxes
    else:
        pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
    if clip:
        # Clip to the original-image extent (network extent / scale).
        pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])
    # nms on objects (IoU 0.60); keep_keep maps survivors back into `keep`.
    if nms and pred_boxes.shape[0] > 0:
        pred_boxes, scores, inds, keep_keep = nms_detections(pred_boxes, scores, 0.60, inds=inds)
        keep = keep[keep_keep]
    sub_list = np.array([], dtype=int)
    obj_list = np.array([], dtype=int)
    pred_list = np.array([], dtype=int)
    # keep predicate(phrase) whose sub & obj kept
    for i in range(mat_phrase.shape[0]):
        sub_id = np.where(keep == mat_phrase[i, 0])[0]
        obj_id = np.where(keep == mat_phrase[i, 1])[0]
        if len(sub_id) > 0 and len(obj_id) > 0:
            sub_list = np.append(sub_list, sub_id[0])
            obj_list = np.append(obj_list, obj_id[0])
            pred_list = np.append(pred_list, i)
    # Dead code (kept for reference): a vectorized alternative to the loop
    # above via an inverse lookup table, e.g.
    #   keep_inds = np.full(inds.size, -1)
    #   keep_inds[keep] = np.arange(keep.size)
    #   ... np.where((sub_list_n > -1) & (obj_list_n > -1))[0] ...
    # Restrict predicate arrays to surviving phrases, then regress the
    # predicate (union) boxes for each phrase's predicted predicate class.
    predicate_scores = predicate_scores[pred_list]
    predicate_inds = predicate_inds[pred_list]
    box_deltas_predicate = bbox_pred_predicate.data.cpu().numpy()[pred_list]
    box_deltas_predicate = np.asarray([
        box_deltas_predicate[i, (predicate_inds[i] * 4): (predicate_inds[i] * 4 + 4)]
        for i in range(len(predicate_inds))
    ], dtype=np.float)
    boxes_predicate = rois_predicate.data.cpu().numpy()[pred_list, 1:5] / im_info[0][2]
    pred_boxes_predicate = bbox_transform_inv_hdn(boxes_predicate, box_deltas_predicate)
    # nms on predicate boxes (IoU 0.60); filter sub/obj lists in lockstep.
    if nms and pred_boxes_predicate.shape[0] > 0:
        pred_boxes_predicate, predicate_scores, predicate_inds, keep_pred_list = \
            nms_detections(pred_boxes_predicate, predicate_scores, 0.60, inds=predicate_inds)
        sub_list = sub_list[keep_pred_list]
        obj_list = obj_list[keep_pred_list]
    # Joint relationship score, optionally weighted by RPN objectness.
    if use_rpn_scores:
        total_scores = predicate_scores.squeeze() \
            * scores[sub_list].squeeze() * scores[obj_list].squeeze() \
            * rpn_scores_object[sub_list].squeeze() * rpn_scores_object[obj_list].squeeze()
    else:
        total_scores = predicate_scores.squeeze() \
            * scores[sub_list].squeeze() * scores[obj_list].squeeze()
    # keep top N phrase
    top_N_list = total_scores.argsort()[::-1][:top_N]
    predicate_inds = predicate_inds[top_N_list]
    pred_boxes_predicate = pred_boxes_predicate[top_N_list]
    subject_inds = inds[sub_list[top_N_list]]
    object_inds = inds[obj_list[top_N_list]]
    subject_boxes = pred_boxes[sub_list[top_N_list]]
    object_boxes = pred_boxes[obj_list[top_N_list]]
    return pred_boxes, scores, inds, subject_inds, object_inds, subject_boxes, object_boxes, \
        predicate_inds, pred_boxes_predicate