def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    """Detect boxes on a dataset split, verify them with center keypoints, evaluate.

    For every selected image the network is run at each configured test scale
    on the image and its horizontal mirror. A box keeps a (re-weighted) score
    only if a center keypoint of the same class lies inside the box's central
    region; all other boxes are rejected. Survivors go through per-class
    (soft-)NMS and are capped at ``max_per_image``. Results are written to
    ``<result_dir>/results.json`` and handed to ``db.evaluate``.

    Args:
        db: Dataset object. Reads ``split``, ``db_inds``, ``configs``, ``mean``
            and ``std``; calls ``image_ids``, ``image_file``, ``class_name``,
            ``convert_to_coco`` and ``evaluate``.
        nnet: Network handle, passed through to ``decode_func`` untouched.
        result_dir: Directory receiving ``results.json`` and ``debug/``.
        debug: When true, only the first 100 images are processed and a
            PDF and a JPG visualisation is saved per image.
        decode_func: Callable ``(nnet, images, K, ae_threshold=, kernel=)``
            returning ``(dets, center)`` arrays.

    Returns:
        Always 0.

    Raises:
        ValueError: If the configured test scales do not contain 1; center
            keypoints are only collected at that scale.
    """
    debug_dir = os.path.join(result_dir, "debug")
    os.makedirs(debug_dir, exist_ok=True)

    if debug:
        db_inds = db.db_inds[:100]
    elif db.split == "trainval":
        db_inds = db.db_inds[:5000]
    else:
        db_inds = db.db_inds

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    # Boxes whose area exceeds this use the narrower (middle fifth) central
    # region; smaller boxes use the middle third.
    large_box_area = 22500

    top_bboxes = {}
    for db_ind in tqdm(db_inds, ncols=80, desc="locating kps"):
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []
        center_points = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # `x | 127` sets the low seven bits, so (inp + 1) is a multiple
            # of 128 and the output size below divides evenly.
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(
                resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # Batch of two: the image and its horizontal mirror.
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets, center = decode_func(nnet, images, K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            # Map the mirrored results back into the unflipped frame.
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)

            # Use basic slices (views) rather than list indices:
            # `center[..., [0]]` is a copy, so passing it as `out=` to
            # np.clip would silently discard the clipped values.
            center_xs = center[..., 0:1]
            center_ys = center[..., 1:2]
            center_xs /= ratios[:, 1][:, None, None]
            center_ys /= ratios[:, 0][:, None, None]
            center_xs -= borders[:, 2][:, None, None]
            center_ys -= borders[:, 0][:, None, None]
            np.clip(center_xs, 0, sizes[:, 1][:, None, None], out=center_xs)
            np.clip(center_ys, 0, sizes[:, 0][:, None, None], out=center_ys)

            # Back to original-image coordinates.
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)[0]
        if not center_points:
            raise ValueError(
                "test_scales must contain 1: center keypoints are only "
                "collected at scale 1")
        center_points = np.concatenate(center_points, axis=1)[0]

        valid_detections = detections[detections[:, 4] > -1]
        box_areas = ((valid_detections[:, 2] - valid_detections[:, 0]) *
                     (valid_detections[:, 3] - valid_detections[:, 1]))
        # (boxes, near_weight, far_weight): an inner edge of the central
        # region is (near * own_corner + far * opposite_corner) / (near + far).
        size_groups = (
            (valid_detections[box_areas > large_box_area], 3, 2),
            (valid_detections[box_areas <= large_box_area], 2, 1),
        )

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        center_cls = center_points[:, 2][:, np.newaxis]

        rescored = []
        for boxes, near_w, far_w in size_groups:
            total_w = near_w + far_w
            left_x = (near_w * boxes[:, 0] + far_w * boxes[:, 2]) / total_w
            right_x = (far_w * boxes[:, 0] + near_w * boxes[:, 2]) / total_w
            top_y = (near_w * boxes[:, 1] + far_w * boxes[:, 3]) / total_w
            bottom_y = (far_w * boxes[:, 1] + near_w * boxes[:, 3]) / total_w

            corner_scores = boxes[:, 4].copy()
            # Rejected unless a matching center is found below.
            boxes[:, 4] = -1

            # in_region[c, b]: center c has box b's class and lies strictly
            # inside box b's central region.
            in_region = (
                ((center_x - left_x[np.newaxis, :]) > 0)
                & ((center_x - right_x[np.newaxis, :]) < 0)
                & ((center_y - top_y[np.newaxis, :]) > 0)
                & ((center_y - bottom_y[np.newaxis, :]) < 0)
                & ((center_cls - boxes[:, -1][np.newaxis, :]) == 0)
            )
            has_center = in_region.any(axis=0)
            # argmax over booleans picks the first matching center per box.
            first_center = np.argmax(in_region[:, has_center], axis=0)
            boxes[has_center, 4] = (
                corner_scores[has_center] * 2
                + center_points[first_center, 3]) / 3
            rescored.append(boxes)

        detections = np.concatenate(rescored, axis=0)
        detections = detections[np.argsort(-detections[:, 4])]

        # reject detections with negative scores
        detections = detections[detections[:, 4] > -1]
        classes = detections[..., -1]

        image_bboxes = top_bboxes[image_id] = {}
        for j in range(categories):
            cat_id = j + 1
            image_bboxes[cat_id] = detections[classes == j][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                soft_nms_merge(image_bboxes[cat_id], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(image_bboxes[cat_id], Nt=nms_threshold,
                         method=nms_algorithm)
            image_bboxes[cat_id] = image_bboxes[cat_id][:, 0:5]

        scores = np.hstack(
            [image_bboxes[j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            # Score of the max_per_image-th best detection over all classes.
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (image_bboxes[j][:, -1] >= thresh)
                image_bboxes[j] = image_bboxes[j][keep_inds]

        if debug:
            # Reverse the channel order for matplotlib (cv2.imread yields BGR).
            rgb_image = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(12, 12))
            ax.imshow(rgb_image, aspect='equal')
            plt.axis('off')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            for j in range(1, categories + 1):
                keep_inds = (image_bboxes[j][:, -1] >= 0.4)
                cat_name = db.class_name(j)
                for bbox in image_bboxes[j][keep_inds]:
                    xmin, ymin, xmax, ymax = bbox[0:4].astype(np.int32)
                    ax.add_patch(
                        plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                      fill=False, edgecolor=colours[j - 1],
                                      linewidth=4.0))
                    ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name),
                            bbox=dict(facecolor=colours[j - 1], ec='black',
                                      lw=2, alpha=0.5),
                            fontsize=15, color='white', weight='bold')

            debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            fig.savefig(debug_file1)
            fig.savefig(debug_file2)
            plt.close(fig)

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    """Detect boxes on a dataset split from 5-channel inputs and evaluate per class.

    Each image is fed to the network together with a two-channel map holding
    every pixel's normalised x and y position. Detections from all configured
    test scales are pooled, filtered by per-class (soft-)NMS, capped at
    ``max_per_image``, dumped to ``<result_dir>/results.json`` and evaluated
    one category at a time via ``db.evaluate``.

    Args:
        db: Dataset object. Reads ``split``, ``db_inds``, ``configs``, ``mean``
            and ``std``; calls ``image_ids``, ``image_file``, ``class_name``,
            ``convert_to_coco`` and ``evaluate``.
        nnet: Network handle, passed through to ``decode_func`` untouched.
        result_dir: Directory receiving ``results.json`` and ``debug/``.
        debug: When true, only the first 100 images are processed and an
            annotated JPG is written per image into ``debug/``.
        decode_func: Callable ``(nnet, images, K, ae_threshold=, kernel=)``
            returning ``(dets, dets_tl, dets_br, flag)``; a falsy ``flag``
            marks a failed decode for that scale.

    Returns:
        Always 0.
    """
    result_path = os.path.join(result_dir, "results.json")
    debug_dir = os.path.join(result_dir, "debug")
    os.makedirs(debug_dir, exist_ok=True)

    if debug:
        db_inds = db.db_inds[:100]
    elif db.split == "trainval":
        db_inds = db.db_inds[:5000]
    else:
        db_inds = db.db_inds

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for db_ind in tqdm(db_inds, ncols=80, desc="locating kps"):
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        # Channel 0 holds x / width, channel 1 holds y / height, filled by
        # broadcasting instead of per-row / per-column loops.
        image_xy = np.zeros((height, width, 2), dtype=np.float32)
        image_xy[:, :, 0] = np.arange(width, dtype=np.float32) / width
        image_xy[:, :, 1] = (
            np.arange(height, dtype=np.float32) / height)[:, np.newaxis]

        detections = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # `x | 127` sets the low seven bits, so (inp + 1) is a multiple
            # of 128 and the output size below divides evenly.
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 5, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image_xy = cv2.resize(image_xy, (new_width, new_height))
            resized_image, border, offset = crop_image(
                resized_image, new_center, [inp_height, inp_width])
            resized_image_xy, border, offset = crop_image(
                resized_image_xy, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            # Channels 0-2: normalised pixels; channels 3-4: coordinate map.
            images[0, 0:3] = resized_image.transpose((2, 0, 1))
            images[0, 3:5] = resized_image_xy.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = torch.from_numpy(images)
            dets, _dets_tl, _dets_br, flag = decode_func(
                nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            if not flag:
                # Skip this scale only; other scales may still succeed.
                print("error when try to test %s" % image_file)
                continue
            dets = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        if not detections:
            # Every scale failed: the image gets no entry in top_bboxes.
            continue
        detections = np.concatenate(detections, axis=1)[0]

        # reject detections with negative scores
        detections = detections[detections[:, 4] > -1]
        classes = detections[..., -1]

        image_bboxes = top_bboxes[image_id] = {}
        for j in range(categories):
            cat_id = j + 1
            image_bboxes[cat_id] = detections[classes == j][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                nms.soft_nms_merge(image_bboxes[cat_id], Nt=nms_threshold,
                                   method=nms_algorithm,
                                   weight_exp=weight_exp)
            else:
                nms.soft_nms(image_bboxes[cat_id], Nt=nms_threshold,
                             method=nms_algorithm)
            image_bboxes[cat_id] = image_bboxes[cat_id][:, 0:5]

        scores = np.hstack(
            [image_bboxes[j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            # Score of the max_per_image-th best detection over all classes.
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (image_bboxes[j][:, -1] >= thresh)
                image_bboxes[j] = image_bboxes[j][keep_inds]

        if debug:
            font = cv2.FONT_HERSHEY_SIMPLEX
            for j in range(1, categories + 1):
                keep_inds = (image_bboxes[j][:, -1] > 0.5)
                cat_name = db.class_name(j)
                cat_size = cv2.getTextSize(cat_name, font, 0.5, 2)[0]
                # One random colour per category, each channel in [102, 255).
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in image_bboxes[j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        # No room above the box: put the label just inside.
                        cv2.rectangle(
                            image, (bbox[0], bbox[1] + 2),
                            (bbox[0] + cat_size[0],
                             bbox[1] + cat_size[1] + 2), color, -1)
                        cv2.putText(
                            image, cat_name,
                            (bbox[0], bbox[1] + cat_size[1] + 2),
                            font, 0.5, (0, 0, 0), thickness=1)
                    else:
                        cv2.rectangle(
                            image, (bbox[0], bbox[1] - cat_size[1] - 2),
                            (bbox[0] + cat_size[0], bbox[1] - 2), color, -1)
                        cv2.putText(
                            image, cat_name, (bbox[0], bbox[1] - 2),
                            font, 0.5, (0, 0, 0), thickness=1)
                    cv2.rectangle(image, (bbox[0], bbox[1]),
                                  (bbox[2], bbox[3]), color, 2)
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            # Persist the annotated image; without this the drawing is lost.
            cv2.imwrite(debug_file, image)

    detections = db.convert_to_coco(top_bboxes)
    with open(result_path, "w") as f:
        json.dump(detections, f)

    image_ids = [db.image_ids(ind) for ind in db_inds]
    # Evaluate on the JSON-decoded results, exactly as read back from disk.
    with open(result_path, "r") as f:
        results = json.load(f)
    for cls_type in range(1, categories + 1):
        db.evaluate(results, [cls_type], image_ids)
    return 0
def kp_detection(self, image, db, result_dir, debug=False):
    """Detect center-verified boxes on a single image at its native scale.

    The image and its horizontal mirror are decoded with ``self.kp_decode``.
    A box keeps a (re-weighted) score only if a center keypoint of the same
    class lies inside the box's central region; all other boxes are rejected.
    Survivors go through per-class (soft-)NMS and are capped at
    ``max_per_image``.

    Args:
        image: Image array; ``image.shape[0:2]`` is read as (height, width).
        db: Object providing ``configs``, ``mean`` and ``std``.
        result_dir: Unused; kept for interface compatibility.
        debug: Unused; kept for interface compatibility.

    Returns:
        dict mapping category id (1..categories) to a float32 array whose
        five columns are the box coordinates followed by the score.
    """
    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    # Boxes whose area exceeds this use the narrower (middle fifth) central
    # region; smaller boxes use the middle third.
    large_box_area = 22500

    height, width = image.shape[0:2]
    new_center = np.array([height // 2, width // 2])

    # `x | 127` sets the low seven bits, so (inp + 1) is a multiple of 128
    # and the output size below divides evenly.
    inp_height = height | 127
    inp_width = width | 127

    images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
    ratios = np.zeros((1, 2), dtype=np.float32)
    borders = np.zeros((1, 4), dtype=np.float32)
    sizes = np.zeros((1, 2), dtype=np.float32)

    out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
    height_ratio = out_height / inp_height
    width_ratio = out_width / inp_width

    # Same-size resize retained so crop_image receives the same input as
    # before (NOTE(review): likely removable if crop_image never mutates it).
    resized_image = cv2.resize(image, (width, height))
    resized_image, border, offset = crop_image(
        resized_image, new_center, [inp_height, inp_width])

    resized_image = resized_image / 255.
    normalize_(resized_image, db.mean, db.std)

    images[0] = resized_image.transpose((2, 0, 1))
    borders[0] = border
    sizes[0] = [height, width]
    ratios[0] = [height_ratio, width_ratio]

    # Batch of two: the image and its horizontal mirror.
    images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
    images = torch.from_numpy(images)
    dets, center = self.kp_decode(images, K, ae_threshold=ae_threshold,
                                  kernel=nms_kernel)
    dets = dets.reshape(2, -1, 8)
    center = center.reshape(2, -1, 4)
    # Map the mirrored results back into the unflipped frame.
    dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
    center[1, :, [0]] = out_width - center[1, :, [0]]
    dets = dets.reshape(1, -1, 8)
    center = center.reshape(1, -1, 4)

    self._rescale_dets(dets, ratios, borders, sizes)

    # Use basic slices (views) rather than list indices: `center[..., [0]]`
    # is a copy, so passing it as `out=` to np.clip would silently discard
    # the clipped values.
    center_xs = center[..., 0:1]
    center_ys = center[..., 1:2]
    center_xs /= ratios[:, 1][:, None, None]
    center_ys /= ratios[:, 0][:, None, None]
    center_xs -= borders[:, 2][:, None, None]
    center_ys -= borders[:, 0][:, None, None]
    np.clip(center_xs, 0, sizes[:, 1][:, None, None], out=center_xs)
    np.clip(center_ys, 0, sizes[:, 0][:, None, None], out=center_ys)

    detections = dets[0]
    center_points = center[0]

    valid_detections = detections[detections[:, 4] > -1]
    box_areas = ((valid_detections[:, 2] - valid_detections[:, 0]) *
                 (valid_detections[:, 3] - valid_detections[:, 1]))
    # (boxes, near_weight, far_weight): an inner edge of the central region
    # is (near * own_corner + far * opposite_corner) / (near + far).
    size_groups = (
        (valid_detections[box_areas > large_box_area], 3, 2),
        (valid_detections[box_areas <= large_box_area], 2, 1),
    )

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    center_cls = center_points[:, 2][:, np.newaxis]

    rescored = []
    for boxes, near_w, far_w in size_groups:
        total_w = near_w + far_w
        left_x = (near_w * boxes[:, 0] + far_w * boxes[:, 2]) / total_w
        right_x = (far_w * boxes[:, 0] + near_w * boxes[:, 2]) / total_w
        top_y = (near_w * boxes[:, 1] + far_w * boxes[:, 3]) / total_w
        bottom_y = (far_w * boxes[:, 1] + near_w * boxes[:, 3]) / total_w

        corner_scores = boxes[:, 4].copy()
        # Rejected unless a matching center is found below.
        boxes[:, 4] = -1

        # in_region[c, b]: center c has box b's class and lies strictly
        # inside box b's central region.
        in_region = (
            ((center_x - left_x[np.newaxis, :]) > 0)
            & ((center_x - right_x[np.newaxis, :]) < 0)
            & ((center_y - top_y[np.newaxis, :]) > 0)
            & ((center_y - bottom_y[np.newaxis, :]) < 0)
            & ((center_cls - boxes[:, -1][np.newaxis, :]) == 0)
        )
        has_center = in_region.any(axis=0)
        # argmax over booleans picks the first matching center per box.
        first_center = np.argmax(in_region[:, has_center], axis=0)
        boxes[has_center, 4] = (
            corner_scores[has_center] * 2
            + center_points[first_center, 3]) / 3
        rescored.append(boxes)

    detections = np.concatenate(rescored, axis=0)
    detections = detections[np.argsort(-detections[:, 4])]

    # reject detections with negative scores
    detections = detections[detections[:, 4] > -1]
    classes = detections[..., -1]

    top_bboxes = {}
    for j in range(categories):
        cat_id = j + 1
        top_bboxes[cat_id] = detections[classes == j][:, 0:7].astype(
            np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[cat_id], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[cat_id], Nt=nms_threshold,
                     method=nms_algorithm)
        top_bboxes[cat_id] = top_bboxes[cat_id][:, 0:5]

    scores = np.hstack(
        [top_bboxes[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        # Score of the max_per_image-th best detection over all classes.
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[j][:, -1] >= thresh)
            top_bboxes[j] = top_bboxes[j][keep_inds]

    return top_bboxes
def inference(db, nnet, image, decode_func=kp_decode):
    """Detect center-verified boxes on one image using the configured test scales.

    At every scale the image and its horizontal mirror are decoded. A box
    keeps a (re-weighted) score only if a center keypoint of the same class
    lies inside the box's central region; all other boxes are rejected.
    Survivors go through per-class (soft-)NMS and are capped at
    ``max_per_image``.

    Args:
        db: Object providing ``configs``, ``mean`` and ``std``.
        nnet: Network handle, passed through to ``decode_func`` untouched.
        image: Image array; ``image.shape[0:2]`` is read as (height, width).
        decode_func: Callable ``(nnet, images, K, ae_threshold=, kernel=)``
            returning ``(dets, center)`` arrays.

    Returns:
        dict mapping category id (1..categories) to a float32 array whose
        five columns are the box coordinates followed by the score.

    Raises:
        ValueError: If the configured test scales do not contain 1; center
            keypoints are only collected at that scale.
    """
    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    # Boxes whose area exceeds this use the narrower (middle fifth) central
    # region; smaller boxes use the middle third.
    large_box_area = 22500

    height, width = image.shape[0:2]

    detections, center_points = [], []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # `x | 127` sets the low seven bits, so (inp + 1) is a multiple of
        # 128 and the output size below divides evenly.
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(
            resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)

        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        # Batch of two: the image and its horizontal mirror.
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        dets, center = decode_func(nnet, images, K,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        # bboxes, scores, tl_scores, br_scores, clses
        dets = dets.reshape(2, -1, 8)
        # ct_xs, ct_ys, ct_clses, ct_scores
        center = center.reshape(2, -1, 4)
        # Map the mirrored results back into the unflipped frame.
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        _rescale_dets(dets, ratios, borders, sizes)

        # Remap centers to the resized image. Use basic slices (views)
        # rather than list indices: `center[..., [0]]` is a copy, so passing
        # it as `out=` to np.clip would silently discard the clipped values.
        center_xs = center[..., 0:1]
        center_ys = center[..., 1:2]
        center_xs /= ratios[:, 1][:, None, None]
        center_ys /= ratios[:, 0][:, None, None]
        center_xs -= borders[:, 2][:, None, None]
        center_ys -= borders[:, 0][:, None, None]
        np.clip(center_xs, 0, sizes[:, 1][:, None, None], out=center_xs)
        np.clip(center_ys, 0, sizes[:, 0][:, None, None], out=center_ys)

        # Back to original-image coordinates.
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)[0]
    if not center_points:
        raise ValueError(
            "test_scales must contain 1: center keypoints are only "
            "collected at scale 1")
    center_points = np.concatenate(center_points, axis=1)[0]

    valid_detections = detections[detections[:, 4] > -1]
    box_areas = ((valid_detections[:, 2] - valid_detections[:, 0]) *
                 (valid_detections[:, 3] - valid_detections[:, 1]))
    # (boxes, near_weight, far_weight): an inner edge of the central region
    # is (near * own_corner + far * opposite_corner) / (near + far).
    size_groups = (
        (valid_detections[box_areas > large_box_area], 3, 2),
        (valid_detections[box_areas <= large_box_area], 2, 1),
    )

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    center_cls = center_points[:, 2][:, np.newaxis]

    rescored = []
    for boxes, near_w, far_w in size_groups:
        total_w = near_w + far_w
        left_x = (near_w * boxes[:, 0] + far_w * boxes[:, 2]) / total_w
        right_x = (far_w * boxes[:, 0] + near_w * boxes[:, 2]) / total_w
        top_y = (near_w * boxes[:, 1] + far_w * boxes[:, 3]) / total_w
        bottom_y = (far_w * boxes[:, 1] + near_w * boxes[:, 3]) / total_w

        corner_scores = boxes[:, 4].copy()
        # Rejected unless a matching center is found below.
        boxes[:, 4] = -1

        # in_region[c, b]: center c has box b's class and lies strictly
        # inside box b's central region.
        in_region = (
            ((center_x - left_x[np.newaxis, :]) > 0)
            & ((center_x - right_x[np.newaxis, :]) < 0)
            & ((center_y - top_y[np.newaxis, :]) > 0)
            & ((center_y - bottom_y[np.newaxis, :]) < 0)
            & ((center_cls - boxes[:, -1][np.newaxis, :]) == 0)
        )
        has_center = in_region.any(axis=0)
        # argmax over booleans picks the first matching center per box.
        first_center = np.argmax(in_region[:, has_center], axis=0)
        boxes[has_center, 4] = (
            corner_scores[has_center] * 2
            + center_points[first_center, 3]) / 3
        rescored.append(boxes)

    detections = np.concatenate(rescored, axis=0)
    # resort according to new scores
    detections = detections[np.argsort(-detections[:, 4])]

    # reject detections with negative scores
    detections = detections[detections[:, 4] > -1]
    classes = detections[..., -1]

    # soft_nms
    top_bboxes = {}
    for j in range(categories):
        cat_id = j + 1
        top_bboxes[cat_id] = detections[classes == j][:, 0:7].astype(
            np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[cat_id], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[cat_id], Nt=nms_threshold,
                     method=nms_algorithm)
        top_bboxes[cat_id] = top_bboxes[cat_id][:, 0:5]

    scores = np.hstack(
        [top_bboxes[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        # Score of the max_per_image-th best detection over all classes.
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[j][:, -1] >= thresh)
            top_bboxes[j] = top_bboxes[j][keep_inds]

    return top_bboxes
def apply_detection(image, nnet, scales, decode_func, top_k, avg, std,
                    categories, merge_bbox, max_per_image=100,
                    ae_threshold=0.5, nms_kernel=3, nms_algorithm=2,
                    nms_threshold=0.45, weight_exp=1):
    """Detect center-verified boxes on one image with explicit settings.

    At every scale the image and its horizontal mirror are decoded. A box
    keeps a (re-weighted) score only if a center keypoint of the same class
    lies inside the box's central region; all other boxes are rejected.
    Survivors go through per-class (soft-)NMS and are capped at
    ``max_per_image``.

    Args:
        image: Image array; ``image.shape[0:2]`` is read as (height, width).
        nnet: Network handle, passed through to ``decode_func`` untouched.
        scales: Iterable of test scales; must contain 1.
        decode_func: Callable ``(nnet, images, top_k, ae_threshold=, kernel=)``
            returning ``(dets, center)`` arrays.
        top_k: Forwarded to ``decode_func``.
        avg: Forwarded to ``normalize_`` together with ``std``.
        std: Forwarded to ``normalize_`` together with ``avg``.
        categories: Number of categories; class indices are 0..categories-1.
        merge_bbox: If true use ``soft_nms_merge``, otherwise ``soft_nms``.
        max_per_image: Maximum number of detections kept over all classes.
        ae_threshold: Forwarded to ``decode_func``.
        nms_kernel: Forwarded to ``decode_func`` as ``kernel``.
        nms_algorithm: Forwarded to the NMS routine as ``method``.
        nms_threshold: Forwarded to the NMS routine as ``Nt``.
        weight_exp: Forwarded to ``soft_nms_merge``.

    Returns:
        dict mapping category id (1..categories) to a float32 array whose
        five columns are the box coordinates followed by the score.

    Raises:
        ValueError: If ``scales`` does not contain 1; center keypoints are
            only collected at that scale.
    """
    # Boxes whose area exceeds this use the narrower (middle fifth) central
    # region; smaller boxes use the middle third.
    large_box_area = 22500

    height, width = image.shape[0:2]

    detections = []
    center_points = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # `x | 127` sets the low seven bits, i.e. x becomes
        # (x // 128) * 128 + 127, so (inp + 1) is a multiple of 128.
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(
            resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, avg, std)

        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        # Batch of two: the image and its horizontal mirror.
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)

        # do detection
        dets, center = decode_func(nnet, images, top_k,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)

        # post processing
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        # Map the mirrored results back into the unflipped frame.
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        _rescale_dets(dets, ratios, borders, sizes)

        # Use basic slices (views) rather than list indices:
        # `center[..., [0]]` is a copy, so passing it as `out=` to np.clip
        # would silently discard the clipped values.
        center_xs = center[..., 0:1]
        center_ys = center[..., 1:2]
        center_xs /= ratios[:, 1][:, None, None]
        center_ys /= ratios[:, 0][:, None, None]
        center_xs -= borders[:, 2][:, None, None]
        center_ys -= borders[:, 0][:, None, None]
        np.clip(center_xs, 0, sizes[:, 1][:, None, None], out=center_xs)
        np.clip(center_ys, 0, sizes[:, 0][:, None, None], out=center_ys)

        # Back to original-image coordinates.
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)[0]
    if not center_points:
        raise ValueError(
            "scales must contain 1: center keypoints are only collected "
            "at scale 1")
    center_points = np.concatenate(center_points, axis=1)[0]

    valid_detections = detections[detections[:, 4] > -1]
    box_areas = ((valid_detections[:, 2] - valid_detections[:, 0]) *
                 (valid_detections[:, 3] - valid_detections[:, 1]))
    # (boxes, near_weight, far_weight): an inner edge of the central region
    # is (near * own_corner + far * opposite_corner) / (near + far).
    size_groups = (
        (valid_detections[box_areas > large_box_area], 3, 2),
        (valid_detections[box_areas <= large_box_area], 2, 1),
    )

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    center_cls = center_points[:, 2][:, np.newaxis]

    rescored = []
    for boxes, near_w, far_w in size_groups:
        total_w = near_w + far_w
        left_x = (near_w * boxes[:, 0] + far_w * boxes[:, 2]) / total_w
        right_x = (far_w * boxes[:, 0] + near_w * boxes[:, 2]) / total_w
        top_y = (near_w * boxes[:, 1] + far_w * boxes[:, 3]) / total_w
        bottom_y = (far_w * boxes[:, 1] + near_w * boxes[:, 3]) / total_w

        corner_scores = boxes[:, 4].copy()
        # Rejected unless a matching center is found below.
        boxes[:, 4] = -1

        # in_region[c, b]: center c has box b's class and lies strictly
        # inside box b's central region.
        in_region = (
            ((center_x - left_x[np.newaxis, :]) > 0)
            & ((center_x - right_x[np.newaxis, :]) < 0)
            & ((center_y - top_y[np.newaxis, :]) > 0)
            & ((center_y - bottom_y[np.newaxis, :]) < 0)
            & ((center_cls - boxes[:, -1][np.newaxis, :]) == 0)
        )
        has_center = in_region.any(axis=0)
        # argmax over booleans picks the first matching center per box.
        first_center = np.argmax(in_region[:, has_center], axis=0)
        boxes[has_center, 4] = (
            corner_scores[has_center] * 2
            + center_points[first_center, 3]) / 3
        rescored.append(boxes)

    detections = np.concatenate(rescored, axis=0)
    detections = detections[np.argsort(-detections[:, 4])]

    # reject detections with negative scores
    detections = detections[detections[:, 4] > -1]
    classes = detections[..., -1]

    final_bboxes = {}
    for j in range(categories):
        cat_id = j + 1
        final_bboxes[cat_id] = detections[classes == j][:, 0:7].astype(
            np.float32)
        if merge_bbox:
            soft_nms_merge(final_bboxes[cat_id], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(final_bboxes[cat_id], Nt=nms_threshold,
                     method=nms_algorithm)
        final_bboxes[cat_id] = final_bboxes[cat_id][:, 0:5]

    scores = np.hstack(
        [final_bboxes[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        # Score of the max_per_image-th best detection over all classes.
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (final_bboxes[j][:, -1] >= thresh)
            final_bboxes[j] = final_bboxes[j][keep_inds]

    return final_bboxes
def test_MatrixNetCorners(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    """Run corner detection over a dataset split, save the boxes and evaluate.

    Each selected image is resized once per entry of ``test_scales`` (after
    normalising its size with ``test_image_max_dim``), padded to a multiple
    of 128, optionally mirrored, and passed to ``decode_func``.  The decoded
    boxes are mapped back to original image coordinates, filtered per
    category with ``soft_nms`` / ``soft_nms_merge`` and capped at
    ``max_per_image`` boxes.  The result is written to
    ``<result_dir>/results.json`` and handed to ``db.evaluate``.

    Args:
        db: dataset object.  This function reads ``split``, ``db_inds``,
            ``configs`` and calls ``image_ids``, ``image_file``,
            ``class_name``, ``convert_to_coco`` and ``evaluate`` on it.
        nnet: network object, passed through unchanged to ``decode_func``.
        result_dir: directory receiving ``results.json`` and ``debug/``.
        debug: when true, only a prefix of the split is processed and an
            annotated ``<db_ind>.jpg`` is written per image.
        decode_func: callable producing detections that can be reshaped to
            ``(batch, -1, 8)``; column 4 is used as the score and the last
            column as the class index.

    Returns:
        Always ``0``.
    """
    debug_dir = os.path.join(result_dir, "debug")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(debug_dir, exist_ok=True)

    if db.split != "trainval":
        db_inds = db.db_inds[:200] if debug else db.db_inds
    else:
        # NOTE(review): the original expression used the same [:100] slice for
        # both the debug and the non-debug case; confirm that capping the
        # trainval split at 100 images outside debug mode is intended.
        db_inds = db.db_inds[:100]
    num_images = db_inds.size

    K = db.configs["top_k"]
    matching_threshold = db.configs["matching_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    flag_flip_images = db.configs["test_flip_images"]
    max_dim = db.configs["test_image_max_dim"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    layers_range = db.configs["layers_range"]
    input_size = db.configs["input_size"]
    output_kernel_size = db.configs["output_kernel_size"]

    # Flatten the 2-D layer grid: keep every entry that is not the -1
    # placeholder and remember the output size associated with its position.
    _dict = {}
    output_sizes = []
    for i, row in enumerate(layers_range):
        for j, entry in enumerate(row):
            if entry != -1:
                output_sizes.append([input_size[0] // (8 * 2 ** j),
                                     input_size[1] // (8 * 2 ** i)])
                _dict[(i + 1) * 10 + (j + 1)] = entry
    layers_range = [_dict[key] for key in sorted(_dict)]
    # Rescale every range from input resolution to its layer's output
    # resolution.  (The loop variable used to be called ``os``, which shadowed
    # the ``os`` module.)
    layers_range = [
        [
            lr[0] * out_size[0] / input_size[0],
            lr[1] * out_size[0] / input_size[0],
            lr[2] * out_size[1] / input_size[1],
            lr[3] * out_size[1] / input_size[1],
        ]
        for lr, out_size in zip(layers_range, output_sizes)
    ]

    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]

        detections = []
        for scale in scales:
            # Combine the configured test scale with the factor derived from
            # max_dim and the image's height/width.
            scale = scale * min(max_dim / float(height), max_dim / float(width))
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # Next multiple of 128 strictly greater than the resized size.
            inp_height = ((new_height // 128) + 1) * 128
            inp_width = ((new_width // 128) + 1) * 128

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = inp_height // 8, inp_width // 8
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, _ = crop_image(
                resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            if flag_flip_images:
                # Second batch entry is the horizontally mirrored image.
                images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)

            dets = decode_func(nnet, images, K,
                               matching_threshold=matching_threshold,
                               kernel=nms_kernel,
                               layers_range=layers_range,
                               output_kernel_size=output_kernel_size,
                               output_sizes=output_sizes,
                               input_size=input_size)
            if flag_flip_images:
                dets = dets.reshape(2, -1, 8)
                # Mirror the x coordinates of the flipped batch entry back.
                dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        # reject detections whose score is not strictly positive
        keep_inds = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        # Per-category NMS; result keys are 1-based category ids.
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        # Keep at most max_per_image boxes across all categories by
        # thresholding at the max_per_image-th highest score.
        scores = np.hstack([
            top_bboxes[image_id][j][:, -1]
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if debug:
            # Draw every box scoring above 0.2 with a random per-category
            # colour and a filled label, then save the annotated image.
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)

            for j in range(categories, 0, -1):
                keep_inds = (top_bboxes[image_id][j][:, -1] > 0.2)
                cat_name = db.class_name(j)
                cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        # No room above the box: put the label inside it.
                        cv2.rectangle(image,
                                      (bbox[0], bbox[1] + 2),
                                      (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                                      color, -1)
                        cv2.putText(image, cat_name,
                                    (bbox[0], bbox[1] + cat_size[1] + 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
                    else:
                        cv2.rectangle(image,
                                      (bbox[0], bbox[1] - cat_size[1] - 2),
                                      (bbox[0] + cat_size[0], bbox[1] - 2),
                                      color, -1)
                        cv2.putText(image, cat_name,
                                    (bbox[0], bbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
                    cv2.rectangle(image,
                                  (bbox[0], bbox[1]),
                                  (bbox[2], bbox[3]),
                                  color, 2)
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            print(debug_file)
            cv2.imwrite(debug_file, image)

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
classes = classes[0] detections = detections[0] # reject detections with negative scores keep_inds = (detections[:, 4] > -1) detections = detections[keep_inds] classes = classes[keep_inds] top_bboxes[image_id] = {} for j in range(categories): keep_inds = (classes == j) top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype( np.float32) if merge_bbox: soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) else: soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm) top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5] scores = np.hstack( [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)]) if len(scores) > max_per_image: kth = len(scores) - max_per_image thresh = np.partition(scores, kth)[kth] for j in range(1, categories + 1): keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh) top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    """Phrase-conditioned keypoint detection over a dataset, with evaluation.

    For every selected sample the image and its text feature are obtained
    from ``db.detections_with_phrase``.  The image is resized per test scale,
    padded, stacked with its horizontal mirror and decoded with
    ``decode_func`` (which also receives the text feature).  Boxes are mapped
    back to image coordinates, re-scored with the decoded centre points,
    filtered with ``soft_nms`` / ``soft_nms_merge`` and capped at
    ``max_per_image``.  All boxes are written to
    ``<result_dir>/results.json``; the highest-scoring box per sample (or
    ``None``) is passed to ``db.evaluate``.

    Args:
        db: dataset object.  This function reads ``db_inds``, ``configs``,
            ``mean``, ``std`` and calls ``detections_with_phrase``,
            ``image_file``, ``convert_to_json`` and ``evaluate`` on it.
        nnet: network object, passed through unchanged to ``decode_func``.
        result_dir: directory receiving ``results.json`` and ``debug/``.
        debug: when true, only the first 3000 indices are processed and a
            five-panel figure is saved per sample.
        decode_func: callable returning ``(dets, center, heatmaps)`` where
            ``dets`` reshapes to ``(2, -1, 8)``, ``center`` to ``(2, -1, 4)``
            and ``heatmaps`` unpacks into three items.

    Returns:
        Always ``0``.
    """
    debug_dir = os.path.join(result_dir, "debug")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(debug_dir, exist_ok=True)

    partial_num = 3000
    db_inds = db.db_inds[:partial_num] if debug else db.db_inds

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    bert_model = db.configs["bert_model"]
    # Width of the text feature vector for the configured model name.
    textdim = 768 if bert_model == 'bert-base-uncased' else 1024

    top_bboxes = {}
    best_bboxes = {}
    for ind in tqdm(range(db_inds.size), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image, bert_feature, gt_detections, phrase = db.detections_with_phrase(
            db_ind)
        height, width = image.shape[0:2]

        detections = []
        center_points = []
        tl_hms = []
        br_hms = []
        ct_hms = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # "| 127" sets the low seven bits, so the padded size is always
            # one less than a multiple of 128.
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            bert_features = np.zeros((1, textdim), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            bert_features[0] = bert_feature

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, _ = crop_image(
                resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # Flip to perform detection twice
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            bert_features = np.concatenate((bert_features, bert_features), axis=0)
            images = torch.from_numpy(images)
            bert_features = torch.from_numpy(bert_features)

            dets, center, heatmaps = decode_func(
                nnet, [images, bert_features], K,
                ae_threshold=ae_threshold, kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            # Mirror the x coordinates of the flipped batch entry back.
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)
            tl_hm, br_hm, ct_hm = heatmaps

            _rescale_dets(dets, ratios, borders, sizes)
            # Same mapping for the centre points: undo the output-stride
            # ratio, remove the padding border, clip to the resized image.
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
                # NOTE(review): the source text had lost its indentation; the
                # heat-map appends are placed inside this branch so that maps
                # of different scales are never concatenated - confirm.
                tl_hms.append(tl_hm)
                br_hms.append(br_hm)
                ct_hms.append(ct_hm)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        tl_hms = np.concatenate(tl_hms, axis=1)
        br_hms = np.concatenate(br_hms, axis=1)
        ct_hms = np.concatenate(ct_hms, axis=1)

        detections = detections[0]
        center_points = center_points[0]
        tl_hms = tl_hms[0]
        br_hms = br_hms[0]
        ct_hms = ct_hms[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        # Split boxes by area; the two groups use different central regions.
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        center_cls = center_points[:, 2][:, np.newaxis]

        # Small boxes: the central region is the middle third of the box.
        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        # Every box starts rejected and is revived only by a matching centre.
        s_detections[:, 4] = -1

        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_cls - s_detections[:, -1][np.newaxis, :]) == 0
        # Rows are centre points, columns are boxes; 1 marks a same-class
        # centre strictly inside the box's central region.
        s_match = ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0)
                   & (ind_by + 0) & (ind_cls + 0))
        ind_s_new_score = np.max(s_match, axis=0) == 1
        # argmax returns the first matching centre for each revived box.
        index_s_new_score = np.argmax(s_match[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2
            + center_points[index_s_new_score, 3]) / 3

        # Large boxes: the central region is the middle fifth of the box.
        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_cls - l_detections[:, -1][np.newaxis, :]) == 0
        l_match = ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0)
                   & (ind_by + 0) & (ind_cls + 0))
        ind_l_new_score = np.max(l_match, axis=0) == 1
        index_l_new_score = np.argmax(l_match[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2
            + center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]

        # No per-category split in this variant: NMS runs on all boxes.
        top_bboxes[db_ind] = detections[:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[db_ind], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[db_ind], Nt=nms_threshold,
                     method=nms_algorithm)
        top_bboxes[db_ind] = top_bboxes[db_ind][:, 0:5]

        scores = top_bboxes[db_ind][:, -1]
        if len(scores) > 0:
            best_bboxes[db_ind] = top_bboxes[db_ind][np.argmax(scores)]
        else:
            best_bboxes[db_ind] = None

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            keep_inds = (top_bboxes[db_ind][:, -1] >= thresh)
            top_bboxes[db_ind] = top_bboxes[db_ind][keep_inds]

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]  # reverse the channel order for display

            # plt.figure() rather than plt.subplots(): the latter adds a
            # full-figure axes that plt.subplot() no longer removes on
            # current Matplotlib, leaving a stray frame behind the panels.
            plt.figure(figsize=(28, 12))

            # Panel 2: best predicted box.
            ax = plt.subplot(152)
            ax.imshow(im, aspect='equal')
            plt.axis('off')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            if best_bboxes[db_ind] is not None:
                bbox = best_bboxes[db_ind].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor='red', linewidth=5.0))
                ax.text(xmin + 1, ymin - 3, 'prediction',
                        bbox=dict(facecolor='red', ec='black', lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')

            # Panel 1: first ground-truth box, labelled with the phrase.
            ax = plt.subplot(151)
            ax.imshow(im, aspect='equal')
            plt.axis('off')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            bbox = gt_detections[0].astype(np.int32)
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            ax.add_patch(
                plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                              fill=False, edgecolor='red', linewidth=5.0))
            ax.text(xmin + 1, ymin - 3, phrase,
                    bbox=dict(facecolor='red', ec='black', lw=2, alpha=0.5),
                    fontsize=15, color='white', weight='bold')

            # Panels 3-5: first map of each of the three decoded heat maps.
            # The tick axes are hidden on the panel's own axes (the original
            # kept hiding those of panel 1 through a stale handle).
            for position, heatmap in ((153, tl_hms), (154, br_hms), (155, ct_hms)):
                ax = plt.subplot(position)
                ax.imshow(heatmap[0], cmap='jet')
                plt.axis('off')
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)

            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            plt.savefig(debug_file2)
            plt.close()

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_json(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    db.evaluate(best_bboxes)
    return 0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    """Run keypoint-based detection over a dataset split, save and evaluate.

    Each selected image is resized per test scale, padded, stacked with its
    horizontal mirror and decoded with ``decode_func``.  Boxes are mapped
    back to image coordinates, re-scored with the decoded centre points,
    filtered per category with ``soft_nms`` / ``soft_nms_merge`` and capped
    at ``max_per_image``.  The result is written to
    ``<result_dir>/results.json`` and handed to ``db.evaluate``.

    In debug mode a ``<image name>.txt`` file (one line per box scoring at
    least 0.4) and an annotated ``<image name>.jpg`` are written to
    ``<result_dir>/debug``.

    Args:
        db: dataset object.  This function reads ``split``, ``db_inds``,
            ``configs``, ``mean``, ``std`` and calls ``image_ids``,
            ``image_file``, ``class_name``, ``convert_to_coco`` and
            ``evaluate`` on it.
        nnet: network object, passed through unchanged to ``decode_func``.
        result_dir: directory receiving ``results.json`` and ``debug/``.
        debug: when true, only the first ``NT`` images are processed and
            the debug files are written.
        decode_func: callable returning ``(dets, center)`` where ``dets``
            reshapes to ``(2, -1, 8)`` and ``center`` to ``(2, -1, 4)``.

    Returns:
        Always ``0``.
    """
    debug_dir = os.path.join(result_dir, "debug")
    NT = 20  # number of images processed in debug mode
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(debug_dir, exist_ok=True)

    if db.split != "trainval":
        # debug: first NT images; otherwise every image of the split
        db_inds = db.db_inds[:NT] if debug else db.db_inds
    else:
        # debug: first NT images; otherwise the first 5000 images
        db_inds = db.db_inds[:NT] if debug else db.db_inds[:5000]
    num_images = db_inds.size  # number of images to run detection on

    K = db.configs["top_k"]  # passed to decode_func; presumably detections kept per image
    # NOTE(review): the original comment called this an IoU value; it is only
    # forwarded to decode_func here - confirm its meaning there.
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}  # image id -> {category id -> kept boxes}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        # Load the image.
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]

        # Detections and centre points collected over all scales.
        detections = []
        center_points = []
        for scale in scales:
            # Pre-process the image at the current scale.
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # "| 127" sets the low seven bits, so the padded size is always
            # one less than a multiple of 128.
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, _ = crop_image(
                resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # Second batch entry is the horizontally mirrored image.
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)

            # Run the decoder.
            dets, center = decode_func(nnet, images, K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            # Mirror the x coordinates of the flipped batch entry back.
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)
            # Same mapping for the centre points: undo the output-stride
            # ratio, remove the padding border, clip to the resized image.
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                # Only centre points found at the original size are kept.
                center_points.append(center)
            detections.append(dets)

        # Merge the results of all scales for this image.
        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)

        classes = detections[..., -1]  # class index of each detection
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        # Split boxes by area; the two groups use different central regions.
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        # Small boxes: the central region is the middle third of the box.
        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])  # original score of each box
        # Every box starts rejected and is revived only by a matching centre.
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis]
                   - s_detections[:, -1][np.newaxis, :]) == 0
        # Rows are centre points, columns are boxes; 1 marks a same-class
        # centre strictly inside the box's central region.
        s_match = ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0)
                   & (ind_by + 0) & (ind_cls + 0))
        ind_s_new_score = np.max(s_match, axis=0) == 1
        # argmax returns the first matching centre for each revived box.
        index_s_new_score = np.argmax(s_match[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2
            + center_points[index_s_new_score, 3]) / 3

        # Large boxes: the central region is the middle fifth of the box.
        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis]
                   - l_detections[:, -1][np.newaxis, :]) == 0
        l_match = ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0)
                   & (ind_by + 0) & (ind_cls + 0))
        ind_l_new_score = np.max(l_match, axis=0) == 1
        index_l_new_score = np.argmax(l_match[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2
            + center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        # Drop the boxes that were not revived by a centre point.
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        # Per-category NMS; result keys are 1-based category ids.
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        # Keep at most max_per_image boxes across all categories by
        # thresholding at the max_per_image-th highest score.
        scores = np.hstack([
            top_bboxes[image_id][j][:, -1]
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        # Debug output.
        if debug:
            image_file = db.image_file(db_ind)
            _, filename0 = os.path.split(image_file)    # file name without directory
            img_name0, _ = os.path.splitext(filename0)  # file name without extension

            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]  # reverse the channel order for display
            _, ax = plt.subplots(figsize=(12, 12))
            ax.imshow(im, aspect='equal')
            plt.axis('off')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

            # The box file is opened in append mode, so re-running adds lines
            # to an existing file.  The with-block guarantees the handle is
            # closed (the original never closed it).
            with open(debug_dir + "/" + img_name0 + ".txt", mode="a") as box_file:
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
                    cat_name = db.class_name(j)
                    for bbox in top_bboxes[image_id][j][keep_inds]:
                        bbox = bbox[0:4].astype(np.int32)
                        xmin = bbox[0]
                        ymin = bbox[1]
                        xmax = bbox[2]
                        ymax = bbox[3]
                        # Box position; the first and last fields are the
                        # constant 1 regardless of category or score.
                        box_file.write(str(1) + ' ' + str(int(xmin)) + ' ' + str(int(ymin)) + ' '
                                       + str(int(xmax)) + ' ' + str(int(ymax)) + ' ' + str(1))
                        box_file.write('\n')
                        # Draw the box and its category label.
                        ax.add_patch(
                            plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                          fill=False, edgecolor=colours[j - 1],
                                          linewidth=4.0))
                        ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name),
                                bbox=dict(facecolor=colours[j - 1], ec='black',
                                          lw=2, alpha=0.5),
                                fontsize=15, color='white', weight='bold')

            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(img_name0))  # jpg output
            plt.savefig(debug_file2)  # save the annotated image
            plt.close()

    # Store all detections as JSON, then evaluate them.
    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
def kp_detection(db, nnet, result_dir, debug=True, decode_func=kp_decode):
    """Run keypoint-based detection and store one JSON result file per image.

    Each selected image is resized per test scale, padded, stacked with its
    horizontal mirror and decoded with ``decode_func``.  Boxes are mapped
    back to image coordinates, re-scored with the decoded centre points,
    filtered per category with ``soft_nms`` / ``soft_nms_merge`` and capped
    at ``max_per_image``.  The boxes of an image are converted with
    ``db.parse_detections`` and written to
    ``<result_dir>/<image_id[:-4]>/results.json``.

    Images whose ``results.json`` already exists are skipped.  Images with
    no parsed detections get no JSON file (their directory is still
    created).

    Args:
        db: dataset object.  This function reads ``db_inds``, ``configs``,
            ``mean``, ``std`` and calls ``image_ids``, ``image_file`` and
            ``parse_detections`` on it.
        nnet: network object, passed through unchanged to ``decode_func``.
        result_dir: root directory of the per-image result directories.
        debug: when true (the default), only the first 10 images are
            processed and an annotated ``<db_ind>.jpg`` is saved next to
            each ``results.json``.
        decode_func: callable returning ``(dets, center)`` where ``dets``
            reshapes to ``(2, -1, 8)`` and ``center`` to ``(2, -1, 4)``.

    Returns:
        Always ``0``.
    """
    db_inds = db.db_inds[:10] if debug else db.db_inds
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)

        # Paths (image_id[:-4] drops the last four characters of the id)
        result_path = result_dir + "/{}".format(image_id[:-4])
        result_json = os.path.join(result_path, "results.json")
        result_debug = os.path.join(result_path, "{}.jpg".format(db_ind))

        # Skip images that already have a result before decoding the image,
        # so resumed runs do not pay for the read.
        if pexists(result_json):
            continue

        # Create dirs
        Path(result_path).mkdir(parents=True, exist_ok=True)

        image = cv2.imread(image_file)
        height, width = image.shape[0:2]

        detections = []
        center_points = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # "| 127" sets the low seven bits, so the padded size is always
            # one less than a multiple of 128.
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, _ = crop_image(
                resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # Second batch entry is the horizontally mirrored image.
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)

            dets, center = decode_func(nnet, images, K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            # Mirror the x coordinates of the flipped batch entry back.
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)
            # Same mapping for the centre points: undo the output-stride
            # ratio, remove the padding border, clip to the resized image.
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                # Only centre points found at the original size are kept.
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        # Split boxes by area; the two groups use different central regions.
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        # Small boxes: the central region is the middle third of the box.
        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        # Every box starts rejected and is revived only by a matching centre.
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis]
                   - s_detections[:, -1][np.newaxis, :]) == 0
        # Rows are centre points, columns are boxes; 1 marks a same-class
        # centre strictly inside the box's central region.
        s_match = ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0)
                   & (ind_by + 0) & (ind_cls + 0))
        ind_s_new_score = np.max(s_match, axis=0) == 1
        # argmax returns the first matching centre for each revived box.
        index_s_new_score = np.argmax(s_match[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2
            + center_points[index_s_new_score, 3]) / 3

        # Large boxes: the central region is the middle fifth of the box.
        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis]
                   - l_detections[:, -1][np.newaxis, :]) == 0
        l_match = ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0)
                   & (ind_by + 0) & (ind_cls + 0))
        ind_l_new_score = np.max(l_match, axis=0) == 1
        index_l_new_score = np.argmax(l_match[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2
            + center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        # Drop the boxes that were not revived by a centre point.
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        # Per-category NMS; result keys are 1-based category ids.
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        # Keep at most max_per_image boxes across all categories by
        # thresholding at the max_per_image-th highest score.
        scores = np.hstack(
            [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        detections = db.parse_detections(top_bboxes[image_id])

        # if no valid detections, nothing is written for this image
        if len(detections) == 0:
            continue

        # Save JSON
        with open(result_json, "w") as f:
            json.dump(detections, f)

        # Save also images with labels
        if debug:
            # Get image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]  # reverse the channel order for display

            # Create matplotlib fig
            _, ax = plt.subplots(figsize=(12, 12))
            ax.imshow(im, aspect='equal')
            plt.axis('off')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

            # NOTE(review): ``j`` is the value left over from the category
            # loops above, so every box is drawn in the same colour; the
            # colour was probably meant to depend on the detection's
            # category - confirm the format of x["category_id"] first.
            colour = colours[j - 1]
            for x in detections:
                bbox = x["bbox"]
                # bbox holds (x, y, width, height); it is read without being
                # modified so the parsed detection stays untouched.
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[0] + bbox[2]
                ymax = bbox[1] + bbox[3]
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor=colour, linewidth=4.0))
                # '{}' instead of '{:s}': the latter raises ValueError when
                # the category id is not a string.
                ax.text(xmin + 1, ymin - 3, '{}'.format(x["category_id"]),
                        bbox=dict(facecolor=colour, ec='black', lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')

            plt.savefig(result_debug)
            plt.close()

    return 0
def test(db, split, testiter, debug=False, suffix=None):
    """Run an interactive detection demo over every file in ``db._image_dir``.

    The network parameters for ``testiter`` are loaded, each readable image
    in the directory is run through multi-scale, flip-augmented inference
    followed by per-class (soft-)NMS, and boxes scoring at least 0.3 are
    drawn in an OpenCV window.

    Args:
        db: dataset object; this function reads ``db.configs``, ``db.mean``,
            ``db.std``, ``db._image_dir`` and calls ``db.convert_to_list``.
        split: split name, used only to build the result directory path.
        testiter: iteration whose parameters are loaded; ``None`` falls back
            to ``system_configs.max_iter``.
        debug: accepted for interface compatibility; not used here.
        suffix: optional extra sub-directory appended to the result path.

    Returns:
        None. Results are shown on screen and printed.
    """
    result_dir = os.path.join(system_configs.result_dir, str(testiter), split)
    if suffix is not None:
        result_dir = os.path.join(result_dir, suffix)
    make_dirs([result_dir])

    test_iter = system_configs.max_iter if testiter is None else testiter
    print("loading parameters at iteration: {}".format(test_iter))

    print("building neural network...")
    nnet = NetworkFactory(db)
    print("loading parameters...")
    nnet.load_params(test_iter)

    nnet.cuda()
    nnet.eval_mode()

    # The directory is not written to by this function; it is still created
    # so the on-disk layout stays the same as before.
    debug_dir = os.path.join(result_dir, "debug")
    os.makedirs(debug_dir, exist_ok=True)

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    # Boxes scoring below this value are not drawn.
    display_threshold = 0.3

    for image_name in os.listdir(db._image_dir):
        image_id = image_name
        image_file = os.path.join(db._image_dir, image_name)
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None for anything it cannot decode
            # (sub-directories, text files, corrupt images).
            print('skipping unreadable file {}'.format(image_file))
            continue

        height, width = image.shape[0:2]
        top_bboxes = {}
        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            # `| 127` sets the low seven bits, i.e. rounds up to the next
            # value of the form 128 * k - 1.
            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, _ = crop_image(
                resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            # (H, W, C) -> (C, H, W)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [new_height, new_width]
            ratios[0] = [height_ratio, width_ratio]

            # Batch of two: the image and its mirror along the width axis.
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)

            decoded = kp_decode(nnet, images, K,
                                ae_threshold=ae_threshold, kernel=nms_kernel)
            # Other callers in this file unpack `dets, center = kp_decode(...)`;
            # accept both a bare array and a (dets, center) tuple so the
            # reshape below does not fail on a tuple.
            dets = decoded[0] if isinstance(decoded, tuple) else decoded

            dets = dets.reshape(2, -1, 8)
            # Map x-coordinates of the mirrored half back to the unflipped frame.
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)[0]

        # reject detections with negative scores
        detections = detections[detections[:, 4] > -1]
        classes = detections[..., -1]

        top_bboxes[image_id] = {}
        for cls_ind in range(categories):
            keep_inds = (classes == cls_ind)
            cls_boxes = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(cls_boxes, Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(cls_boxes, Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][cls_ind + 1] = cls_boxes[:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][cls_id][:, -1]
            for cls_id in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            # Keep only the `max_per_image` highest-scoring boxes overall.
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for cls_id in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][cls_id][:, -1] >= thresh)
                top_bboxes[image_id][cls_id] = top_bboxes[image_id][cls_id][keep_inds]

        detections = db.convert_to_list(top_bboxes)
        print('demo for {}'.format(image_id))

        # Draw on a copy so the decoded input image stays untouched.
        canvas = image.copy()
        if detections is not None:
            for det in detections:
                # NOTE(review): entries 2..5 are used as the two rectangle
                # corners and the last entry as the score -- confirm against
                # db.convert_to_list.
                if det[-1] < display_threshold:
                    continue
                cv2.rectangle(canvas,
                              (int(det[2]), int(det[3])),
                              (int(det[4]), int(det[5])),
                              (0, 255, 255), 1)

        cv2.imshow('Detecting image...', canvas)
        # Show each image for up to 3 s; pressing 'q' stops the demo.
        if cv2.waitKey(3000) & 0xFF == ord('q'):
            break
        print(detections)
def post_process(db, debug, num_images, weight_exp, merge_bbox, categories,
                 nms_threshold, max_per_image, nms_algorithm, det_queue,
                 top_bboxes_queue):
    """Consume raw detections from a queue and emit per-class NMS results.

    ``num_images`` items are taken from ``det_queue``. Item ``[0]`` is the
    detection array, ``[1]`` the per-detection class indices and ``[2]`` the
    image id. For each image the detections are split by class, passed
    through ``soft_nms`` / ``soft_nms_merge`` and capped at
    ``max_per_image`` boxes in total. One dict
    ``{image_id: {class_id (1-based): array[:, 0:5]}}`` covering all images
    is put on ``top_bboxes_queue`` once everything has been processed.

    ``db`` and ``debug`` are accepted but not used; the debug visualisation
    that once consumed them is disabled.
    """
    results = {}
    class_ids = range(1, categories + 1)

    for _ in range(num_images):
        item = det_queue.get(block=True)
        boxes = item[0]
        labels = item[1]
        image_id = item[2]

        per_class = {}
        results[image_id] = per_class

        for class_id in class_ids:
            # Labels are 0-based, output keys are 1-based.
            selected = boxes[labels == class_id - 1]
            candidates = selected[:, 0:7].astype(np.float32)
            # Both NMS variants are called for their in-place effect.
            if merge_bbox:
                soft_nms_merge(candidates, Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(candidates, Nt=nms_threshold, method=nms_algorithm)
            per_class[class_id] = candidates[:, 0:5]

        all_scores = np.hstack(
            [per_class[class_id][:, -1] for class_id in class_ids])
        if len(all_scores) > max_per_image:
            # Threshold at the score of the max_per_image-th best box.
            cut = len(all_scores) - max_per_image
            min_score = np.partition(all_scores, cut)[cut]
            for class_id in class_ids:
                kept = per_class[class_id]
                per_class[class_id] = kept[kept[:, -1] >= min_score]

    top_bboxes_queue.put(results)
def _rescore_by_center_region(boxes, center_points, edge_weight, opposite_weight):
    """Re-score ``boxes`` in place using centers found in their central region.

    On each axis the central region runs from
    ``(edge_weight * lo + opposite_weight * hi) / (edge_weight + opposite_weight)``
    to the mirrored point. Column 4 of every box is first set to -1. Boxes
    whose central region strictly contains at least one center of the same
    class (center column 2 equals the box's last column) then get
    ``(2 * old_score + center_score) / 3``, using the first such center in
    ``center_points`` order.
    """
    total = edge_weight + opposite_weight
    left = (edge_weight * boxes[:, 0] + opposite_weight * boxes[:, 2]) / total
    right = (opposite_weight * boxes[:, 0] + edge_weight * boxes[:, 2]) / total
    top = (edge_weight * boxes[:, 1] + opposite_weight * boxes[:, 3]) / total
    bottom = (opposite_weight * boxes[:, 1] + edge_weight * boxes[:, 3]) / total

    old_scores = boxes[:, 4].copy()
    boxes[:, 4] = -1

    # Broadcast centers (rows) against boxes (columns).
    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    center_cls = center_points[:, 2][:, np.newaxis]
    inside = (
        (center_x > left[np.newaxis, :])
        & (center_x < right[np.newaxis, :])
        & (center_y > top[np.newaxis, :])
        & (center_y < bottom[np.newaxis, :])
        & (center_cls == boxes[:, -1][np.newaxis, :])
    )

    has_center = inside.any(axis=0)
    # argmax over a boolean column gives the first matching center.
    first_center = np.argmax(inside[:, has_center], axis=0)
    boxes[has_center, 4] = \
        (old_scores[has_center] * 2 + center_points[first_center, 3]) / 3


def kp_detection_image(image, db: LV, nnet: NetworkFactory, debug=False,
                       decode_func=kp_decode, db_ind=None, debug_dir=None):
    """Run detection on a single image.

    :param image: image array as read by ``cv2.imread``
    :param db: dataset object; ``configs``, ``mean``, ``std`` are read, and
        ``image_file``, ``class_name``, ``display`` are called when ``debug``
    :param nnet: network passed through to ``decode_func``
    :param debug: when true, save a visualisation to ``debug_dir``
    :param decode_func: callable returning ``(dets, center)`` arrays
    :param db_ind: dataset index of the image; required when ``debug``
    :param debug_dir: output directory for debug images; required when ``debug``
    :return: dict mapping 1-based class id to an array of shape
        (number of detections of that class, 5) whose columns are
        tl_x, tl_y, br_x, br_y, score
    :raises ValueError: if ``debug`` is set without ``db_ind``/``debug_dir``,
        or if ``test_scales`` contains no scale equal to 1
    """
    if debug and (db_ind is None or debug_dir is None):
        raise ValueError(
            "db_ind and debug_dir should be specified when debug is turned on")

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]

    detections = []
    center_points = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # `| 127` sets the low seven bits, i.e. rounds up to the next value
        # of the form 128 * k - 1.
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        # inp + 1 is a multiple of 128, hence exactly divisible by 4.
        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        # Resize by `scale` first ...
        resized_image = cv2.resize(image, (new_width, new_height))
        # ... then crop around the centre to (inp_height, inp_width). As the
        # target is never smaller than the resized image, this presumably
        # pads the borders rather than cutting content -- TODO confirm
        # against crop_image.
        resized_image, border, _ = crop_image(
            resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)

        # (H, W, C) -> (C, H, W) for PyTorch.
        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        # Size of the resized content; the padded input is (inp_height, inp_width).
        sizes[0] = [new_height, new_width]
        # Output resolution relative to input resolution.
        ratios[0] = [height_ratio, width_ratio]

        # Batch of two: the image and its mirror along the width axis
        # (a horizontal flip).
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)

        dets, center = decode_func(nnet, images, K,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        # Map x-coordinates of the mirrored half back to the unflipped frame.
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        # Presumably maps boxes back to resized-image coordinates and
        # invalidates illegal ones -- see _rescale_dets.
        _rescale_dets(dets, ratios, borders, sizes)

        # Same mapping for the centers: undo the output stride, remove the
        # padding offset, then clip to the resized image.
        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        # Basic slices (0:1, 1:2) are views, so `out=` really writes into
        # `center`. Indexing with a list ([0], [1]) yields a copy and the
        # clipped values would be discarded.
        np.clip(center[..., 0:1], 0, sizes[:, 1][:, None, None],
                out=center[..., 0:1])
        np.clip(center[..., 1:2], 0, sizes[:, 0][:, None, None],
                out=center[..., 1:2])

        # Back to original-image coordinates.
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        # Centers are only taken from the un-scaled pass.
        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    if not center_points:
        raise ValueError(
            "test_scales must contain a scale of 1: center points are only "
            "collected at that scale")

    # Merge detections from all scales and drop the batch axis.
    detections = np.concatenate(detections, axis=1)[0]
    center_points = np.concatenate(center_points, axis=1)[0]

    # Keep candidates with a valid score only.
    valid_detections = detections[detections[:, 4] > -1]

    box_width = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]
    box_area = box_width * box_height

    # Small and large boxes use differently sized central regions.
    small_box_max_area = 22500  # 150 * 150
    s_detections = valid_detections[box_area <= small_box_max_area]
    l_detections = valid_detections[box_area > small_box_max_area]

    # Small boxes: central region is the middle third on each axis.
    _rescore_by_center_region(s_detections, center_points, 2, 1)
    # Large boxes: central region is the middle fifth on each axis.
    _rescore_by_center_region(l_detections, center_points, 3, 2)

    # Merge both groups and sort by descending score.
    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]

    # reject detections with negative scores
    detections = detections[detections[:, 4] > -1]
    classes = detections[..., -1]

    ret = {}
    for j in range(categories):
        keep_inds = (classes == j)
        ret[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(ret[j + 1], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(ret[j + 1], Nt=nms_threshold, method=nms_algorithm)
        ret[j + 1] = ret[j + 1][:, 0:5]

    scores = np.hstack([
        ret[j][:, -1]
        for j in range(1, categories + 1)
    ])
    if len(scores) > max_per_image:
        # Keep only the `max_per_image` highest-scoring boxes overall.
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= thresh)
            ret[j] = ret[j][keep_inds]

    if debug:
        # The visualisation is drawn on the image re-read from the dataset.
        image_file = db.image_file(db_ind)
        debug_image = cv2.imread(image_file)
        im = debug_image[:, :, (2, 1, 0)]  # BGR -> RGB for matplotlib
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')
        plt.axis('off')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        for j in range(1, categories + 1):
            # Score threshold for boxes that get drawn.
            keep_inds = (ret[j][:, -1] >= 0.4)
            cat_name = db.class_name(j)
            for bbox in ret[j][keep_inds]:
                score = bbox[4]
                bbox = bbox[0:4].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor=colours[j - 1],
                                  linewidth=4.0))
                ax.text(xmin + 1, ymin - 3,
                        '{} {:.3f}'.format(cat_name, score),
                        bbox=dict(facecolor=colours[j - 1], ec='black',
                                  lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')

        debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
        plt.savefig(debug_file2, bbox_inches='tight', pad_inches=0)
        plt.close()

        # Also save the ground-truth rendering for side-by-side comparison
        # (presumably what db.display produces -- confirm).
        db.display(db_ind,
                   os.path.join(debug_dir, "{}_gt.jpg".format(db_ind)),
                   show=False)
    return ret