def eval_with_plac(num_imgs, eval_dir, img_root, showbox, annotation_dir):
    # with open('/home/yjr/DataSet/VOC/VOC_test/VOC2007/ImageSets/Main/aeroplane_test.txt') as f:
    #     all_lines = f.readlines()
    # test_imgname_list = [a_line.split()[0].strip() for a_line in all_lines]
    test_imgname_list = [item for item in os.listdir(eval_dir)
                         if item.endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff'))]
    if num_imgs == np.inf:
        real_test_imgname_list = test_imgname_list
    else:
        real_test_imgname_list = test_imgname_list[:num_imgs]

    img_plac = tf.placeholder(dtype=tf.float32, shape=[None, None, 3])
    img = img_plac - tf.constant([103.939, 116.779, 123.68])  # pixel mean in BGR order
    img_batch = short_side_resize_for_inference_data(img, cfgs.SHORT_SIDE_LEN)
    h, w = img.shape[0], img.shape[1]  # unused; static shape is (None, None) here

    gt_boxes_label = tf.placeholder(dtype=tf.float32, shape=[None, 5])
    gt_boxes_label_batch = tf.expand_dims(gt_boxes_label, axis=0)

    image_height, image_width = tf.shape(img_batch)[1], tf.shape(img_batch)[2]

    _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                      inputs=img_batch,
                                      num_classes=None,
                                      is_training=False,
                                      output_stride=None,
                                      global_pool=False,
                                      spatial_squeeze=False)
    feature_pyramid = build_fpn.build_feature_pyramid(share_net)
    rpn = build_rpn.RPN(feature_pyramid=feature_pyramid,
                        image_height=image_height,
                        image_width=image_width,
                        gtboxes_and_label=gt_boxes_label_batch,
                        is_training=False)
    rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals(is_training=False)

    fast_rcnn = build_fast_rcnn.FAST_RCNN(feature_pyramid=feature_pyramid,
                                          rpn_proposals_boxes=rpn_proposals_boxes,
                                          gtboxes_and_label=gt_boxes_label_batch,
                                          origin_image=img_batch,
                                          is_training=False,  # evaluation graph, not training
                                          image_height=image_height,
                                          image_width=image_width)
    detections = fast_rcnn.head_detection()
    detection_boxes, detection_category, detection_scores = tf.squeeze(detections[:, :, :4], axis=0), \
                                                            tf.squeeze(detections[:, :, 4], axis=0), \
                                                            tf.squeeze(detections[:, :, 5], axis=0)

    indices = tf.reshape(tf.where(tf.greater_equal(detection_scores, cfgs.FINAL_SCORE_THRESHOLD)), [-1])
    detection_boxes = tf.gather(detection_boxes, indices)
    detection_scores = tf.gather(detection_scores, indices)
    detection_category = tf.gather(detection_category, indices)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = restore_model.get_restorer(test=True, checkpoint_path=cfgs.chekpoint_path)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes = []
        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(os.path.join(img_root, a_img_name))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    # fetch the resized batch (not the raw-size tensor) so the rescaling below is valid;
                    # raw_img stays BGR, which matches the BGR-ordered mean subtracted above
                    feed_dict={img_plac: raw_img}
                )
            print(a_img_name, detected_boxes, detected_scores, detected_categories)
            end = time.time()

            ymin, xmin, ymax, xmax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w
            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
            dets = np.hstack((detected_categories.reshape(-1, 1),
                              detected_scores.reshape(-1, 1),
                              boxes))
            all_boxes.append(dets)

            view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1,
                     len(real_test_imgname_list))

    voc_evaluate_detections(all_boxes=all_boxes,
                            test_annotation_path=annotation_dir,
                            test_imgid_list=real_test_imgname_list)
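# The eval and detect loops in this file all repeat the same coordinate mapping: boxes are
# predicted on the resized tensor fed to the network and must be scaled back to the original
# image before evaluation. A minimal, self-contained sketch of that step; the helper name
# rescale_boxes_to_raw is illustrative (not part of the repo) and it assumes the
# [xmin, ymin, xmax, ymax] order used by most of the functions below (the function above
# uses ymin-first order instead).

import numpy as np


def rescale_boxes_to_raw(boxes, resized_hw, raw_hw):
    """Map [xmin, ymin, xmax, ymax] boxes from resized-image to raw-image coordinates."""
    resized_h, resized_w = resized_hw
    raw_h, raw_w = raw_hw
    boxes = np.asarray(boxes, dtype=np.float32).copy()
    boxes[:, 0::2] *= raw_w / resized_w  # xmin, xmax
    boxes[:, 1::2] *= raw_h / resized_h  # ymin, ymax
    return boxes


# Example: a 600x800 resize of a 1200x1600 image simply doubles every coordinate.
# rescale_boxes_to_raw(np.array([[10., 20., 110., 220.]]), (600, 800), (1200, 1600))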
def detect_img(file_paths, des_folder, det_th, h_len, w_len, h_overlap, w_overlap, file_ext, show_res=False): with tf.Graph().as_default(): img_plac = tf.placeholder(shape=[None, None, 3], dtype=tf.uint8) img_tensor = tf.cast(img_plac, tf.float32) - tf.constant( [103.939, 116.779, 123.68]) img_batch = image_preprocess.short_side_resize_for_inference_data( img_tensor, target_shortside_len=cfgs.SHORT_SIDE_LEN, is_resize=False) # *********************************************************************************************** # * share net * # *********************************************************************************************** _, share_net = get_network_byname(net_name=cfgs.NET_NAME, inputs=img_batch, num_classes=None, is_training=True, output_stride=None, global_pool=False, spatial_squeeze=False) # *********************************************************************************************** # * RPN * # *********************************************************************************************** rpn = build_rpn.RPN( net_name=cfgs.NET_NAME, inputs=img_batch, gtboxes_and_label=None, is_training=False, share_head=cfgs.SHARE_HEAD, share_net=share_net, stride=cfgs.STRIDE, anchor_ratios=cfgs.ANCHOR_RATIOS, anchor_scales=cfgs.ANCHOR_SCALES, scale_factors=cfgs.SCALE_FACTORS, base_anchor_size_list=cfgs. BASE_ANCHOR_SIZE_LIST, # P2, P3, P4, P5, P6 level=cfgs.LEVEL, top_k_nms=cfgs.RPN_TOP_K_NMS, rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD, max_proposals_num=cfgs.MAX_PROPOSAL_NUM, rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD, rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD, rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE, rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE, remove_outside_anchors=False, # whether remove anchors outside rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME]) # rpn predict proposals rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals( ) # rpn_score shape: [300, ] # *********************************************************************************************** # * Fast RCNN * # *********************************************************************************************** fast_rcnn = build_fast_rcnn.FastRCNN( feature_pyramid=rpn.feature_pyramid, rpn_proposals_boxes=rpn_proposals_boxes, rpn_proposals_scores=rpn_proposals_scores, img_shape=tf.shape(img_batch), roi_size=cfgs.ROI_SIZE, roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE, scale_factors=cfgs.SCALE_FACTORS, gtboxes_and_label=None, gtboxes_and_label_minAreaRectangle=None, fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, fast_rcnn_maximum_boxes_per_img=100, fast_rcnn_nms_max_boxes_per_class=cfgs. FAST_RCNN_NMS_MAX_BOXES_PER_CLASS, show_detections_score_threshold=det_th, # show detections which score >= 0.6 num_classes=cfgs.CLASS_NUM, fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE, fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE, fast_rcnn_positives_iou_threshold=cfgs. 
FAST_RCNN_IOU_POSITIVE_THRESHOLD, # iou>0.5 is positive, iou<0.5 is negative use_dropout=cfgs.USE_DROPOUT, weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME], is_training=False, level=cfgs.LEVEL, head_quadrant=None) fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category, \ fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant, \ num_of_objects_rotate, detection_category_rotate = fast_rcnn.fast_rcnn_predict() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = restore_model.get_restorer() config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) for img_path in file_paths: start = timer() # gdal.AllRegister() # ds = gdal.Open(img_path, gdalconst.GA_ReadOnly) # if ds is None: # print("Image %s open failed!" % img_path) # sys.exit() img = cv2.imread(img_path) box_res = [] label_res = [] score_res = [] box_res_rotate = [] label_res_rotate = [] score_res_rotate = [] head_rotate = [] # imgH = ds.RasterYSize # imgW = ds.RasterXSize imgH = img.shape[0] imgW = img.shape[1] for hh in range(0, imgH, h_len - h_overlap): # hh = max(0, hh - 100) h_size = min(h_len, imgH - hh) if h_size < 10: break for ww in range(0, imgW, w_len - w_overlap): # ww = max(0, ww - 200) w_size = min(w_len, imgW - ww) if w_size < 10: break # src_img = ds.ReadAsArray(ww, hh, w_size, h_size) src_img = img[hh:(hh + h_size), ww:(ww + w_size), :] # if len(src_img.shape) == 2: # src_img = cv2.cvtColor(src_img, cv2.COLOR_GRAY2RGB) # else: # src_img = chw2hwc(src_img) # boxes, labels, scores = sess.run([fast_rcnn_decode_boxes, detection_category, fast_rcnn_score], # feed_dict={img_plac: src_img}) boxes_rotate, labels_rotate, scores_rotate, _fast_rcnn_head_quadrant = \ sess.run([fast_rcnn_decode_boxes_rotate, detection_category_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant], feed_dict={img_plac: src_img}) # if show_res: # visualize_detection(src_img, boxes, scores) # if len(boxes) > 0: # for ii in range(len(boxes)): # box = boxes[ii] # box[0] = box[0] + hh # box[1] = box[1] + ww # box[2] = box[2] + hh # box[3] = box[3] + ww # box_res.append(box) # label_res.append(labels[ii]) # score_res.append(scores[ii]) if len(boxes_rotate) > 0: for ii in range(len(boxes_rotate)): box_rotate = boxes_rotate[ii] box_rotate[0] = box_rotate[0] + hh box_rotate[1] = box_rotate[1] + ww box_res_rotate.append(box_rotate) label_res_rotate.append(labels_rotate[ii]) score_res_rotate.append(scores_rotate[ii]) head_rotate.append( _fast_rcnn_head_quadrant[ii]) # ds = None time_elapsed = timer() - start print("{} detection time : {:.4f} sec".format( img_path.split('/')[-1].split('.')[0], time_elapsed)) mkdir(des_folder) if len(head_rotate) != 0: # img_np = draw_box_cv(np.array(img, np.float32) - np.array([103.939, 116.779, 123.68]), # boxes=np.array(box_res), # labels=np.array(label_res), # scores=np.array(score_res)) img_np_rotate = draw_rotate_box_cv( np.array(img, np.float32) - np.array([103.939, 116.779, 123.68]), boxes=np.array(box_res_rotate), labels=np.array(label_res_rotate), scores=np.array(score_res_rotate), head=np.argmax(head_rotate, axis=1)) geo_points = get_points(box_res_rotate, np.argmax(head_rotate, axis=1)) print('********************************') 
print(np.array(geo_points)[:, 8:10]) print('********************************') print(np.argmax(head_rotate, axis=1)) print('********************************') print(box_res_rotate) xml_name = img_path.replace(file_ext, ".xml") # writer_XML(xml_name, geo_points, label_res, imgW, imgH) # cv2.imwrite(des_folder + '/{}_horizontal_fpn.jpg'.format(img_path.split('/')[-1].split('.')[0]), img_np) cv2.imwrite( des_folder + '/{}_rotate_fpn.jpg'.format( img_path.split('/')[-1].split('.')[0]), img_np_rotate) # clip_obj_imgs(src_img, box_res, label_res, score_res, des_folder) # print(img_path) # det_xml_path =img_path.replace(".tif", ".det.xml") # obj_to_det_xml(img_path, box_res, label_res, score_res, det_xml_path) coord.request_stop() coord.join(threads)
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        pbar = tqdm(imgs)
        for a_img_name in pbar:
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}
                )

            if draw_imgs:
                detected_indices = det_scores_r_ >= cfgs.VIS_SCORE
                detected_scores = det_scores_r_[detected_indices]
                detected_boxes = det_boxes_r_[detected_indices]
                detected_categories = det_category_r_[detected_indices]

                det_detections_r = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.squeeze(resized_img, 0),
                    boxes=detected_boxes,
                    labels=detected_categories,
                    scores=detected_scores,
                    method=1,
                    in_graph=True)

                save_dir = os.path.join('test_hrsc', cfgs.VERSION, 'hrsc2016_img_vis')
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/{}.jpg'.format(a_img_name),
                            det_detections_r[:, :, ::-1])

            if det_boxes_r_.shape[0] != 0:
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                det_boxes_r_ = forward_convert(det_boxes_r_, False)
                det_boxes_r_[:, 0::2] *= (raw_w / resized_w)
                det_boxes_r_[:, 1::2] *= (raw_h / resized_h)
                det_boxes_r_ = backward_convert(det_boxes_r_, False)

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            pbar.set_description("Eval image %s" % a_img_name)

        # fw1 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')
        # pickle.dump(all_boxes_r, fw1)

        return all_boxes_r
def inference(det_net, data_dir):

    TIME = 0
    TIME_NUM = 0

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     is_resize=IS_RESIZE)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_h_batch=None,
        gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        imgs = os.listdir(data_dir)
        # print(imgs)
        for i, a_img_name in enumerate(imgs):
            file_text, extension = os.path.splitext(a_img_name)
            if extension not in ('.jpg', '.tif', '.png'):
                continue

            # f = open('./res_icdar_r/res_{}.txt'.format(a_img_name.split('.jpg')[0]), 'w')
            raw_img = cv2.imread(os.path.join(data_dir, a_img_name))
            # raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \
            det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_h, det_scores_h, det_category_h,
                     det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()
            TIME += end - start
            TIME_NUM += 1

            if WRITE_VOC:
                boxes = np.array(det_boxes_r_, np.int64)
                scores = np.array(det_scores_r_, np.float32)
                labels = np.array(det_category_r_, np.int32)
                det_save_dir = cfgs.INFERENCE_SAVE_PATH
                write_voc_results_file(boxes, labels, scores,
                                       a_img_name.split('.')[0], det_save_dir)
                write_pixel_results(boxes, labels, scores,
                                    a_img_name.split('.')[0], det_save_dir)

            det_detections_h = draw_box_in_img.draw_box_cv(np.squeeze(resized_img, 0),
                                                           boxes=det_boxes_h_,
                                                           labels=det_category_h_,
                                                           scores=det_scores_h_)
            det_detections_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0),
                                                                  boxes=det_boxes_r_,
                                                                  labels=det_category_r_,
                                                                  scores=det_scores_r_)
            save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION)
            tools.mkdir(save_dir)
            if OUTPUT_H_IMG:
                cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg', det_detections_h)
            cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg', det_detections_r)

            view_bar('{} cost {}s'.format(a_img_name, (end - start)), i + 1, len(imgs))

        print('avg time:{}'.format(TIME / TIME_NUM))
def eval_with_plac(det_net, real_test_imgname_list, img_root, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
    #     img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    # else:
    #     img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD) * 255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes = []
        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(os.path.join(img_root, a_img_name))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))

            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                draw_img = np.squeeze(resized_img, 0)
                # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                #     draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                # else:
                #     draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)
                draw_img = draw_img * (np.array(cfgs.PIXEL_STD) * 255) + np.array(cfgs.PIXEL_MEAN)

                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)
                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + a_img_name + '.jpg',
                            final_detections[:, :, ::-1])

            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w
            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
            dets = np.hstack((detected_categories.reshape(-1, 1),
                              detected_scores.reshape(-1, 1),
                              boxes))
            all_boxes.append(dets)

            tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1,
                           len(real_test_imgname_list))

        save_dir = os.path.join(cfgs.EVALUATE_DIR, cfgs.VERSION)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        fw1 = open(os.path.join(save_dir, 'detections.pkl'), 'wb')
        pickle.dump(all_boxes, fw1)

        return all_boxes
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_h_batch=None,
        gtboxes_r_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        all_boxes_r = []
        imgs = os.listdir(img_dir)
        for i, a_img_name in enumerate(imgs):
            a_img_name = a_img_name.split(image_ext)[0]

            raw_img = cv2.imread(os.path.join(img_dir, a_img_name + image_ext))
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()
            # only the rotated outputs are used below, so fetch exactly those four tensors
            resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: raw_img}
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))

            if draw_imgs:
                det_detections_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0),
                                                                      boxes=det_boxes_r_,
                                                                      labels=det_category_r_,
                                                                      scores=det_scores_r_)
                save_dir = os.path.join(cfgs.TEST_SAVE_PATH, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg',
                            det_detections_r[:, :, ::-1])

            x_c, y_c, w, h, theta = det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \
                                    det_boxes_r_[:, 3], det_boxes_r_[:, 4]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            # only the box centre is mapped back to raw-image scale; w, h, theta are kept as predicted
            x_c = x_c * raw_w / resized_w
            y_c = y_c * raw_h / resized_h

            boxes_r = np.transpose(np.stack([x_c, y_c, w, h, theta]))
            dets_r = np.hstack((det_category_r_.reshape(-1, 1),
                                det_scores_r_.reshape(-1, 1),
                                boxes_r))
            all_boxes_r.append(dets_r)

            tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1, len(imgs))

        fw2 = open(cfgs.VERSION + '_detections_r.pkl', 'wb')  # pickle requires binary mode
        pickle.dump(all_boxes_r, fw2)
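# The function above dumps its accumulated rotated detections with pickle. A small, hedged
# sketch of reading that file back for offline evaluation; the path is the one written
# above, the helper name load_detections is illustrative:

import pickle


def load_detections(pkl_path):
    """Load the list of per-image detection arrays written by eval_with_plac."""
    with open(pkl_path, 'rb') as f:  # binary mode, matching the 'wb' dump above
        return pickle.load(f)


# Example (assuming cfgs.VERSION is available in the caller's scope):
# all_boxes_r = load_detections(cfgs.VERSION + '_detections_r.pkl')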
def detect(det_net, inference_save_path, real_test_imgname_list):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # [1, None, None, 3]

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(a_img_name)
            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))

            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                     detected_boxes[:, 2], detected_boxes[:, 3]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            xmin = xmin * raw_w / resized_w
            xmax = xmax * raw_w / resized_w
            ymin = ymin * raw_h / resized_h
            ymax = ymax * raw_h / resized_h

            detected_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))

            show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
            show_scores = detected_scores[show_indices]
            show_boxes = detected_boxes[show_indices]
            show_categories = detected_categories[show_indices]

            # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
            #     raw_img = (raw_img / 255 - np.array(cfgs.PIXEL_MEAN_)) / np.array(cfgs.PIXEL_STD)
            # else:
            #     raw_img = raw_img - np.array(cfgs.PIXEL_MEAN)

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(raw_img,
                                                                                boxes=show_boxes,
                                                                                labels=show_categories,
                                                                                scores=show_scores,
                                                                                in_graph=False)
            nake_name = a_img_name.split('/')[-1]
            # print(inference_save_path + '/' + nake_name)
            cv2.imwrite(inference_save_path + '/' + nake_name,
                        final_detections[:, :, ::-1])

            tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1,
                           len(real_test_imgname_list))
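# detect() above expects a list of full image paths. A minimal sketch of building that list
# and making sure the save directory exists before calling it; collect_test_images is a
# hypothetical helper, not a function from the repo:

import os


def collect_test_images(test_dir, exts=('.jpg', '.jpeg', '.png', '.tif', '.tiff')):
    """Return full paths of all images in test_dir with one of the given extensions."""
    return [os.path.join(test_dir, name)
            for name in sorted(os.listdir(test_dir))
            if name.lower().endswith(exts)]


# Example usage (det_net construction is repo-specific and omitted here):
# imgs = collect_test_images('./demo_imgs')
# os.makedirs('./inference_results', exist_ok=True)
# detect(det_net, inference_save_path='./inference_results', real_test_imgname_list=imgs)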
def eval_with_plac(img_dir, det_net, num_imgs, image_ext, draw_imgs, test_annotation_path): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not GBR img_batch = tf.cast(img_plac, tf.float32) img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, is_resize=False) det_boxes_h, det_scores_h, det_category_h, \ det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) global_step_tensor = slim.get_or_create_global_step() eval_result = [] last_checkpoint_name = None while True: restorer, restore_ckpt = det_net.get_restorer() #saver = tf.train.Saver(max_to_keep=10) start_time = time.time() model_path = os.path.splitext(os.path.basename(restore_ckpt))[0] if model_path == None: print("Wait for available checkpoint") elif last_checkpoint_name == model_path: print( "Already evaluated checkpoint {}, we will try evaluation in {} seconds" .format(model_path, EVAL_INTERVAL)) #continue else: print('Last ckpt was {}, new ckpt is {}'.format( last_checkpoint_name, model_path)) last_checkpoint_name = model_path config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) sess.run(global_step_tensor.initializer) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model', restore_ckpt) global_stepnp = tf.train.global_step(sess, global_step_tensor) print('#########################', global_stepnp) all_boxes_h = [] all_boxes_r = [] imgs = os.listdir(img_dir) imgs_len = len(imgs) none_detected_image = [] for i, a_img_name in enumerate(imgs[:]): a_img_name = a_img_name.split(image_ext)[0] image_name = a_img_name + image_ext print('\n', a_img_name) raw_img = cv2.imread( os.path.join(img_dir, a_img_name + image_ext)) raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \ det_boxes_r_, det_scores_r_, det_category_r_ = \ sess.run( [img_batch, det_boxes_h, det_scores_h, det_category_h, det_boxes_r, det_scores_r, det_category_r], feed_dict={img_plac: raw_img} ) end = time.time() print("det category H : ", det_category_h_) print("det category R : ", det_category_r_) # print("{} cost time : {} ".format(img_name, (end - start))) if draw_imgs: det_detections_h = draw_box_in_img.draw_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_h_, labels=det_category_h_, scores=det_scores_h_) det_detections_r = draw_box_in_img.draw_rotate_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_r_, labels=det_category_r_, scores=det_scores_r_) save_dir = os.path.join(cfgs.TEST_SAVE_PATH, cfgs.VERSION) tools.mkdir(save_dir) cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg', det_detections_h[:, :, ::-1]) cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg', det_detections_r[:, :, ::-1]) xmin, ymin, xmax, ymax = det_boxes_h_[:, 0], det_boxes_h_[:, 1], \ det_boxes_h_[:, 2], det_boxes_h_[:, 3] if det_boxes_r_.shape[0] != 0: #print('### Has box ###') resized_h, resized_w = resized_img.shape[ 1], resized_img.shape[2] det_boxes_r_ = forward_convert(det_boxes_r_, False) det_boxes_r_[:, 0::2] *= (raw_w / resized_w) det_boxes_r_[:, 1::2] *= (raw_h / resized_h) det_boxes_r_ = back_forward_convert( det_boxes_r_, False) x_c, y_c, w, h, theta = 
det_boxes_r_[:, 0], det_boxes_r_[:, 1], det_boxes_r_[:, 2], \ det_boxes_r_[:, 3], det_boxes_r_[:, 4] xmin = xmin * raw_w / resized_w xmax = xmax * raw_w / resized_w ymin = ymin * raw_h / resized_h ymax = ymax * raw_h / resized_h boxes_h = np.transpose( np.stack([xmin, ymin, xmax, ymax])) boxes_r = np.transpose( np.stack([x_c, y_c, w, h, theta])) dets_h = np.hstack((det_category_h_.reshape(-1, 1), det_scores_h_.reshape(-1, 1), boxes_h)) dets_r = np.hstack((det_category_r_.reshape(-1, 1), det_scores_r_.reshape(-1, 1), boxes_r)) all_boxes_h.append(dets_h) all_boxes_r.append(dets_r) else: imgs.remove(image_name) none_detected_image.append(image_name) print('No detected') tools.view_bar( '{} image cost {}s'.format(a_img_name, (end - start)), i + 1, imgs_len) fw1 = open(cfgs.VERSION + '_detections_h.pkl', 'wb') fw2 = open(cfgs.VERSION + '_detections_r.pkl', 'wb') pickle.dump(all_boxes_h, fw1) pickle.dump(all_boxes_r, fw2) # with open(cfgs.VERSION + '_detections_h.pkl', 'rb') as f1: # all_boxes_h = pickle.load(f1, encoding='unicode') # print(10 * "###") # print(len(all_boxes_h)) # # with open(cfgs.VERSION + '_detections_r.pkl', 'rb') as f2: # all_boxes_r = pickle.load(f2, encoding='unicode') # # print(len(all_boxes_r)) # imgs = os.listdir(img_dir) real_test_imgname_list = [i.split(image_ext)[0] for i in imgs] print(10 * "**") print('horizon eval:') # print(len(all_boxes_h), len(all_boxes_r)) # print(len(real_test_imgname_list)) mAP_h, recall_h, precision_h, total_mAP_h, total_recall_h, total_precision_h = voc_eval.voc_evaluate_detections( all_boxes=all_boxes_h, test_imgid_list=real_test_imgname_list, test_annotation_path=test_annotation_path) print('mAP_h: ', mAP_h) print('mRecall_h:', recall_h) print('mPrecision_h:', precision_h) print('total_mAP_h: ', total_mAP_h) print('total_recall_h_list:', total_recall_h) print('total_precision_h_list:', total_precision_h) print(10 * "**") print('rotation eval:') mAP_r, recall_r, precision_r, total_mAP_r, total_recall_r, total_precision_r = voc_eval_r.voc_evaluate_detections( all_boxes=all_boxes_r, test_imgid_list=real_test_imgname_list, test_annotation_path=test_annotation_path) f1score_h_check = (1 + 1**2) * precision_h * recall_h / ( 1**2 * precision_h + recall_h) f1score_h = calc_fscore(precision_h, recall_h, 1) f1score_r_check = (1 + 1**2) * precision_r * recall_r / ( 1**2 * precision_r + recall_r) f1score_r = calc_fscore(precision_r, recall_r, 1) print(10 * '##') print('mAP_r:', mAP_r) print('mRecall_r:', recall_r) print('mPrecision_r:', precision_r) print('total_mAP_r_list: ', total_mAP_r) print('total_recall_r_list:', total_recall_r) print('total_precision_r_list:', total_precision_r) print('f1score_r:', f1score_r) summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION + '/eval_0') tools.mkdir(summary_path) summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) mAP_h_summ = tf.Summary() mAP_h_summ.value.add(tag='EVAL_Global/mAP_h', simple_value=mAP_h) summary_writer.add_summary(mAP_h_summ, global_stepnp) mAP_r_summ = tf.Summary() mAP_r_summ.value.add(tag='EVAL_Global/mAP_r', simple_value=mAP_r) summary_writer.add_summary(mAP_r_summ, global_stepnp) mRecall_h_summ = tf.Summary() mRecall_h_summ.value.add(tag='EVAL_Global/Recall_h', simple_value=recall_h) summary_writer.add_summary(mRecall_h_summ, global_stepnp) mRecall_r_summ = tf.Summary() mRecall_r_summ.value.add(tag='EVAL_Global/Recall_r', simple_value=recall_r) summary_writer.add_summary(mRecall_r_summ, global_stepnp) mPrecision_h_summ = tf.Summary() 
mPrecision_h_summ.value.add(tag='EVAL_Global/Precision_h', simple_value=precision_h) summary_writer.add_summary(mPrecision_h_summ, global_stepnp) mPrecision_r_summ = tf.Summary() mPrecision_r_summ.value.add(tag='EVAL_Global/Precision_r', simple_value=precision_r) summary_writer.add_summary(mPrecision_r_summ, global_stepnp) mF1Score_h_summ = tf.Summary() mF1Score_h_summ.value.add(tag='EVAL_Global/F1Score_h', simple_value=f1score_h) summary_writer.add_summary(mF1Score_h_summ, global_stepnp) mF1Score_r_summ = tf.Summary() mF1Score_r_summ.value.add(tag='EVAL_Global/F1Score_r', simple_value=f1score_r) summary_writer.add_summary(mF1Score_r_summ, global_stepnp) mAP_h_class_dict = {} mAP_r_class_dict = {} recall_h_class_dict = {} recall_r_class_dict = {} precision_h_class_dict = {} precision_r_class_dict = {} f1score_h_class_dict = {} f1score_r_class_dict = {} label_list = list(NAME_LABEL_MAP.keys()) label_list.remove('back_ground') for cls in label_list: mAP_h_class_dict["cls_%s_mAP_h_summ" % cls] = tf.Summary() mAP_r_class_dict["cls_%s_mAP_r_summ" % cls] = tf.Summary() recall_h_class_dict["cls_%s_recall_h_summ" % cls] = tf.Summary() recall_r_class_dict["cls_%s_recall_r_summ" % cls] = tf.Summary() precision_h_class_dict["cls_%s_precision_h_summ" % cls] = tf.Summary() precision_r_class_dict["cls_%s_precision_r_summ" % cls] = tf.Summary() f1score_h_class_dict["cls_%s_f1score_h_summ" % cls] = tf.Summary() f1score_r_class_dict["cls_%s_f1score_r_summ" % cls] = tf.Summary() for cls in label_list: mAP_h_class_dict["cls_%s_mAP_h_summ" % cls].value.add( tag='EVAL_Class_mAP/{}_mAP_h'.format(cls), simple_value=total_mAP_h[cls]) mAP_r_class_dict["cls_%s_mAP_r_summ" % cls].value.add( tag='EVAL_Class_mAP/{}_mAP_r'.format(cls), simple_value=total_mAP_r[cls]) recall_h_class_dict[ "cls_%s_recall_h_summ" % cls].value.add( tag='EVAL_Class_recall/{}_recall_h'.format(cls), simple_value=total_recall_h[cls]) recall_r_class_dict[ "cls_%s_recall_r_summ" % cls].value.add( tag='EVAL_Class_recall/{}_recall_r'.format(cls), simple_value=total_recall_r[cls]) precision_h_class_dict[ "cls_%s_precision_h_summ" % cls].value.add( tag='EVAL_Class_precision/{}_precision_h'.format( cls), simple_value=total_precision_h[cls]) precision_r_class_dict[ "cls_%s_precision_r_summ" % cls].value.add( tag='EVAL_Class_precision/{}_precision_r'.format( cls), simple_value=total_precision_r[cls]) f1score_h_cls = calc_fscore(total_precision_h[cls], total_recall_h[cls], 1) f1score_r_cls = calc_fscore(total_precision_r[cls], total_recall_r[cls], 1) f1score_h_class_dict[ "cls_%s_f1score_h_summ" % cls].value.add( tag='EVAL_Class_f1score/{}_f1score_h'.format(cls), simple_value=f1score_h_cls) f1score_r_class_dict[ "cls_%s_f1score_r_summ" % cls].value.add( tag='EVAL_Class_f1score/{}_f1score_r'.format(cls), simple_value=f1score_r_cls) for cls in label_list: summary_writer.add_summary( mAP_h_class_dict["cls_%s_mAP_h_summ" % cls], global_stepnp) summary_writer.add_summary( mAP_r_class_dict["cls_%s_mAP_r_summ" % cls], global_stepnp) summary_writer.add_summary( recall_h_class_dict["cls_%s_recall_h_summ" % cls], global_stepnp) summary_writer.add_summary( recall_r_class_dict["cls_%s_recall_r_summ" % cls], global_stepnp) summary_writer.add_summary( precision_h_class_dict["cls_%s_precision_h_summ" % cls], global_stepnp) summary_writer.add_summary( precision_r_class_dict["cls_%s_precision_r_summ" % cls], global_stepnp) summary_writer.add_summary( f1score_h_class_dict["cls_%s_f1score_h_summ" % cls], global_stepnp) summary_writer.add_summary( 
f1score_r_class_dict["cls_%s_f1score_r_summ" % cls], global_stepnp) summary_writer.flush() if not os.path.exists(save_dir): os.makedirs(save_dir) save_ckpt = os.path.join(save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt') #saver.save(sess, save_ckpt) print(' weights had been saved') time_to_next_eval = start_time + EVAL_INTERVAL - time.time() if time_to_next_eval > 0: time.sleep(time_to_next_eval)
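# The evaluation loop above reports F1 through calc_fscore(precision, recall, 1) and
# cross-checks it against the explicit (1 + 1**2) * P * R / (1**2 * P + R) expression. A
# minimal sketch of that F-beta computation, assuming calc_fscore follows the standard
# definition (the name calc_fscore_sketch is illustrative):

def calc_fscore_sketch(precision, recall, beta=1):
    """Standard F-beta score: (1 + beta^2) * P * R / (beta^2 * P + R); 0 when undefined."""
    denom = beta ** 2 * precision + recall
    if denom == 0:
        return 0.0
    return (1 + beta ** 2) * precision * recall / denom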
def detect(det_net, inference_save_path, real_test_imgname_list):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # [1, None, None, 3]

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(a_img_name)
            start = time.time()
            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()
            # print("{} cost time : {} ".format(img_name, (end - start)))

            show_indices = detected_scores >= cfgs.VIS_SCORE
            show_scores = detected_scores[show_indices]
            show_boxes = detected_boxes[show_indices]
            show_categories = detected_categories[show_indices]

            draw_img = np.squeeze(resized_img, 0)
            if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
                draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
            else:
                draw_img = draw_img + np.array(cfgs.PIXEL_MEAN)

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img,
                                                                                boxes=show_boxes,
                                                                                labels=show_categories,
                                                                                scores=show_scores,
                                                                                method=1,
                                                                                head=np.ones_like(show_scores) * -1,
                                                                                in_graph=False)
            nake_name = a_img_name.split('/')[-1]
            # print(inference_save_path + '/' + nake_name)
            cv2.imwrite(inference_save_path + '/' + nake_name,
                        final_detections[:, :, ::-1])

            tools.view_bar('{} image cost {}s'.format(nake_name, (end - start)), i + 1,
                           len(real_test_imgname_list))
def worker(gpu_id, images, det_net, args, result_queue): os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not BGR img_batch = tf.cast(img_plac, tf.float32) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN[0], length_limitation=cfgs.IMG_MAX_LENGTH) if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']: img_batch = (img_batch / 255 - tf.constant( cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) else: img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = tf.expand_dims(img_batch, axis=0) detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch=None, gtboxes_r_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model %d ...' % gpu_id) for img_path in images: # if '2043' not in img_path: # continue img = cv2.imread(img_path) box_res = [] label_res = [] score_res = [] imgH = img.shape[0] imgW = img.shape[1] if imgH < args.h_len: temp = np.zeros([args.h_len, imgW, 3], np.float32) temp[0:imgH, :, :] = img img = temp imgH = args.h_len if imgW < args.w_len: temp = np.zeros([imgH, args.w_len, 3], np.float32) temp[:, 0:imgW, :] = img img = temp imgW = args.w_len for hh in range(0, imgH, args.h_len - args.h_overlap): if imgH - hh - 1 < args.h_len: hh_ = imgH - args.h_len else: hh_ = hh for ww in range(0, imgW, args.w_len - args.w_overlap): if imgW - ww - 1 < args.w_len: ww_ = imgW - args.w_len else: ww_ = ww src_img = img[hh_:(hh_ + args.h_len), ww_:(ww_ + args.w_len), :] resized_img, det_boxes_h_, det_scores_h_, det_category_h_ = \ sess.run( [img_batch, detection_boxes, detection_scores, detection_category], feed_dict={img_plac: src_img[:, :, ::-1]} ) resized_h, resized_w = resized_img.shape[ 1], resized_img.shape[2] src_h, src_w = src_img.shape[0], src_img.shape[1] if len(det_boxes_h_) > 0: det_boxes_h_[:, 0::2] *= (src_w / resized_w) det_boxes_h_[:, 1::2] *= (src_h / resized_h) for ii in range(len(det_boxes_h_)): box = det_boxes_h_[ii] box[0] = box[0] + ww_ box[1] = box[1] + hh_ box[2] = box[2] + ww_ box[3] = box[3] + hh_ box_res.append(box) label_res.append(det_category_h_[ii]) score_res.append(det_scores_h_[ii]) box_res = np.array(box_res) label_res = np.array(label_res) score_res = np.array(score_res) filter_indices = score_res >= 0.05 score_res = score_res[filter_indices] box_res = box_res[filter_indices] label_res = label_res[filter_indices] box_res_ = [] label_res_ = [] score_res_ = [] threshold = { 'roundabout': 0.35, 'tennis-court': 0.35, 'swimming-pool': 0.4, 'storage-tank': 0.3, 'soccer-ball-field': 0.3, 'small-vehicle': 0.4, 'ship': 0.35, 'plane': 0.35, 'large-vehicle': 0.4, 'helicopter': 0.4, 'harbor': 0.3, 'ground-track-field': 0.4, 'bridge': 0.3, 'basketball-court': 0.4, 'baseball-diamond': 0.3 } for sub_class in range(1, cfgs.CLASS_NUM + 1): index = np.where(label_res == sub_class)[0] if len(index) == 0: continue tmp_boxes_h = box_res[index] tmp_label_h = label_res[index] tmp_score_h = score_res[index] tmp_boxes_h = np.array(tmp_boxes_h) tmp = np.zeros( [tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1]) tmp[:, 0:-1] = tmp_boxes_h tmp[:, -1] = 
np.array(tmp_score_h) if cfgs.SOFT_NMS: inx = soft_nms(np.array(tmp, np.float32), 0.5, Nt=threshold[LABEL_NAME_MAP[sub_class]], threshold=0.001, method=2) # 2 means Gaussian else: inx = nms(np.array(tmp, np.float32), threshold[LABEL_NAME_MAP[sub_class]]) box_res_.extend(np.array(tmp_boxes_h)[inx]) score_res_.extend(np.array(tmp_score_h)[inx]) label_res_.extend(np.array(tmp_label_h)[inx]) result_dict = { 'boxes': np.array(box_res_), 'scores': np.array(score_res_), 'labels': np.array(label_res_), 'image_id': img_path } result_queue.put_nowait(result_dict)
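# The worker above runs per-class NMS with the repo's compiled nms/soft_nms ops. For
# reference, a plain-numpy greedy NMS over [xmin, ymin, xmax, ymax] boxes that follows the
# same idea (illustrative only; it is not the op used above):

import numpy as np


def nms_numpy(boxes, scores, iou_threshold):
    """Return indices of boxes kept by greedy non-maximum suppression."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-12)
        order = order[1:][iou <= iou_threshold]   # drop boxes that overlap too much
    return keep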
def inference(det_net, data_dir): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) img_batch = tf.cast(img_plac, tf.float32) img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN) rois, roi_scores, det_boxes_h, det_scores_h, det_category_h, \ all_boxes_r, all_scores_r, all_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None) init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') imgs = os.listdir(data_dir) for i, a_img_name in enumerate(imgs): raw_img = cv2.imread(os.path.join(data_dir, a_img_name)) # raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, rois_, roi_scores_, det_boxes_h_, det_scores_h_, det_category_h_, \ all_boxes_r_, all_scores_r_, all_category_r_ = \ sess.run( [img_batch, rois, roi_scores, det_boxes_h, det_scores_h, det_category_h, all_boxes_r, all_scores_r, all_category_r], feed_dict={img_plac: raw_img} ) end = time.time() print('all rois shape:', rois_.shape) all_boxes_new = all_boxes_r_ # [-1, 5] all_scores_new = all_scores_r_ # [-1] all_category_new = all_category_r_ # [-1] print('all dets shape:', all_boxes_new.shape) # draw all rois from proposals #rois_img_all = mylibs.draw_rois_scores(np.squeeze(resized_img, 0), rois_, roi_scores_) #score_gre_05 = np.reshape(np.where(np.greater_equal(roi_scores_, 0.5)), -1) #score_gre_05_rois = rois_[score_gre_05] #score_gre_05_scores = roi_scores_[score_gre_05] #rois_img_part = mylibs.draw_rois_scores(np.squeeze(resized_img, 0), score_gre_05_rois, score_gre_05_scores) # draw all 800 detection boxes all_indices = np.reshape(np.where(np.greater_equal(all_scores_new, cfgs.SHOW_SCORE_THRSHOLD)), -1) left_boxes = all_boxes_new[all_indices] left_scores = all_scores_new[all_indices] left_category = all_category_new[all_indices] #print('greater than score shape:', left_boxes.shape) detection_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0), boxes=left_boxes, labels=left_category, scores=left_scores, imgname=a_img_name) """ while True: # nms keep = mylibs.nmsRotate(all_boxes_new, all_scores_new, cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS) final_boxes = all_boxes_new[keep] final_scores = all_scores_new[keep] final_category = all_category_new[keep] kept_indices = np.reshape(np.where(np.greater_equal(final_scores, cfgs.SHOW_SCORE_THRSHOLD)), -1) det_boxes_new = final_boxes[kept_indices] det_scores_new = final_scores[kept_indices] det_category_new = final_category[kept_indices] # detected boxes contours, angles = mylibs.draw_rotate_box_cv_my(det_boxes_new, det_category_new) dtbox = mylibs.getRboxDegree(contours, angles) # n #print('dtbox shape is', dtbox.shape) fuv, features = mylibs.geneTestImageFeats(dtbox) # n-2 if fuv.shape[0] == 0: print(a_img_name, 'left none bones') break dtbox_idx = mylibs.svmPred(fuv, features) # deleted bones index #print('dtbox_idx shape is', dtbox_idx.shape) contour_idx = np.reshape(dtbox[dtbox_idx][:, -1], -1).astype(np.int32) kept_idx = kept_indices[contour_idx] keep_index = keep[kept_idx] #print('keep_index shape is', 
keep_index.shape) #n = det_boxes_new.shape[0] if len(dtbox_idx) == 0: # no svm deleted bones break else: # delete some rows all_boxes_new = np.delete(all_boxes_new, keep_index, axis=0) all_scores_new = np.delete(all_scores_new, keep_index, axis=0) all_category_new = np.delete(all_category_new, keep_index, axis=0) # unsorted fcontours = contours fangles = angles fscores = det_scores_new """ """ # save contours and angles to showSVMdecision scores detection_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0), boxes=det_boxes_new, labels=det_category_new, scores=det_scores_new, imgname=a_img_name) """ """ # final_dtbox[i, :] = [x0, y0, x1, y1, x2, y2, x3, y3, dg, ycenter, from_idx] final_dtbox = mylibs.getRboxDegree(fcontours, fangles) dt_idx = final_dtbox[:, -1].astype(np.int32) # sorted # sorted fcontours = fcontours[dt_idx] fangles = fangles[dt_idx] fscores = fscores[dt_idx] img, tcontours, tangles = mylibs.draw_contour_box(np.squeeze(resized_img, 0), fcontours, fangles, fscores) t_dtbox = mylibs.getRboxDegree(tcontours, tangles) cobb_img = mylibs.getCobb(t_dtbox, img) """ save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION) tools.mkdir(save_dir) cv2.imwrite(save_dir + '/' + a_img_name + '_roi.jpg', detection_r) view_bar('{} cost {}s'.format(a_img_name, (end - start)), i + 1, len(imgs)) print()
def worker(gpu_id, images, det_net, result_queue): os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not BGR img_batch = tf.cast(img_plac, tf.float32) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH, is_resize=not args.multi_scale) if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']: img_batch = (img_batch / 255 - tf.constant( cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) else: img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = tf.expand_dims(img_batch, axis=0) detection_boxes, detection_scores, detection_category, detection_boxes_angle = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch_h=None, gtboxes_batch_r=None, gt_smooth_label=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model %d ...' % gpu_id) for a_img in images: raw_img = cv2.imread(a_img) raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] img_short_side_len_list = cfgs.IMG_SHORT_SIDE_LEN if args.multi_scale else [ cfgs.IMG_SHORT_SIDE_LEN ] det_boxes_r_all, det_scores_r_all, det_category_r_all = [], [], [] for short_size in img_short_side_len_list: max_len = cfgs.IMG_MAX_LENGTH if raw_h < raw_w: new_h, new_w = short_size, min( int(short_size * float(raw_w) / raw_h), max_len) else: new_h, new_w = min(int(short_size * float(raw_h) / raw_w), max_len), short_size img_resize = cv2.resize(raw_img, (new_w, new_h)) resized_img, detected_boxes, detected_scores, detected_categories = \ sess.run( [img_batch, detection_boxes_angle, detection_scores, detection_category], feed_dict={img_plac: img_resize[:, :, ::-1]} ) detected_indices = detected_scores >= cfgs.VIS_SCORE detected_scores = detected_scores[detected_indices] detected_boxes = detected_boxes[detected_indices] detected_categories = detected_categories[detected_indices] if detected_boxes.shape[0] == 0: continue resized_h, resized_w = resized_img.shape[1], resized_img.shape[ 2] detected_boxes = forward_convert(detected_boxes, False) detected_boxes[:, 0::2] *= (raw_w / resized_w) detected_boxes[:, 1::2] *= (raw_h / resized_h) # detected_boxes = backward_convert(detected_boxes, False) det_boxes_r_all.extend(detected_boxes) det_scores_r_all.extend(detected_scores) det_category_r_all.extend(detected_categories) det_boxes_r_all = np.array(det_boxes_r_all) det_scores_r_all = np.array(det_scores_r_all) det_category_r_all = np.array(det_category_r_all) if det_scores_r_all.shape[0] == 0: continue box_res_rotate_ = [] label_res_rotate_ = [] score_res_rotate_ = [] for sub_class in range(1, cfgs.CLASS_NUM + 1): index = np.where(det_category_r_all == sub_class)[0] if len(index) == 0: continue tmp_boxes_r = det_boxes_r_all[index] tmp_label_r = det_category_r_all[index] tmp_score_r = det_scores_r_all[index] tmp_boxes_r_ = backward_convert(tmp_boxes_r, False) try: inx = nms_rotate.nms_rotate_cpu( boxes=np.array(tmp_boxes_r_), scores=np.array(tmp_score_r), iou_threshold=cfgs.NMS_IOU_THRESHOLD, max_output_size=5000) except: tmp_boxes_r_ = np.array(tmp_boxes_r_) tmp = np.zeros( [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1]) tmp[:, 0:-1] = 
tmp_boxes_r_ tmp[:, -1] = np.array(tmp_score_r) # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms jitter = np.zeros( [tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1]) jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000 inx = rotate_gpu_nms( np.array(tmp, np.float32) + np.array(jitter, np.float32), float(cfgs.NMS_IOU_THRESHOLD), 0) box_res_rotate_.extend(np.array(tmp_boxes_r)[inx]) score_res_rotate_.extend(np.array(tmp_score_r)[inx]) label_res_rotate_.extend(np.array(tmp_label_r)[inx]) box_res_rotate_ = np.array(box_res_rotate_) score_res_rotate_ = np.array(score_res_rotate_) label_res_rotate_ = np.array(label_res_rotate_) result_dict = { 'scales': [1, 1], 'boxes': box_res_rotate_, 'scores': score_res_rotate_, 'labels': label_res_rotate_, 'image_id': a_img } result_queue.put_nowait(result_dict)
def detect(det_net, src_dir, res_dir, draw_imgs):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # [1, None, None, 3]

    result_dict = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                        gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        sub_folders = os.listdir(src_dir)
        for sub_folder in sub_folders:
            folder_dir = os.path.join(src_dir, sub_folder)
            real_test_imgname_list = [os.path.join(folder_dir, img_name)
                                      for img_name in os.listdir(folder_dir)]
            tools.mkdir(os.path.join(res_dir, sub_folder))

            for i, a_img_name in enumerate(real_test_imgname_list):

                raw_img = cv2.imread(a_img_name)
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

                start = time.time()
                resized_img, result_dict_ = \
                    sess.run(
                        [img_batch, result_dict],
                        feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                    )
                end = time.time()

                detected_boxes, detected_scores, detected_categories = merge_result(result_dict_)

                nake_name = a_img_name.split('/')[-1]

                xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \
                                         detected_boxes[:, 2], detected_boxes[:, 3]

                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

                xmin = xmin * raw_w / resized_w
                xmax = xmax * raw_w / resized_w
                ymin = ymin * raw_h / resized_h
                ymax = ymax * raw_h / resized_h

                boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
                dets = np.hstack((detected_categories.reshape(-1, 1),
                                  detected_scores.reshape(-1, 1),
                                  boxes))

                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = boxes[show_indices]
                show_categories = detected_categories[show_indices]

                with open(os.path.join(res_dir, sub_folder) + '/' + nake_name.split('.')[0] + '.txt', 'w') as f:
                    f.write('{}\n'.format(nake_name.split('.')[0]))
                    # f.write('{}\n'.format(dets.shape[0]))
                    # for inx in range(dets.shape[0]):
                    #     f.write('%d %d %d %d %.3f\n' % (int(dets[inx][2]),
                    #                                     int(dets[inx][3]),
                    #                                     int(dets[inx][4]) - int(dets[inx][2]),
                    #                                     int(dets[inx][5]) - int(dets[inx][3]),
                    #                                     dets[inx][1]))
                    f.write('{}\n'.format(show_boxes.shape[0]))
                    for inx in range(show_boxes.shape[0]):
                        f.write('%d %d %d %d %.3f\n' % (int(show_boxes[inx][0]),
                                                        int(show_boxes[inx][1]),
                                                        int(show_boxes[inx][2]) - int(show_boxes[inx][0]),
                                                        int(show_boxes[inx][3]) - int(show_boxes[inx][1]),
                                                        show_scores[inx]))

                if draw_imgs:
                    final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(
                        raw_img - np.array(cfgs.PIXEL_MEAN),
                        boxes=show_boxes,
                        labels=show_categories,
                        scores=show_scores)
                    tools.mkdir(cfgs.TEST_SAVE_PATH)
                    cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + nake_name, final_detections)

                tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)), i + 1,
                               len(real_test_imgname_list))
def worker(gpu_id, images, det_net, result_queue):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model %d ...' % gpu_id)

        for a_img in images:
            raw_img = cv2.imread(a_img)
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            resized_img, detected_boxes, detected_scores, detected_categories = \
                sess.run(
                    [img_batch, detection_boxes, detection_scores, detection_category],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}
                )

            detected_boxes = forward_convert(detected_boxes, False)

            detected_indices = detected_scores >= cfgs.VIS_SCORE
            detected_scores = detected_scores[detected_indices]
            detected_boxes = detected_boxes[detected_indices]
            detected_categories = detected_categories[detected_indices]

            resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

            scales = [raw_w / resized_w, raw_h / resized_h]
            result_dict = {'scales': scales,
                           'boxes': detected_boxes,
                           'scores': detected_scores,
                           'labels': detected_categories,
                           'image_id': a_img}
            result_queue.put_nowait(result_dict)
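# The worker above pushes one result_dict per image onto a queue shared with the parent
# process. A small sketch of the consumer side, grouping detections by class label once all
# images have been processed; collect_results is illustrative, and launching the workers
# (one process per GPU, each with its own det_net) is repo-specific and not shown:

from collections import defaultdict


def collect_results(result_queue, num_images):
    """Drain num_images result dicts from the queue and group detections by label."""
    per_class = defaultdict(list)
    for _ in range(num_images):
        res = result_queue.get()  # blocks until a worker puts a result
        for box, score, label in zip(res['boxes'], res['scores'], res['labels']):
            per_class[int(label)].append((res['image_id'], score, box, res['scales']))
    return per_class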
def inference(): with tf.Graph().as_default(): img_plac = tf.placeholder(shape=[None, None, 3], dtype=tf.uint8) img_tensor = tf.cast(img_plac, tf.float32) - tf.constant( [103.939, 116.779, 123.68]) img_batch = image_preprocess.short_side_resize_for_inference_data( img_tensor, target_shortside_len=cfgs.SHORT_SIDE_LEN) # *********************************************************************************************** # * share net * # *********************************************************************************************** _, share_net = get_network_byname(net_name=cfgs.NET_NAME, inputs=img_batch, num_classes=None, is_training=True, output_stride=None, global_pool=False, spatial_squeeze=False) # *********************************************************************************************** # * RPN * # *********************************************************************************************** rpn = build_rpn.RPN( net_name=cfgs.NET_NAME, inputs=img_batch, gtboxes_and_label=None, is_training=False, share_head=cfgs.SHARE_HEAD, share_net=share_net, stride=cfgs.STRIDE, anchor_ratios=cfgs.ANCHOR_RATIOS, anchor_scales=cfgs.ANCHOR_SCALES, scale_factors=cfgs.SCALE_FACTORS, base_anchor_size_list=cfgs. BASE_ANCHOR_SIZE_LIST, # P2, P3, P4, P5, P6 level=cfgs.LEVEL, top_k_nms=cfgs.RPN_TOP_K_NMS, rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD, max_proposals_num=cfgs.MAX_PROPOSAL_NUM, rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD, rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD, rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE, rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE, remove_outside_anchors=False, # whether remove anchors outside rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME]) # rpn predict proposals rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals( ) # rpn_score shape: [300, ] # *********************************************************************************************** # * Fast RCNN * # *********************************************************************************************** fast_rcnn = build_fast_rcnn.FastRCNN( feature_pyramid=rpn.feature_pyramid, rpn_proposals_boxes=rpn_proposals_boxes, rpn_proposals_scores=rpn_proposals_scores, img_shape=tf.shape(img_batch), roi_size=cfgs.ROI_SIZE, roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE, scale_factors=cfgs.SCALE_FACTORS, gtboxes_and_label=None, gtboxes_and_label_minAreaRectangle=None, fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, fast_rcnn_maximum_boxes_per_img=100, fast_rcnn_nms_max_boxes_per_class=cfgs. FAST_RCNN_NMS_MAX_BOXES_PER_CLASS, show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD, # show detections which score >= 0.6 num_classes=cfgs.CLASS_NUM, fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE, fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE, fast_rcnn_positives_iou_threshold=cfgs. 
FAST_RCNN_IOU_POSITIVE_THRESHOLD, # iou>0.5 is positive, iou<0.5 is negative use_dropout=cfgs.USE_DROPOUT, weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME], is_training=False, level=cfgs.LEVEL) fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \ fast_rcnn.fast_rcnn_predict() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = restore_model.get_restorer() config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) imgs, img_names = get_imgs() for i, img in enumerate(imgs): start = time.time() _img_batch, _fast_rcnn_decode_boxes, _fast_rcnn_score, _detection_category = \ sess.run([img_batch, fast_rcnn_decode_boxes, fast_rcnn_score, detection_category], feed_dict={img_plac: img}) end = time.time() img_np = np.squeeze(_img_batch, axis=0) img_horizontal_np = draw_box_cv(img_np, boxes=_fast_rcnn_decode_boxes, labels=_detection_category, scores=_fast_rcnn_score) mkdir(cfgs.INFERENCE_SAVE_PATH) cv2.imwrite( cfgs.INFERENCE_SAVE_PATH + '/{}_horizontal_fpn.jpg'.format(img_names[i]), img_horizontal_np) view_bar('{} cost {}s'.format(img_names[i], (end - start)), i + 1, len(imgs)) coord.request_stop() coord.join(threads)
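# Illustrative sketch (an assumption about what short_side_resize_for_inference_data /
# image_preprocess.short_side_resize_for_inference_data do, judging from how they are called
# in these scripts): resize so the shorter side equals a target length while keeping the
# aspect ratio, optionally capping the longer side. A standalone OpenCV version for reference:
import cv2
import numpy as np

def short_side_resize(img, target_short_side, max_length=None):
    h, w = img.shape[:2]
    scale = float(target_short_side) / min(h, w)
    if max_length is not None and scale * max(h, w) > max_length:
        scale = float(max_length) / max(h, w)
    new_w, new_h = int(round(w * scale)), int(round(h * scale))
    return cv2.resize(img, (new_w, new_h))  # cv2.resize takes (width, height)

# Example: a 480x640 frame with target_short_side=600 becomes roughly 600x800.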
def inference(det_net, data_dir): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) img_batch = tf.cast(img_plac, tf.float32) img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN) det_boxes_h, det_scores_h, det_category_h, \ det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') imgs = os.listdir(data_dir) save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION) tools.mkdir(save_dir) for i, a_img_name in enumerate(imgs): raw_img = cv2.imread(os.path.join(data_dir, a_img_name)) # raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \ det_boxes_r_, det_scores_r_, det_category_r_ = \ sess.run( [img_batch, det_boxes_h, det_scores_h, det_category_h, det_boxes_r, det_scores_r, det_category_r], feed_dict={img_plac: raw_img} ) end = time.time() # det_boxes_r_ [x1, y1, x2, y2, h] det_detections_h = draw_box_in_img.draw_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_h_, labels=det_category_h_, scores=det_scores_h_) det_detections_r = mylibs.draw_r2cnn_box(np.squeeze( resized_img, 0), boxes=det_boxes_r_, labels=det_category_r_, scores=det_scores_r_) # draw angles deg_img, dtbox = mylibs.get_r2cnn_degree(det_detections_r, det_boxes_r_) anglePath = cfgs.INFERENCE_SAVE_PATH + '/dtbox' if not os.path.exists(anglePath): os.makedirs(anglePath) savePath = os.path.join(anglePath, a_img_name + '.mat') sio.savemat(savePath, {'dtbox': dtbox}) # draw cobb images #cobb_img = mylibs.get_r2cnn_cobb(deg_img, dtbox) cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg', deg_img) view_bar('{} cost {}s'.format(a_img_name, (end - start)), i + 1, len(imgs))
def detect_img(file_paths, des_folder, det_th, h_len, w_len, h_overlap, w_overlap, show_res=False): with tf.Graph().as_default(): img_plac = tf.placeholder(shape=[None, None, 3], dtype=tf.uint8) img_tensor = tf.cast(img_plac, tf.float32) - tf.constant( [103.939, 116.779, 123.68]) img_batch = image_preprocess.short_side_resize_for_inference_data( img_tensor, target_shortside_len=cfgs.SHORT_SIDE_LEN, is_resize=False) # *********************************************************************************************** # * share net * # *********************************************************************************************** _, share_net = get_network_byname(net_name=cfgs.NET_NAME, inputs=img_batch, num_classes=None, is_training=True, output_stride=None, global_pool=False, spatial_squeeze=False) # *********************************************************************************************** # * RPN * # *********************************************************************************************** rpn = build_rpn.RPN( net_name=cfgs.NET_NAME, inputs=img_batch, gtboxes_and_label=None, is_training=False, share_head=cfgs.SHARE_HEAD, share_net=share_net, stride=cfgs.STRIDE, anchor_ratios=cfgs.ANCHOR_RATIOS, anchor_scales=cfgs.ANCHOR_SCALES, scale_factors=cfgs.SCALE_FACTORS, base_anchor_size_list=cfgs. BASE_ANCHOR_SIZE_LIST, # P2, P3, P4, P5, P6 level=cfgs.LEVEL, top_k_nms=cfgs.RPN_TOP_K_NMS, rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD, max_proposals_num=cfgs.MAX_PROPOSAL_NUM, rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD, rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD, rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE, rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE, remove_outside_anchors=False, # whether remove anchors outside rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME]) # rpn predict proposals rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals( ) # rpn_score shape: [300, ] # *********************************************************************************************** # * Fast RCNN * # *********************************************************************************************** fast_rcnn = build_fast_rcnn.FastRCNN( img_batch=img_batch, feature_pyramid=rpn.feature_pyramid, rpn_proposals_boxes=rpn_proposals_boxes, rpn_proposals_scores=rpn_proposals_scores, img_shape=tf.shape(img_batch), roi_size=cfgs.ROI_SIZE, scale_factors=cfgs.SCALE_FACTORS, roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE, gtboxes_and_label=None, fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, fast_rcnn_maximum_boxes_per_img=100, fast_rcnn_nms_max_boxes_per_class=cfgs. FAST_RCNN_NMS_MAX_BOXES_PER_CLASS, show_detections_score_threshold=det_th, # show detections which score >= 0.6 num_classes=cfgs.CLASS_NUM, fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE, fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE, fast_rcnn_positives_iou_threshold=cfgs. 
FAST_RCNN_IOU_POSITIVE_THRESHOLD, use_dropout=False, weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME], is_training=False, level=cfgs.LEVEL) fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \ fast_rcnn.fast_rcnn_predict() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = restore_model.get_restorer() config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) for img_path in file_paths: start = timer() img = cv2.imread(img_path) box_res = [] label_res = [] score_res = [] imgH = img.shape[0] imgW = img.shape[1] for hh in range(0, imgH, h_len - h_overlap): if imgH - hh - 1 < h_len: hh_ = imgH - h_len else: hh_ = hh for ww in range(0, imgW, w_len - w_overlap): if imgW - ww - 1 < w_len: ww_ = imgW - w_len else: ww_ = ww src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :] boxes, labels, scores = sess.run( [ fast_rcnn_decode_boxes, detection_category, fast_rcnn_score ], feed_dict={img_plac: src_img}) if show_res: visualize_detection(src_img, boxes, scores) if len(boxes) > 0: for ii in range(len(boxes)): box = boxes[ii] box[0] = box[0] + hh_ box[1] = box[1] + ww_ box[2] = box[2] + hh_ box[3] = box[3] + ww_ box_res.append(box) label_res.append(labels[ii]) score_res.append(scores[ii]) box_res = np.array(box_res) label_res = np.array(label_res) score_res = np.array(score_res) box_res_, label_res_, score_res_ = [], [], [] for sub_class in range(1, cfgs.CLASS_NUM + 1): index = np.where(label_res == sub_class)[0] if len(index) == 0: continue tmp_boxes_h = box_res[index] tmp_label_h = label_res[index] tmp_score_h = score_res[index] tmp_boxes_h = np.array(tmp_boxes_h) tmp = np.zeros( [tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1]) tmp[:, 0:-1] = tmp_boxes_h tmp[:, -1] = np.array(tmp_score_h) inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32), thresh=0.7, max_output_size=500) box_res_.extend(np.array(tmp_boxes_h)[inx]) score_res_.extend(np.array(tmp_score_h)[inx]) label_res_.extend(np.array(tmp_label_h)[inx]) time_elapsed = timer() - start print("{} detection time : {:.4f} sec".format( img_path.split('/')[-1].split('.')[0], time_elapsed)) mkdir(des_folder) img_np = draw_box_cv(np.array(img, np.float32) - np.array([103.939, 116.779, 123.68]), boxes=np.array(box_res_), labels=np.array(label_res_), scores=np.array(score_res_)) cv2.imwrite( des_folder + '/{}_fpn.jpg'.format( img_path.split('/')[-1].split('.')[0]), img_np) coord.request_stop() coord.join(threads)
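# Illustrative sketch (not from the original scripts): detect_img() above slides a fixed
# h_len x w_len window over the large image with h_overlap / w_overlap, clamping the last
# window so it never runs past the border, and later adds the window origin back onto the
# per-window boxes. Roughly the same offset logic in isolation:
def tile_offsets(img_size, win_len, overlap):
    """Window start positions along one axis, clamped so every window stays inside the image."""
    offsets = []
    for start in range(0, img_size, win_len - overlap):
        if img_size - start < win_len:          # last window would stick out past the border,
            start = max(img_size - win_len, 0)  # so shift it back flush with the border
        if start not in offsets:
            offsets.append(start)
    return offsets

# Example: tile_offsets(1000, 600, 100) -> [0, 400], i.e. windows [0, 600) and [400, 1000).
# A box predicted inside the window starting at (hh_, ww_) is mapped back to full-image
# coordinates by adding ww_ to its x values and hh_ to its y values.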
def inference(det_net, device_index): # preprocessing data img_placeholder = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) img_batch = tf.cast(img_placeholder, tf.float32) img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, is_resize=False) # img_batch: [1, h, w, c] det_boxes_h, det_scores_h, det_category_h, \ det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch, gtboxes_r_batch=None, gtboxes_h_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') cap = cv2.VideoCapture(device_index) fps = cap.get(cv2.CAP_PROP_FPS) # size = (width, height) size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) tmp_directory = current_directory + "/../tmp" if not os.path.exists(tmp_directory): os.makedirs(tmp_directory) # Define the codec and create VideoWriter object fourcc = cv2.VideoWriter_fourcc(*'XVID') video_writer = cv2.VideoWriter( '%s/OBB_camera_face.avi' % (tmp_directory), fourcc, fps, size) cv2.namedWindow("Press q on keyboard to exit.", cv2.WINDOW_NORMAL) while (cap.isOpened()): # Capture frame-by-frame ret, frame = cap.read() if True != ret: break start = time.time() # print("tf.shape(frame):", sess.run(tf.shape(frame))) # tf.shape(frame): [480 640 3] resized_img, det_boxes_h_, det_scores_h_, det_category_h_, det_boxes_r_, det_scores_r_, det_category_r_ = \ sess.run( [img_batch, det_boxes_h, det_scores_h, det_category_h, det_boxes_r, det_scores_r, det_category_r], feed_dict={img_placeholder: frame}) # feed_dict={img_placeholder: frame} Color image loaded by OpenCV is in BGR mode. # feed_dict={img_plac: frame[:, :, ::-1]} BGR mode -> RGB mode. end = time.time() # print("resized_img.shape:", resized_img.shape) det_detections_h = draw_box_in_img.draw_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_h_, labels=det_category_h_, scores=det_scores_h_) det_detections_r = draw_box_in_img.draw_rotate_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_r_, labels=det_category_r_, scores=det_scores_r_) # height, width, number of channels in image # height = image.shape[0], width = image.shape[1], channels = image.shape[2] det_detections_h = cv2.resize(det_detections_h, (det_detections_h.shape[1] // 2, det_detections_h.shape[0] // 2)) cv2.putText(det_detections_h, text="HBB - %3.2fps" % (1 / (end - start)), org=(10, 10), fontFace=1, fontScale=1, color=(0, 255, 0)) det_detections_r = cv2.resize(det_detections_r, (det_detections_r.shape[1] // 2, det_detections_r.shape[0] // 2)) cv2.putText(det_detections_r, text="OBB - %3.2fps" % (1.0 / (end - start)), org=(10, 10), fontFace=1, fontScale=1, color=(0, 255, 0)) # Stack arrays in sequence horizontally (column wise). hstack_data = np.hstack((det_detections_h, det_detections_r)) video_writer.write(frame) # Display the resulting frame cv2.imshow("Press q on keyboard to exit.", hstack_data) # Press q on keyboard to exit. if cv2.waitKey(1) & 0xFF == ord('q'): break # When everything done, release the capture. cap.release() video_writer.release() cv2.destroyAllWindows()
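# Illustrative sketch (not from the original scripts): the camera demo above follows the
# usual OpenCV capture -> run graph -> overlay FPS -> write -> imshow loop, quitting on 'q'.
# A minimal self-contained skeleton of just the I/O part; process_frame is a hypothetical
# stand-in for the sess.run + drawing step.
import time
import cv2

def run_camera_demo(device_index=0, out_path='demo_out.avi', process_frame=lambda f: f):
    cap = cv2.VideoCapture(device_index)
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'XVID'), fps, size)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        start = time.time()
        vis = process_frame(frame)
        elapsed = max(time.time() - start, 1e-6)
        cv2.putText(vis, '%.2f fps' % (1.0 / elapsed), (10, 20),
                    fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1, color=(0, 255, 0))
        writer.write(vis)  # frames must match the writer's declared size
        cv2.imshow('demo', vis)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    writer.release()
    cv2.destroyAllWindows()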
def inference(det_net, data_dir): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) img_batch = tf.cast(img_plac, tf.float32) img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN) det_boxes_h, det_scores_h, det_category_h, \ det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch, gtboxes_h_batch=None, gtboxes_r_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') imgs = os.listdir(data_dir) for i, a_img_name in enumerate(imgs): # f = open('./res_icdar_r/res_{}.txt'.format(a_img_name.split('.jpg')[0]), 'w') raw_img = cv2.imread(os.path.join(data_dir, a_img_name)) # raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \ det_boxes_r_, det_scores_r_, det_category_r_ = \ sess.run( [img_batch, det_boxes_h, det_scores_h, det_category_h, det_boxes_r, det_scores_r, det_category_r], feed_dict={img_plac: raw_img} ) end = time.time() # res_r = coordinate_convert.forward_convert(det_boxes_r_, False) # res_r = np.array(res_r, np.int32) # for r in res_r: # f.write('{},{},{},{},{},{},{},{}\n'.format(r[0], r[1], r[2], r[3], # r[4], r[5], r[6], r[7])) # f.close() det_detections_h = draw_box_in_img.draw_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_h_, labels=det_category_h_, scores=det_scores_h_) det_detections_r = draw_box_in_img.draw_rotate_box_cv( np.squeeze(resized_img, 0), boxes=det_boxes_r_, labels=det_category_r_, scores=det_scores_r_) save_dir = os.path.join(cfgs.INFERENCE_SAVE_PATH, cfgs.VERSION) tools.mkdir(save_dir) cv2.imwrite(save_dir + '/' + a_img_name + '_h.jpg', det_detections_h) cv2.imwrite(save_dir + '/' + a_img_name + '_r.jpg', det_detections_r) view_bar('{} cost {}s'.format(a_img_name, (end - start)), i + 1, len(imgs))
def detect(det_net, inference_save_path, rgb_real_test_imgname_list, ir_real_test_imgname_list): # 1. preprocess img rgb_img_plac = tf.placeholder(tf.uint8, [None, None, 3], 'rgb') # is RGB. not GBR rgb_img_batch = tf.cast(rgb_img_plac, tf.float32) rgb_img_batch = short_side_resize_for_inference_data(img_tensor=rgb_img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH) ir_img_plac = tf.placeholder(tf.uint8, [None, None, 3], 'ir') # is RGB. not GBR ir_img_batch = tf.cast(ir_img_plac, tf.float32) ir_img_batch = short_side_resize_for_inference_data(img_tensor=ir_img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH) rgb_img_batch = rgb_img_batch - tf.constant(cfgs.RGB_PIXEL_MEAN) ir_img_batch = ir_img_batch - tf.constant(cfgs.IR_PIXEL_MEAN) #img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255) rgb_img_batch = tf.expand_dims(rgb_img_batch, axis=0) # [1, None, None, 3] ir_img_batch = tf.expand_dims(ir_img_batch, axis=0) # [1, None, None, 3] detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( rgb_input_img_batch=rgb_img_batch, ir_input_img_batch=ir_img_batch, seg_mask_batch = None, gtboxes_batch=None) init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) restorer, restore_ckpt, model_variables = det_net.get_restorer_test() print(restore_ckpt) config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') for i, rgb_img_name in enumerate(rgb_real_test_imgname_list): rgb_raw_img = cv2.imread(rgb_img_name)[:, :, ::-1] ir_raw_img = cv2.imread(ir_real_test_imgname_list[i])[:, :, ::-1] start = time.time() rgb_resized_img, ir_resized_img, detected_boxes, detected_scores, detected_categories = \ sess.run( [rgb_img_batch, ir_img_batch, detection_boxes, detection_scores, detection_category], feed_dict={rgb_img_plac: rgb_raw_img, ir_img_plac: ir_raw_img} ) end = time.time() # print("{} cost time : {} ".format(img_name, (end - start))) raw_h, raw_w = ir_raw_img.shape[0], rgb_raw_img.shape[1] xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ detected_boxes[:, 2], detected_boxes[:, 3] resized_h, resized_w = rgb_resized_img.shape[1], ir_resized_img.shape[2] xmin = xmin * raw_w / resized_w xmax = xmax * raw_w / resized_w ymin = ymin * raw_h / resized_h ymax = ymax * raw_h / resized_h detected_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax])) show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD show_scores = detected_scores[show_indices] show_boxes = detected_boxes[show_indices] show_categories = detected_categories[show_indices] nake_name = rgb_img_name.split('/')[-1] f1 = open(inference_save_path + '/txt/' + nake_name.split('.')[0]+'.txt', 'w') final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(rgb_raw_img - np.array(cfgs.RGB_PIXEL_MEAN), boxes=show_boxes, labels=show_categories, scores=show_scores, txt_file=f1, img_name=nake_name.split('.')[0]) # print (inference_save_path + '/' + nake_name) cv2.imwrite(inference_save_path + '/img/' + nake_name, final_detections[:, :, ::-1]) tools.view_bar('{} image cost {}s'.format(rgb_img_name, (end - start)), i + 1, len(ir_real_test_imgname_list))
def detect_img(file_paths, des_folder, paramPath, bakpath, det_th, h_len, w_len, h_overlap, w_overlap, file_ext, show_res=False): with tf.Graph().as_default(): img_plac = tf.placeholder(shape=[None, None, 3], dtype=tf.uint8) img_tensor = tf.cast(img_plac, tf.float32) - tf.constant( [103.939, 116.779, 123.68]) img_batch = image_preprocess.short_side_resize_for_inference_data( img_tensor, target_shortside_len=cfgs.SHORT_SIDE_LEN, is_resize=False) # *********************************************************************************************** # * share net * # *********************************************************************************************** _, share_net = get_network_byname(net_name=cfgs.NET_NAME, inputs=img_batch, num_classes=None, is_training=True, output_stride=None, global_pool=False, spatial_squeeze=False) # *********************************************************************************************** # * RPN * # *********************************************************************************************** rpn = build_rpn.RPN( net_name=cfgs.NET_NAME, inputs=img_batch, gtboxes_and_label=None, is_training=False, share_head=cfgs.SHARE_HEAD, share_net=share_net, stride=cfgs.STRIDE, anchor_ratios=cfgs.ANCHOR_RATIOS, anchor_scales=cfgs.ANCHOR_SCALES, scale_factors=cfgs.SCALE_FACTORS, base_anchor_size_list=cfgs. BASE_ANCHOR_SIZE_LIST, # P2, P3, P4, P5, P6 level=cfgs.LEVEL, top_k_nms=cfgs.RPN_TOP_K_NMS, rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD, max_proposals_num=cfgs.MAX_PROPOSAL_NUM, rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD, rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD, rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE, rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE, remove_outside_anchors=False, # whether remove anchors outside rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME]) # rpn predict proposals rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals( ) # rpn_score shape: [300, ] # *********************************************************************************************** # * Fast RCNN * # *********************************************************************************************** fast_rcnn = build_fast_rcnn.FastRCNN( feature_pyramid=rpn.feature_pyramid, rpn_proposals_boxes=rpn_proposals_boxes, rpn_proposals_scores=rpn_proposals_scores, img_shape=tf.shape(img_batch), roi_size=cfgs.ROI_SIZE, roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE, scale_factors=cfgs.SCALE_FACTORS, gtboxes_and_label=None, gtboxes_and_label_minAreaRectangle=None, fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, fast_rcnn_maximum_boxes_per_img=100, fast_rcnn_nms_max_boxes_per_class=cfgs. FAST_RCNN_NMS_MAX_BOXES_PER_CLASS, show_detections_score_threshold=det_th, # show detections which score >= 0.6 num_classes=cfgs.CLASS_NUM, fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE, fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE, fast_rcnn_positives_iou_threshold=cfgs. 
FAST_RCNN_IOU_POSITIVE_THRESHOLD, # iou>0.5 is positive, iou<0.5 is negative use_dropout=cfgs.USE_DROPOUT, weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME], is_training=False, level=cfgs.LEVEL, head_quadrant=None) fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category, \ fast_rcnn_decode_boxes_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant, \ num_of_objects_rotate, detection_category_rotate = fast_rcnn.fast_rcnn_predict() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = restore_model.get_restorer() config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) obstacle_points, obstacle_labels = read_vif('./sp_whole_reg.vif') while True: alldemo = os.listdir(file_paths) file_names = [] for singledemo in alldemo: singlepath = os.path.join(file_paths, singledemo) file_names.append(singlepath) for img_path in file_names: if img_path.endswith(('.jpg')): # and f not in fs_found: l_f = img_path + '.lock' if os.path.exists(l_f): time.sleep(0.01) continue # try: start = timer() img = cv2.imread(img_path) box_res = [] label_res = [] score_res = [] box_res_rotate = [] label_res_rotate = [] score_res_rotate = [] head_rotate = [] imgH = img.shape[0] imgW = img.shape[1] for hh in range(0, imgH, h_len - h_overlap): h_size = min(h_len, imgH - hh) if h_size < 10: break for ww in range(0, imgW, w_len - w_overlap): w_size = min(w_len, imgW - ww) if w_size < 10: break src_img = img[hh:(hh + h_size), ww:(ww + w_size), :] # boxes, labels, scores = sess.run([fast_rcnn_decode_boxes, detection_category, fast_rcnn_score], # feed_dict={img_plac: src_img}) boxes_rotate, labels_rotate, scores_rotate, _fast_rcnn_head_quadrant = \ sess.run([fast_rcnn_decode_boxes_rotate, detection_category_rotate, fast_rcnn_score_rotate, fast_rcnn_head_quadrant], feed_dict={img_plac: src_img}) # if len(boxes) > 0: # for ii in range(len(boxes)): # box = boxes[ii] # box[0] = box[0] + hh # box[1] = box[1] + ww # box[2] = box[2] + hh # box[3] = box[3] + ww # box_res.append(box) # label_res.append(labels[ii]) # score_res.append(scores[ii]) if len(boxes_rotate) > 0: for ii in range(len(boxes_rotate)): box_rotate = boxes_rotate[ii] box_rotate[0] = box_rotate[0] + hh box_rotate[1] = box_rotate[1] + ww box_res_rotate.append(box_rotate) label_res_rotate.append(labels_rotate[ii]) score_res_rotate.append(scores_rotate[ii]) head_rotate.append( _fast_rcnn_head_quadrant[ii]) time_elapsed = timer() - start print("{} detection time : {:.4f} sec".format( img_path.split('/')[-1].split('.')[0], time_elapsed)) mkdir(des_folder) if len(head_rotate) != 0: # img_np = draw_box_cv(np.array(img, np.float32) - np.array([103.939, 116.779, 123.68]), # boxes=np.array(box_res), # labels=np.array(label_res), # scores=np.array(score_res)) img_np_rotate = draw_rotate_box_cv( np.array(img, np.float32) - np.array([103.939, 116.779, 123.68]), boxes=np.array(box_res_rotate), labels=np.array(label_res_rotate), scores=np.array(score_res_rotate), head=np.argmax(head_rotate, axis=1)) geo_points = get_points(box_res_rotate, np.argmax(head_rotate, axis=1)) image_name = img_path.split('/')[-1] xml_path_1 = os.path.join(des_folder, '1_' + image_name).replace( file_ext, ".xml") param_path = 
os.path.join(paramPath, 'UAV_' + image_name).replace( file_ext, ".param") x_tr, y_tr = get_param(param_path) obstacle_left, obstacle_labels = filter_obstacle( obstacle_points, imgH, imgW, x_tr, y_tr) ###################################################### # obstacle_left = [] # temp = np.array([[2233, 1013], [2196, 980], [2215, 959], [2252, 993]]) # for coord in temp: # coord_convet = convert_coordinate(coord, imgH, imgW, x_tr, y_tr) # obstacle_left.extend(coord_convet) # geo_points, obstacle_labels = filter_obstacle(np.array(geo_points)[:, :8], imgH, imgW, x_tr, y_tr) ###################################################### detect_res, label_res = get_detect_res( obstacle_left, obstacle_labels, geo_points, label_res_rotate, imgH, imgW, x_tr, y_tr) # writer_XML(xml_name, geo_points, label_res, imgW, imgH) writer_XML2(xml_path_1, detect_res, label_res) shutil.move(img_path, os.path.join(bakpath, image_name)) # cv2.imwrite(des_folder + '/{}_horizontal_fpn.jpg'.format(img_path.split('/')[-1].split('.')[0]), img_np) cv2.imwrite( des_folder + '/{}_rotate_fpn.jpg'.format( img_path.split('/')[-1].split('.')[0]), img_np_rotate) final_points = [] final_labels = [] for type in range(3): indx = np.where(np.equal(label_res_rotate, type))[0] if len(indx) != 0: box_res_rotate_ = np.array( box_res_rotate)[indx] label_res_rotate_ = np.array( label_res_rotate)[indx] head_rotate_ = np.array( np.argmax(head_rotate, axis=1))[indx] angles_ = get_angles(box_res_rotate_[:, 4], head_rotate_) convex_points_, center_point_, angle_ = get_convex_points( box_res_rotate_[:, :2], angles_) head_ = get_head(center_point_, angle_) all_points = [] for ii in box_res_rotate_: all_points.extend( convert_coordinate( ii, imgH, imgW, x_tr, y_tr)) all_points.extend( convert_coordinate(center_point_, imgH, imgW, x_tr, y_tr)) all_points.extend( convert_coordinate(head_, imgH, imgW, x_tr, y_tr)) final_points.append(all_points) final_labels.append(type) xml_path_2 = os.path.join(des_folder, '2_' + image_name).replace( file_ext, ".xml") writer_XML2(xml_path_2, final_points, final_labels) # except: # print("Get an error, filename: {}".format(img_path)) coord.request_stop() coord.join(threads)
def eval_with_plac(det_net, real_test_imgname_list, img_root, draw_imgs=False): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not BGR img_batch = tf.cast(img_plac, tf.float32) img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH) if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) else: img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255) img_batch = tf.expand_dims(img_batch, axis=0) detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch=None) init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') for i, a_img_name in enumerate(real_test_imgname_list): raw_img = cv2.imread(os.path.join(img_root, a_img_name)) raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, detected_boxes, detected_scores, detected_categories = \ sess.run( [img_batch, detection_boxes, detection_scores, detection_category], feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB ) end = time.time() # print("{} cost time : {} ".format(img_name, (end - start))) if draw_imgs: show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD show_scores = detected_scores[show_indices] show_boxes = detected_boxes[show_indices] show_categories = detected_categories[show_indices] draw_img = np.squeeze(resized_img, 0) if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']: draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255 else: draw_img = draw_img + np.array(cfgs.PIXEL_MEAN) # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN) final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(draw_img, boxes=show_boxes, labels=show_categories, scores=show_scores, in_graph=False) if not os.path.exists(cfgs.TEST_SAVE_PATH): os.makedirs(cfgs.TEST_SAVE_PATH) cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + a_img_name, final_detections[:, :, ::-1]) xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ detected_boxes[:, 2], detected_boxes[:, 3] resized_h, resized_w = resized_img.shape[1], resized_img.shape[2] xmin = xmin * raw_w / resized_w xmax = xmax * raw_w / resized_w ymin = ymin * raw_h / resized_h ymax = ymax * raw_h / resized_h boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax])) dets = np.hstack((detected_categories.reshape(-1, 1), detected_scores.reshape(-1, 1), boxes)) # all_boxes.append(dets) # eval txt CLASS_VOC = NAME_LABEL_MAP.keys() write_handle = {} txt_dir = os.path.join('voc2012_eval', cfgs.VERSION, 'results', 'VOC2012', 'Main') tools.mkdir(txt_dir) for sub_class in CLASS_VOC: if sub_class == 'back_ground': continue write_handle[sub_class] = open(os.path.join(txt_dir, 'comp3_det_test_%s.txt' % sub_class), 'a+') for det in dets: command = '%s %.6f %.6f %.6f %.6f %.6f\n' % (a_img_name.split('/')[-1].split('.')[0], det[1], det[2], det[3], det[4], det[5]) write_handle[LABEl_NAME_MAP[det[0]]].write(command) for 
sub_class in CLASS_VOC: if sub_class == 'back_ground': continue write_handle[sub_class].close() tools.view_bar('%s image cost %.3fs' % (a_img_name, (end - start)), i + 1, len(real_test_imgname_list))
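# Illustrative sketch (not from the original scripts): the eval loop above appends one line
# per detection to a per-class VOC results file, 'image_id score xmin ymin xmax ymax'. The
# same bookkeeping in isolation; the dets layout mirrors the array built above
# ([label, score, xmin, ymin, xmax, ymax] per row) and label_to_name is an assumed mapping:
import os

def write_voc_results(txt_dir, image_id, dets, label_to_name):
    if not os.path.exists(txt_dir):
        os.makedirs(txt_dir)
    handles = {}
    try:
        for det in dets:
            name = label_to_name[int(det[0])]
            if name == 'back_ground':
                continue
            if name not in handles:
                handles[name] = open(os.path.join(txt_dir, 'comp3_det_test_%s.txt' % name), 'a+')
            handles[name].write('%s %.6f %.6f %.6f %.6f %.6f\n'
                                % (image_id, det[1], det[2], det[3], det[4], det[5]))
    finally:
        for f in handles.values():
            f.close()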
def inference(det_net, file_paths, des_folder, h_len, w_len, h_overlap, w_overlap, show_res=False): if show_res: assert cfgs.SHOW_SCORE_THRSHOLD >= 0.5, \ 'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.5) in cfgs.py' else: assert cfgs.SHOW_SCORE_THRSHOLD < 0.005, \ 'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.00) in cfgs.py' # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) img_batch = tf.cast(img_plac, tf.float32) img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, is_resize=False) det_boxes, det_scores, det_category = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') for count, img_path in enumerate(file_paths): start = timer() img = cv2.imread(img_path) box_res_rotate = [] label_res_rotate = [] score_res_rotate = [] imgH = img.shape[0] imgW = img.shape[1] if imgH < h_len: temp = np.zeros([h_len, imgW, 3], np.float32) temp[0:imgH, :, :] = img img = temp imgH = h_len if imgW < w_len: temp = np.zeros([imgH, w_len, 3], np.float32) temp[:, 0:imgW, :] = img img = temp imgW = w_len for hh in range(0, imgH, h_len - h_overlap): if imgH - hh - 1 < h_len: hh_ = imgH - h_len else: hh_ = hh for ww in range(0, imgW, w_len - w_overlap): if imgW - ww - 1 < w_len: ww_ = imgW - w_len else: ww_ = ww src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :] det_boxes_, det_scores_, det_category_ = \ sess.run( [det_boxes, det_scores, det_category], feed_dict={img_plac: src_img[:, :, ::-1]} ) if len(det_boxes_) > 0: for ii in range(len(det_boxes_)): box_rotate = det_boxes_[ii] box_rotate[0] = box_rotate[0] + ww_ box_rotate[1] = box_rotate[1] + hh_ box_res_rotate.append(box_rotate) label_res_rotate.append(det_category_[ii]) score_res_rotate.append(det_scores_[ii]) box_res_rotate = np.array(box_res_rotate) label_res_rotate = np.array(label_res_rotate) score_res_rotate = np.array(score_res_rotate) box_res_rotate_ = [] label_res_rotate_ = [] score_res_rotate_ = [] threshold = { 'roundabout': 0.1, 'tennis-court': 0.3, 'swimming-pool': 0.1, 'storage-tank': 0.2, 'soccer-ball-field': 0.3, 'small-vehicle': 0.2, 'ship': 0.05, 'plane': 0.3, 'large-vehicle': 0.1, 'helicopter': 0.2, 'harbor': 0.0001, 'ground-track-field': 0.3, 'bridge': 0.0001, 'basketball-court': 0.3, 'baseball-diamond': 0.3 } for sub_class in range(1, cfgs.CLASS_NUM + 1): index = np.where(label_res_rotate == sub_class)[0] if len(index) == 0: continue tmp_boxes_r = box_res_rotate[index] tmp_label_r = label_res_rotate[index] tmp_score_r = score_res_rotate[index] tmp_boxes_r = np.array(tmp_boxes_r) tmp = np.zeros( [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1]) tmp[:, 0:-1] = tmp_boxes_r tmp[:, -1] = np.array(tmp_score_r) try: inx = nms_rotate.nms_rotate_cpu( boxes=np.array(tmp_boxes_r), scores=np.array(tmp_score_r), iou_threshold=threshold[LABEl_NAME_MAP[sub_class]], max_output_size=500) except: # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms jitter = np.zeros( [tmp_boxes_r.shape[0], tmp_boxes_r.shape[1] + 1]) jitter[:, 0] += np.random.rand(tmp_boxes_r.shape[0], ) / 
1000 inx = rotate_gpu_nms( np.array(tmp, np.float32) + np.array(jitter, np.float32), float(threshold[LABEl_NAME_MAP[sub_class]]), 0) box_res_rotate_.extend(np.array(tmp_boxes_r)[inx]) score_res_rotate_.extend(np.array(tmp_score_r)[inx]) label_res_rotate_.extend(np.array(tmp_label_r)[inx]) time_elapsed = timer() - start if show_res: det_detections = draw_box_in_img.draw_rotate_box_cv( np.array(img, np.float32) - np.array(cfgs.PIXEL_MEAN), boxes=np.array(box_res_rotate_), labels=np.array(label_res_rotate_), scores=np.array(score_res_rotate_)) save_dir = os.path.join(des_folder, cfgs.VERSION) tools.mkdir(save_dir) cv2.imwrite( save_dir + '/' + img_path.split('/')[-1].split('.')[0] + '_r.jpg', det_detections) else: # eval txt CLASS_DOTA = NAME_LABEL_MAP.keys() write_handle = {} txt_dir = os.path.join('txt_output', cfgs.VERSION) tools.mkdir(txt_dir) for sub_class in CLASS_DOTA: if sub_class == 'back_ground': continue write_handle[sub_class] = open( os.path.join(txt_dir, 'Task1_%s.txt' % sub_class), 'a+') rboxes = coordinate_convert.forward_convert(box_res_rotate_, with_label=False) for i, rbox in enumerate(rboxes): command = '%s %.3f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f\n' % ( img_path.split('/')[-1].split('.')[0], score_res_rotate_[i], rbox[0], rbox[1], rbox[2], rbox[3], rbox[4], rbox[5], rbox[6], rbox[7], ) write_handle[LABEl_NAME_MAP[label_res_rotate_[i]]].write( command) for sub_class in CLASS_DOTA: if sub_class == 'back_ground': continue write_handle[sub_class].close() view_bar( '{} cost {}s'.format( img_path.split('/')[-1].split('.')[0], time_elapsed), count + 1, len(file_paths))
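# Illustrative sketch (not from the original scripts): after per-class rotated NMS the loop
# above writes DOTA-style results, one 'Task1_<class>.txt' file per class, each line being
# 'image_id score x1 y1 x2 y2 x3 y3 x4 y4' with the rotated box as a 4-point polygon. The
# writing step in isolation; polygons are assumed to already be 8-value rows, as produced by
# coordinate_convert.forward_convert above, and label_to_name is an assumed mapping:
import os

def write_dota_task1(txt_dir, image_id, polygons, scores, labels, label_to_name):
    if not os.path.exists(txt_dir):
        os.makedirs(txt_dir)
    for poly, score, label in zip(polygons, scores, labels):
        name = label_to_name[int(label)]
        if name == 'back_ground':
            continue
        line = '%s %.3f ' % (image_id, score) + ' '.join('%.1f' % v for v in poly[:8]) + '\n'
        with open(os.path.join(txt_dir, 'Task1_%s.txt' % name), 'a+') as f:
            f.write(line)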
def eval_with_plac(det_net, imgId_list, coco, out_json_root, draw_imgs=False): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not GBR img_batch = tf.cast(img_plac, tf.float32) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH) if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']: img_batch = (img_batch / 255 - tf.constant( cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) else: img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = tf.expand_dims(img_batch, axis=0) detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True # coco_test_results = [] with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') for i, imgid in enumerate(imgId_list): imgname = coco.loadImgs(ids=[imgid])[0]['file_name'] raw_img = cv2.imread( os.path.join("/home/yjr/DataSet/COCO/2017/test2017", imgname)) raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, detected_boxes, detected_scores, detected_categories = \ sess.run( [img_batch, detection_boxes, detection_scores, detection_category], feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB ) end = time.time() if draw_imgs: show_indices = detected_scores >= cfgs.VIS_SCORE show_scores = detected_scores[show_indices] show_boxes = detected_boxes[show_indices] show_categories = detected_categories[show_indices] draw_img = np.squeeze(resized_img, 0) if cfgs.NET_NAME in [ 'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d' ]: draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255 else: draw_img = draw_img + np.array(cfgs.PIXEL_MEAN) final_detections = draw_box_in_img.draw_boxes_with_label_and_scores( draw_img, boxes=show_boxes, labels=show_categories, scores=show_scores, in_graph=False) cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + str(imgid) + '.jpg', final_detections[:, :, ::-1]) xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ detected_boxes[:, 2], detected_boxes[:, 3] resized_h, resized_w = resized_img.shape[1], resized_img.shape[2] xmin = xmin * raw_w / resized_w xmax = xmax * raw_w / resized_w ymin = ymin * raw_h / resized_h ymax = ymax * raw_h / resized_h boxes = np.transpose( np.stack([xmin, ymin, xmax - xmin, ymax - ymin])) dets = np.hstack((detected_categories.reshape(-1, 1), detected_scores.reshape(-1, 1), boxes)) a_img_detect_result = [] for a_det in dets: label, score, bbox = a_det[0], a_det[1], a_det[2:] cat_id = classes_originID[LABEL_NAME_MAP[label]] if score < 0.00001: continue det_object = { "image_id": imgid, "category_id": cat_id, "bbox": bbox.tolist(), "score": float(score) } # print (det_object) a_img_detect_result.append(det_object) f = open( os.path.join(out_json_root, 'each_img', str(imgid) + '.json'), 'w') json.dump(a_img_detect_result, f) # , indent=4 f.close() del a_img_detect_result del dets del boxes del resized_img del raw_img tools.view_bar('{} image cost {}s'.format(imgid, (end - start)), i + 1, len(imgId_list))
def test_coco(det_net, real_test_img_list, eval_data, draw_imgs=False): # 1. preprocess img img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not BGR img_batch = tf.cast(img_plac, tf.float32) img_batch = short_side_resize_for_inference_data( img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH) if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']: img_batch = (img_batch / 255 - tf.constant( cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) else: img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255) img_batch = tf.expand_dims(img_batch, axis=0) detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch=None) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') save_path = os.path.join('./eval_coco', cfgs.VERSION) tools.mkdir(save_path) fw_json_dt = open(os.path.join(save_path, 'coco_test-dev.json'), 'w') coco_det = [] for i, a_img in enumerate(real_test_img_list): raw_img = cv2.imread(os.path.join(eval_data, a_img['file_name'])) raw_h, raw_w = raw_img.shape[0], raw_img.shape[1] start = time.time() resized_img, detected_boxes, detected_scores, detected_categories = \ sess.run( [img_batch, detection_boxes, detection_scores, detection_category], feed_dict={img_plac: raw_img[:, :, ::-1]} # cv is BGR. But need RGB ) end = time.time() eval_indices = detected_scores >= 0.01 detected_scores = detected_scores[eval_indices] detected_boxes = detected_boxes[eval_indices] detected_categories = detected_categories[eval_indices] # print("{} cost time : {} ".format(img_name, (end - start))) if draw_imgs: show_indices = detected_scores >= cfgs.VIS_SCORE show_scores = detected_scores[show_indices] show_boxes = detected_boxes[show_indices] show_categories = detected_categories[show_indices] draw_img = np.squeeze(resized_img, 0) if cfgs.NET_NAME in [ 'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d' ]: draw_img = (draw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255 else: draw_img = draw_img + np.array(cfgs.PIXEL_MEAN) # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN) final_detections = draw_box_in_img.draw_boxes_with_label_and_scores( draw_img, boxes=show_boxes, labels=show_categories, scores=show_scores, in_graph=False) if not os.path.exists(cfgs.TEST_SAVE_PATH): os.makedirs(cfgs.TEST_SAVE_PATH) cv2.imwrite( cfgs.TEST_SAVE_PATH + '/' + '{}.jpg'.format(a_img['id']), final_detections[:, :, ::-1]) xmin, ymin, xmax, ymax = detected_boxes[:, 0], detected_boxes[:, 1], \ detected_boxes[:, 2], detected_boxes[:, 3] resized_h, resized_w = resized_img.shape[1], resized_img.shape[2] xmin = xmin * raw_w / resized_w xmax = xmax * raw_w / resized_w ymin = ymin * raw_h / resized_h ymax = ymax * raw_h / resized_h boxes = np.transpose( np.stack([xmin, ymin, xmax - xmin, ymax - ymin])) # cost much time for j, box in enumerate(boxes): coco_det.append({ 'bbox': [ float(box[0]), float(box[1]), float(box[2]), float(box[3]) ], 'score': float(detected_scores[j]), 'image_id': a_img['id'], 'category_id': int(classes_originID[LABEL_NAME_MAP[ 
detected_categories[j]]]) }) tools.view_bar( '%s image cost %.3fs' % (a_img['id'], (end - start)), i + 1, len(real_test_img_list)) json.dump(coco_det, fw_json_dt) fw_json_dt.close()
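# Illustrative sketch (not from the original scripts): test_coco() above accumulates COCO
# detection-result dicts, whose 'bbox' is [x, y, width, height] in raw-image pixels, and
# dumps them as a single JSON list for the COCO evaluator. The conversion in isolation;
# category_map is an assumed mapping from internal labels to COCO category ids:
import json

def boxes_to_coco_json(out_path, image_id, boxes_xyxy, scores, labels, category_map):
    coco_det = []
    for (xmin, ymin, xmax, ymax), score, label in zip(boxes_xyxy, scores, labels):
        coco_det.append({
            'image_id': image_id,
            'category_id': int(category_map[int(label)]),
            'bbox': [float(xmin), float(ymin), float(xmax - xmin), float(ymax - ymin)],
            'score': float(score),
        })
    with open(out_path, 'w') as f:
        json.dump(coco_det, f)
    return coco_det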
def inference(det_net, cap):
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     is_resize=False)

    det_boxes_h, det_scores_h, det_category_h, \
    det_boxes_r, det_scores_r, det_category_r = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                                                      gtboxes_h_batch=None,
                                                                                      gtboxes_r_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if not restorer is None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('./camera_demo.avi', fourcc, 10, (640, 480))
        ret, frame = cap.read()
        while ret:
            ret, frame = cap.read()
            start = time.time()
            resized_img, det_boxes_h_, det_scores_h_, det_category_h_, \
            det_boxes_r_, det_scores_r_, det_category_r_ = \
                sess.run(
                    [img_batch, det_boxes_h, det_scores_h, det_category_h,
                     det_boxes_r, det_scores_r, det_category_r],
                    feed_dict={img_plac: frame}
                )
            end = time.time()

            # det_detections_h = draw_box_in_img.draw_box_cv(np.squeeze(resized_img, 0),
            #                                                boxes=det_boxes_h_,
            #                                                labels=det_category_h_,
            #                                                scores=det_scores_h_)
            det_detections_r = draw_box_in_img.draw_rotate_box_cv(np.squeeze(resized_img, 0),
                                                                  boxes=det_boxes_r_,
                                                                  labels=det_category_r_,
                                                                  scores=det_scores_r_)
            # det_detections_h = cv2.resize(det_detections_h,
            #                               (det_detections_h.shape[0] // 2, det_detections_h.shape[1] // 2))
            # cv2.putText(det_detections_h,
            #             text="horizon bbox",
            #             org=(0, 0),
            #             fontFace=3,
            #             fontScale=1,
            #             color=(255, 0, 0))
            # det_detections_r = cv2.resize(det_detections_r,
            #                               (det_detections_r.shape[0] // 2, det_detections_r.shape[1] // 2))
            cv2.putText(det_detections_r,
                        text="rotated bbox--%3.2f" % (1 / (end - start)),
                        org=(0, 10),
                        fontFace=1,
                        fontScale=1,
                        color=(0, 255, 0))
            out.write(det_detections_r)
            # hmerge = np.hstack((det_detections_h, det_detections_r))  # horizontal concat
            cv2.imshow("faceDetection", det_detections_r)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        out.release()
        cv2.destroyAllWindows()
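# Side note (an observation about the snippet above, stated as an assumption): the
# VideoWriter is opened with a fixed (640, 480) size while the frames written come from the
# resized graph output, so their shape may not match; OpenCV generally expects every written
# frame to have exactly the size the writer was created with. A defensive write helper:
import cv2
import numpy as np

def write_frame(writer, frame, size):
    """Resize to the writer's declared (width, height) and cast to uint8 before writing."""
    frame = np.clip(frame, 0, 255).astype(np.uint8)
    if (frame.shape[1], frame.shape[0]) != size:
        frame = cv2.resize(frame, size)
    writer.write(frame)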
def worker(gpu_id, images, det_net, args, result_queue): os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3]) # is RGB. not BGR img_batch = tf.cast(img_plac, tf.float32) img_batch = short_side_resize_for_inference_data(img_tensor=img_batch, target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN, length_limitation=cfgs.IMG_MAX_LENGTH, is_resize=not args.multi_scale) if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']: img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD) else: img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN) img_batch = tf.expand_dims(img_batch, axis=0) detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network( input_img_batch=img_batch, gtboxes_batch_h=None, gtboxes_batch_r=None) init_op = tf.group( tf.global_variables_initializer(), tf.local_variables_initializer() ) restorer, restore_ckpt = det_net.get_restorer() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model %d ...' % gpu_id) for img_path in images: # if 'P2043.png' not in img_path: # continue img = cv2.imread(img_path) box_res_rotate = [] label_res_rotate = [] score_res_rotate = [] imgH = img.shape[0] imgW = img.shape[1] img_short_side_len_list = cfgs.IMG_SHORT_SIDE_LEN if args.multi_scale else [cfgs.IMG_SHORT_SIDE_LEN] if imgH < args.h_len: temp = np.zeros([args.h_len, imgW, 3], np.float32) temp[0:imgH, :, :] = img img = temp imgH = args.h_len if imgW < args.w_len: temp = np.zeros([imgH, args.w_len, 3], np.float32) temp[:, 0:imgW, :] = img img = temp imgW = args.w_len for hh in range(0, imgH, args.h_len - args.h_overlap): if imgH - hh - 1 < args.h_len: hh_ = imgH - args.h_len else: hh_ = hh for ww in range(0, imgW, args.w_len - args.w_overlap): if imgW - ww - 1 < args.w_len: ww_ = imgW - args.w_len else: ww_ = ww src_img = img[hh_:(hh_ + args.h_len), ww_:(ww_ + args.w_len), :] for short_size in cfgs.IMG_SHORT_SIDE_LEN: max_len = cfgs.IMG_MAX_LENGTH if args.h_len < args.w_len: new_h, new_w = short_size, min(int(short_size * float(args.w_len) / args.h_len), max_len) else: new_h, new_w = min(int(short_size * float(args.h_len) / args.w_len), max_len), short_size img_resize = cv2.resize(src_img, (new_w, new_h)) resized_img, det_boxes_r_, det_scores_r_, det_category_r_ = \ sess.run( [img_batch, detection_boxes, detection_scores, detection_category], feed_dict={img_plac: img_resize[:, :, ::-1]} ) resized_h, resized_w = resized_img.shape[1], resized_img.shape[2] src_h, src_w = src_img.shape[0], src_img.shape[1] if len(det_boxes_r_) > 0: det_boxes_r_ = forward_convert(det_boxes_r_, False) det_boxes_r_[:, 0::2] *= (src_w / resized_w) det_boxes_r_[:, 1::2] *= (src_h / resized_h) # det_boxes_r_ = backward_convert(det_boxes_r_, False) for ii in range(len(det_boxes_r_)): box_rotate = det_boxes_r_[ii] box_rotate[0::2] = box_rotate[0::2] + ww_ box_rotate[1::2] = box_rotate[1::2] + hh_ box_res_rotate.append(box_rotate) label_res_rotate.append(det_category_r_[ii]) score_res_rotate.append(det_scores_r_[ii]) if args.flip_img: det_boxes_r_flip, det_scores_r_flip, det_category_r_flip = \ sess.run( [detection_boxes, detection_scores, detection_category], feed_dict={img_plac: cv2.flip(img_resize, flipCode=1)[:, :, ::-1]} ) if len(det_boxes_r_flip) > 0: det_boxes_r_flip = forward_convert(det_boxes_r_flip, False) det_boxes_r_flip[:, 0::2] *= 
(src_w / resized_w) det_boxes_r_flip[:, 1::2] *= (src_h / resized_h) for ii in range(len(det_boxes_r_flip)): box_rotate = det_boxes_r_flip[ii] box_rotate[0::2] = (src_w - box_rotate[0::2]) + ww_ box_rotate[1::2] = box_rotate[1::2] + hh_ box_res_rotate.append(box_rotate) label_res_rotate.append(det_category_r_flip[ii]) score_res_rotate.append(det_scores_r_flip[ii]) det_boxes_r_flip, det_scores_r_flip, det_category_r_flip = \ sess.run( [detection_boxes, detection_scores, detection_category], feed_dict={img_plac: cv2.flip(img_resize, flipCode=0)[:, :, ::-1]} ) if len(det_boxes_r_flip) > 0: det_boxes_r_flip = forward_convert(det_boxes_r_flip, False) det_boxes_r_flip[:, 0::2] *= (src_w / resized_w) det_boxes_r_flip[:, 1::2] *= (src_h / resized_h) for ii in range(len(det_boxes_r_flip)): box_rotate = det_boxes_r_flip[ii] box_rotate[0::2] = box_rotate[0::2] + ww_ box_rotate[1::2] = (src_h - box_rotate[1::2]) + hh_ box_res_rotate.append(box_rotate) label_res_rotate.append(det_category_r_flip[ii]) score_res_rotate.append(det_scores_r_flip[ii]) box_res_rotate = np.array(box_res_rotate) label_res_rotate = np.array(label_res_rotate) score_res_rotate = np.array(score_res_rotate) box_res_rotate_ = [] label_res_rotate_ = [] score_res_rotate_ = [] threshold = {'roundabout': 0.1, 'tennis-court': 0.3, 'swimming-pool': 0.1, 'storage-tank': 0.2, 'soccer-ball-field': 0.3, 'small-vehicle': 0.2, 'ship': 0.2, 'plane': 0.3, 'large-vehicle': 0.1, 'helicopter': 0.2, 'harbor': 0.0001, 'ground-track-field': 0.3, 'bridge': 0.0001, 'basketball-court': 0.3, 'baseball-diamond': 0.3} for sub_class in range(1, cfgs.CLASS_NUM + 1): index = np.where(label_res_rotate == sub_class)[0] if len(index) == 0: continue tmp_boxes_r = box_res_rotate[index] tmp_label_r = label_res_rotate[index] tmp_score_r = score_res_rotate[index] tmp_boxes_r_ = backward_convert(tmp_boxes_r, False) try: inx = nms_rotate.nms_rotate_cpu(boxes=np.array(tmp_boxes_r_), scores=np.array(tmp_score_r), iou_threshold=threshold[LABEL_NAME_MAP[sub_class]], max_output_size=5000) except: tmp_boxes_r_ = np.array(tmp_boxes_r_) tmp = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1]) tmp[:, 0:-1] = tmp_boxes_r_ tmp[:, -1] = np.array(tmp_score_r) # Note: the IoU of two same rectangles is 0, which is calculated by rotate_gpu_nms jitter = np.zeros([tmp_boxes_r_.shape[0], tmp_boxes_r_.shape[1] + 1]) jitter[:, 0] += np.random.rand(tmp_boxes_r_.shape[0], ) / 1000 inx = rotate_gpu_nms(np.array(tmp, np.float32) + np.array(jitter, np.float32), float(threshold[LABEL_NAME_MAP[sub_class]]), 0) box_res_rotate_.extend(np.array(tmp_boxes_r)[inx]) score_res_rotate_.extend(np.array(tmp_score_r)[inx]) label_res_rotate_.extend(np.array(tmp_label_r)[inx]) result_dict = {'boxes': np.array(box_res_rotate_), 'scores': np.array(score_res_rotate_), 'labels': np.array(label_res_rotate_), 'image_id': img_path} result_queue.put_nowait(result_dict)
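# Illustrative sketch (not from the original scripts): the flip test-time augmentation above
# runs the detector on cv2.flip'ed crops and then mirrors the predicted polygon points back,
# x' = width - x for a horizontal flip (flipCode=1) and y' = height - y for a vertical flip
# (flipCode=0). In isolation, for 8-value polygons [x1, y1, ..., x4, y4]:
import numpy as np

def unflip_polygons(polys, width, height, horizontal=True):
    polys = np.asarray(polys, dtype=np.float32).copy()
    if horizontal:
        polys[:, 0::2] = width - polys[:, 0::2]   # mirror x coordinates
    else:
        polys[:, 1::2] = height - polys[:, 1::2]  # mirror y coordinates
    return polys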
def eval_coco(det_net, real_test_img_list, draw_imgs=False):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)

    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH,
                                                     is_resize=False)
    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    # img_batch = (img_batch - tf.constant(cfgs.PIXEL_MEAN)) / (tf.constant(cfgs.PIXEL_STD)*255)
    img_batch = tf.expand_dims(img_batch, axis=0)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        save_path = os.path.join('./eval_coco', cfgs.VERSION)
        tools.mkdir(save_path)
        fw_json_dt = open(os.path.join(save_path, 'coco_minival_ms.json'), 'w')
        coco_det = []

        for i, a_img in enumerate(real_test_img_list):

            record = json.loads(a_img)
            raw_img = cv2.imread(record['fpath'])
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]

            start = time.time()

            detected_scores_, detected_boxes_, detected_categories_ = [], [], []
            # multi-scale testing: run the detector on several square resizes of the image
            for ss in [600, 800, 1000, 1200]:  # cfgs.IMG_SHORT_SIDE_LEN
                img_resize = cv2.resize(raw_img, (ss, ss))

                resized_img, tmp_detected_boxes, tmp_detected_scores, tmp_detected_categories = \
                    sess.run(
                        [img_batch, detection_boxes, detection_scores, detection_category],
                        feed_dict={img_plac: img_resize[:, :, ::-1]}  # cv is BGR. But need RGB
                    )

                eval_indices = tmp_detected_scores >= 0.01
                tmp_detected_scores = tmp_detected_scores[eval_indices]
                tmp_detected_boxes = tmp_detected_boxes[eval_indices]
                tmp_detected_categories = tmp_detected_categories[eval_indices]

                # map boxes from the resized image back to the raw image size
                xmin, ymin, xmax, ymax = tmp_detected_boxes[:, 0], tmp_detected_boxes[:, 1], \
                                         tmp_detected_boxes[:, 2], tmp_detected_boxes[:, 3]

                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]

                xmin = xmin * raw_w / resized_w
                xmax = xmax * raw_w / resized_w
                ymin = ymin * raw_h / resized_h
                ymax = ymax * raw_h / resized_h

                resize_boxes = np.transpose(np.stack([xmin, ymin, xmax, ymax]))
                detected_scores_.append(tmp_detected_scores)
                detected_boxes_.append(resize_boxes)
                detected_categories_.append(tmp_detected_categories)

            detected_scores_ = np.concatenate(detected_scores_)
            detected_boxes_ = np.concatenate(detected_boxes_)
            detected_categories_ = np.concatenate(detected_categories_)

            # per-class NMS over the merged multi-scale detections
            detected_scores, detected_boxes, detected_categories = [], [], []

            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(detected_categories_ == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = detected_boxes_[index]
                tmp_label_h = detected_categories_[index]
                tmp_score_h = detected_scores_[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros([tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32),
                                     thresh=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                     max_output_size=500)

                detected_boxes.extend(np.array(tmp_boxes_h)[inx])
                detected_scores.extend(np.array(tmp_score_h)[inx])
                detected_categories.extend(np.array(tmp_label_h)[inx])

            detected_scores = np.array(detected_scores)
            detected_boxes = np.array(detected_boxes)
            detected_categories = np.array(detected_categories)

            # print("{} cost time : {} ".format(img_name, (end - start)))
            if draw_imgs:
                show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                show_scores = detected_scores[show_indices]
                show_boxes = detected_boxes[show_indices]
                show_categories = detected_categories[show_indices]

                # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                #     draw_img = (raw_img * np.array(cfgs.PIXEL_STD) + np.array(cfgs.PIXEL_MEAN_)) * 255
                # else:
                #     draw_img = raw_img + np.array(cfgs.PIXEL_MEAN)
                # draw_img = draw_img * (np.array(cfgs.PIXEL_STD)*255) + np.array(cfgs.PIXEL_MEAN)
                raw_img = np.array(raw_img, np.float32)

                final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(raw_img,
                                                                                    boxes=show_boxes,
                                                                                    labels=show_categories,
                                                                                    scores=show_scores,
                                                                                    in_graph=False)
                if not os.path.exists(cfgs.TEST_SAVE_PATH):
                    os.makedirs(cfgs.TEST_SAVE_PATH)

                cv2.imwrite(cfgs.TEST_SAVE_PATH + '/' + record['ID'],
                            final_detections)

            # cost much time
            for j, box in enumerate(detected_boxes):
                # COCO detection format: bbox is [x, y, width, height]
                coco_det.append({'bbox': [float(box[0]), float(box[1]),
                                          float(box[2] - box[0]), float(box[3] - box[1])],
                                 'score': float(detected_scores[j]),
                                 'image_id': int(record['ID'].split('.jpg')[0].split('_000000')[-1]),
                                 'category_id': int(classes_originID[LABEl_NAME_MAP[detected_categories[j]]])})

            end = time.time()
            tools.view_bar('%s image cost %.3fs' % (record['ID'], (end - start)), i + 1, len(real_test_img_list))

        json.dump(coco_det, fw_json_dt)
        fw_json_dt.close()
        return os.path.join(save_path, 'coco_minival_ms.json')
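
# --- Hedged usage sketch (not part of the original script) ---
# eval_coco() returns the path of a detection-results JSON in standard COCO format, so it can
# be scored with pycocotools. The annotation file name below is an assumption; substitute the
# minival annotation file actually used for evaluation.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval


def score_coco_json(res_json, ann_file='instances_minival2014.json'):
    coco_gt = COCO(ann_file)                 # ground-truth annotations
    coco_dt = coco_gt.loadRes(res_json)      # detections written by eval_coco()
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()                    # prints the standard AP/AR table
    return coco_eval.stats                   # stats[0] is mAP@[.5:.95]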
def inference(det_net, file_paths, des_folder, h_len, w_len,
              h_overlap, w_overlap, save_res=False):

    if save_res:
        assert cfgs.SHOW_SCORE_THRSHOLD >= 0.5, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.5) in cfgs.py'
    else:
        assert cfgs.SHOW_SCORE_THRSHOLD < 0.005, \
            'please set score threshold (example: SHOW_SCORE_THRSHOLD = 0.00) in cfgs.py'

    tmp_file = './tmp_%s.txt' % cfgs.VERSION

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
    img_batch = tf.cast(img_plac, tf.float32)

    if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)
    img_batch = short_side_resize_for_inference_data(
        img_tensor=img_batch,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN[0],
        is_resize=False)

    det_boxes_h, det_scores_h, det_category_h = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        # resume support: images already listed in the tmp file are skipped
        if not os.path.exists(tmp_file):
            fw = open(tmp_file, 'w')
            fw.close()

        fr = open(tmp_file, 'r')
        pass_img = fr.readlines()
        fr.close()

        for count, img_path in enumerate(file_paths):
            fw = open(tmp_file, 'a+')
            if img_path + '\n' in pass_img:
                continue
            start = timer()
            img = cv2.imread(img_path)

            box_res = []
            label_res = []
            score_res = []

            imgH = img.shape[0]
            imgW = img.shape[1]

            ori_H = imgH
            ori_W = imgW

            # print(" ori_h, ori_w: ", imgH, imgW)
            # pad the image so that at least one full crop fits
            if imgH < h_len:
                temp = np.zeros([h_len, imgW, 3], np.float32)
                temp[0:imgH, :, :] = img
                img = temp
                imgH = h_len

            if imgW < w_len:
                temp = np.zeros([imgH, w_len, 3], np.float32)
                temp[:, 0:imgW, :] = img
                img = temp
                imgW = w_len

            # slide overlapping crops over the (possibly padded) image
            for hh in range(0, imgH, h_len - h_overlap):
                if imgH - hh - 1 < h_len:
                    hh_ = imgH - h_len
                else:
                    hh_ = hh
                for ww in range(0, imgW, w_len - w_overlap):
                    if imgW - ww - 1 < w_len:
                        ww_ = imgW - w_len
                    else:
                        ww_ = ww
                    src_img = img[hh_:(hh_ + h_len), ww_:(ww_ + w_len), :]

                    for short_size in cfgs.IMG_SHORT_SIDE_LEN:
                        max_len = 1200
                        if h_len < w_len:
                            new_h, new_w = short_size, min(int(short_size * float(w_len) / h_len), max_len)
                        else:
                            new_h, new_w = min(int(short_size * float(h_len) / w_len), max_len), short_size
                        # cv2.resize expects (width, height); passing (new_h, new_w) would swap
                        # the two whenever the crop is not square
                        img_resize = cv2.resize(src_img, (new_w, new_h))

                        det_boxes_h_, det_scores_h_, det_category_h_ = \
                            sess.run(
                                [det_boxes_h, det_scores_h, det_category_h],
                                feed_dict={img_plac: img_resize[:, :, ::-1]}
                            )

                        valid = det_scores_h_ > 1e-4
                        det_boxes_h_ = det_boxes_h_[valid]
                        det_scores_h_ = det_scores_h_[valid]
                        det_category_h_ = det_category_h_[valid]

                        # map boxes from the resized crop back to crop coordinates
                        det_boxes_h_[:, 0] = det_boxes_h_[:, 0] * w_len / new_w
                        det_boxes_h_[:, 1] = det_boxes_h_[:, 1] * h_len / new_h
                        det_boxes_h_[:, 2] = det_boxes_h_[:, 2] * w_len / new_w
                        det_boxes_h_[:, 3] = det_boxes_h_[:, 3] * h_len / new_h

                        if len(det_boxes_h_) > 0:
                            for ii in range(len(det_boxes_h_)):
                                box = det_boxes_h_[ii]
                                # shift from crop coordinates to full-image coordinates
                                box[0] = box[0] + ww_
                                box[1] = box[1] + hh_
                                box[2] = box[2] + ww_
                                box[3] = box[3] + hh_
                                box_res.append(box)
                                label_res.append(det_category_h_[ii])
                                score_res.append(det_scores_h_[ii])

            box_res = np.array(box_res)
            label_res = np.array(label_res)
            score_res = np.array(score_res)

            box_res_, label_res_, score_res_ = [], [], []

            # h_threshold = {'roundabout': 0.35, 'tennis-court': 0.35, 'swimming-pool': 0.4, 'storage-tank': 0.3,
            #                'soccer-ball-field': 0.3, 'small-vehicle': 0.4, 'ship': 0.35, 'plane': 0.35,
            #                'large-vehicle': 0.4, 'helicopter': 0.4, 'harbor': 0.3, 'ground-track-field': 0.4,
            #                'bridge': 0.3, 'basketball-court': 0.4, 'baseball-diamond': 0.3}
            h_threshold = {'turntable': 0.5, 'tennis-court': 0.5, 'swimming-pool': 0.5, 'storage-tank': 0.5,
                           'soccer-ball-field': 0.5, 'small-vehicle': 0.5, 'ship': 0.5, 'plane': 0.5,
                           'large-vehicle': 0.5, 'helicopter': 0.5, 'harbor': 0.5, 'ground-track-field': 0.5,
                           'bridge': 0.5, 'basketball-court': 0.5, 'baseball-diamond': 0.5,
                           'container-crane': 0.5}

            # per-class NMS over all crops and scales
            for sub_class in range(1, cfgs.CLASS_NUM + 1):
                index = np.where(label_res == sub_class)[0]
                if len(index) == 0:
                    continue
                tmp_boxes_h = box_res[index]
                tmp_label_h = label_res[index]
                tmp_score_h = score_res[index]

                tmp_boxes_h = np.array(tmp_boxes_h)
                tmp = np.zeros([tmp_boxes_h.shape[0], tmp_boxes_h.shape[1] + 1])
                tmp[:, 0:-1] = tmp_boxes_h
                tmp[:, -1] = np.array(tmp_score_h)

                # inx = nms.py_cpu_nms(dets=np.array(tmp, np.float32),
                #                      thresh=h_threshold[LABEL_NAME_MAP[sub_class]],
                #                      max_output_size=500)
                inx = nms(np.array(tmp, np.float32), h_threshold[LABEl_NAME_MAP[sub_class]])
                inx = inx[:500]  # max_output_size is 500

                box_res_.extend(np.array(tmp_boxes_h)[inx])
                score_res_.extend(np.array(tmp_score_h)[inx])
                label_res_.extend(np.array(tmp_label_h)[inx])

            time_elapsed = timer() - start

            if save_res:
                scores = np.array(score_res_)
                labels = np.array(label_res_)
                boxes = np.array(box_res_)
                valid_show = scores > cfgs.SHOW_SCORE_THRSHOLD
                scores = scores[valid_show]
                boxes = boxes[valid_show]
                labels = labels[valid_show]
                det_detections_h = draw_box_in_img.draw_boxes_with_label_and_scores(
                    np.array(img, np.float32),
                    boxes=boxes,
                    labels=labels,
                    scores=scores,
                    in_graph=False)
                det_detections_h = det_detections_h[:ori_H, :ori_W]
                save_dir = os.path.join(des_folder, cfgs.VERSION)
                tools.mkdir(save_dir)
                cv2.imwrite(save_dir + '/' + img_path.split('/')[-1].split('.')[0] +
                            '_h_s%d_t%f.jpg' % (h_len, cfgs.FAST_RCNN_NMS_IOU_THRESHOLD),
                            det_detections_h)

                view_bar('{} cost {}s'.format(img_path.split('/')[-1].split('.')[0], time_elapsed),
                         count + 1, len(file_paths))
            else:
                # eval txt
                CLASS_DOTA = NAME_LABEL_MAP.keys()

                # Task2
                write_handle_h = {}
                txt_dir_h = os.path.join('txt_output', cfgs.VERSION + '_h')
                tools.mkdir(txt_dir_h)
                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class] = open(os.path.join(txt_dir_h, 'Task2_%s.txt' % sub_class), 'a+')

                for i, hbox in enumerate(box_res_):
                    command = '%s %.3f %.1f %.1f %.1f %.1f\n' % (img_path.split('/')[-1].split('.')[0],
                                                                 score_res_[i],
                                                                 hbox[0], hbox[1], hbox[2], hbox[3])
                    write_handle_h[LABEl_NAME_MAP[label_res_[i]]].write(command)

                for sub_class in CLASS_DOTA:
                    if sub_class == 'back_ground':
                        continue
                    write_handle_h[sub_class].close()

                view_bar('{} cost {}s'.format(img_path.split('/')[-1].split('.')[0], time_elapsed),
                         count + 1, len(file_paths))

            fw.write('{}\n'.format(img_path))
            fw.close()

        os.remove(tmp_file)
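
# --- Hedged usage sketch (not part of the original script) ---
# A minimal sketch of how inference() might be driven. The import path, the DetectionNetwork
# constructor, the image folder and the crop/overlap sizes are assumptions for illustration;
# 600x600 crops with 150-pixel overlap simply mirror common DOTA splitting settings.
import os
from libs.networks import build_whole_network  # assumed import path


def run_dota_inference(img_dir, out_dir):
    file_paths = [os.path.join(img_dir, name)
                  for name in os.listdir(img_dir)
                  if name.lower().endswith(('.png', '.jpg', '.tif'))]
    det_net = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                   is_training=False)  # assumed constructor
    inference(det_net, file_paths, des_folder=out_dir,
              h_len=600, w_len=600, h_overlap=150, w_overlap=150,
              save_res=False)  # save_res=False writes Task2 txt files instead of drawn images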
def detect(det_net, inference_save_path, real_test_imgname_list):

    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])  # is RGB. not BGR
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)
    img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # [1, None, None, 3]

    result_dict = det_net.build_whole_detection_network(input_img_batch=img_batch,
                                                        gtboxes_batch=None)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = det_net.get_restorer()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        for i, a_img_name in enumerate(real_test_imgname_list):

            raw_img = cv2.imread(a_img_name)
            start = time.time()
            resized_img, result_dict_ = \
                sess.run(
                    [img_batch, result_dict],
                    feed_dict={img_plac: raw_img[:, :, ::-1]}  # cv is BGR. But need RGB
                )
            end = time.time()
            detected_boxes, detected_scores, detected_categories = merge_result(result_dict_)

            # print("{} cost time : {} ".format(img_name, (end - start)))
            show_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
            show_scores = detected_scores[show_indices]
            show_boxes = detected_boxes[show_indices]
            show_categories = detected_categories[show_indices]

            final_detections = draw_box_in_img.draw_boxes_with_label_and_scores(np.squeeze(resized_img, 0),
                                                                                boxes=show_boxes,
                                                                                labels=show_categories,
                                                                                scores=show_scores)
            nake_name = a_img_name.split('/')[-1]
            # print (inference_save_path + '/' + nake_name)
            cv2.imwrite(inference_save_path + '/' + nake_name,
                        final_detections[:, :, ::-1])

            tools.view_bar('{} image cost {}s'.format(a_img_name, (end - start)),
                           i + 1, len(real_test_imgname_list))
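
# --- Hedged usage sketch (not part of the original script) ---
# A possible command-line entry point for detect(). The argument names, default folders and
# the det_net constructor (same assumed DetectionNetwork as in the sketch after inference())
# are illustrative, not the repository's actual CLI.
import argparse
import os


def parse_args():
    parser = argparse.ArgumentParser(description='Run detect() on a folder of images')
    parser.add_argument('--data_dir', default='./demo_imgs', help='folder with test images')
    parser.add_argument('--save_dir', default='./inference_results', help='folder for drawn results')
    parser.add_argument('--gpu', default='0', help='GPU id to use')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    test_imgs = [os.path.join(args.data_dir, name)
                 for name in os.listdir(args.data_dir)
                 if name.lower().endswith(('.jpg', '.jpeg', '.png'))]
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    det_net = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                   is_training=False)  # assumed constructor
    detect(det_net, inference_save_path=args.save_dir, real_test_imgname_list=test_imgs)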