def encode_label(image, gt_boxes):
    """Label every anchor on the 45 x 60 x 9 grid and visualise the result.

    One anchor is built per grid cell and per entry ``k`` of ``wandhG``
    (``wandhG[k][0]`` is used as the anchor width, ``wandhG[k][1]`` as its
    height), centred on the cell. Anchors that do not stay strictly inside
    a 5-pixel margin around the image are left unlabelled.

    An anchor is positive when its IoU with at least one ground-truth box
    exceeds ``pos_thresh``; it is negative when its IoU with every
    ground-truth box is below ``neg_thresh``.

    Side effects: positive anchors are passed to ``plot_boxes_on_image`` and
    reported with ``print``; a dot is drawn on ``image`` at the centre of
    every positive and negative anchor; the image is finally opened in a
    viewer via ``Image.fromarray(image).show()``.

    Args:
        image: image array that the anchor markers are drawn onto.
        gt_boxes: ground-truth boxes, indexable by the argmax of the IoUs.

    Returns:
        Tuple ``(target_scores, target_bboxes, target_masks)`` with shapes
        ``[45, 60, 9, 2]``, ``[45, 60, 9, 4]`` and ``[45, 60, 9]``.
        ``target_scores[..., 0]`` marks background and ``[..., 1]``
        foreground; ``target_masks`` is 1 for positive, -1 for negative and
        0 for unlabelled anchors.
    """
    scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
    bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
    masks = np.zeros(shape=[45, 60, 9])  # negative: -1, positive: 1

    for row in range(45):  # y: height
        cy = row * grid_height + grid_height * 0.5
        for col in range(60):  # x: width
            cx = col * grid_width + grid_width * 0.5
            for a_idx in range(9):
                half_w = wandhG[a_idx][0] * 0.5
                half_h = wandhG[a_idx][1] * 0.5
                xmin, xmax = cx - half_w, cx + half_w
                ymin, ymax = cy - half_h, cy + half_h

                # Ignore cross-boundary anchors.
                inside = (xmin > -5) & (ymin > -5) & (
                    xmax < (image_width + 5)) & (ymax < (image_height + 5))
                if not inside:
                    continue

                anchor = np.array([[xmin, ymin, xmax, ymax]])
                dot = (int(0.5 * (xmin + xmax)), int(0.5 * (ymin + ymax)))

                # IoU between this anchor and all ground-truth boxes.
                overlaps = compute_iou(anchor, gt_boxes)
                is_positive = np.any(overlaps > pos_thresh)
                is_negative = np.all(overlaps < neg_thresh)

                if is_positive:
                    plot_boxes_on_image(image, anchor, thickness=1)
                    print("=> encode: %d, %d, %d" % (row, col, a_idx))
                    cv2.circle(image, center=dot, radius=1,
                               color=[255, 0, 0], thickness=4)
                    scores[row, col, a_idx, 1] = 1.
                    masks[row, col, a_idx] = 1  # positive sample
                    # Regress towards the best-overlapping ground-truth box.
                    best_gt = gt_boxes[np.argmax(overlaps)]
                    bboxes[row, col, a_idx] = compute_regression(
                        best_gt, anchor[0])

                if is_negative:
                    scores[row, col, a_idx, 0] = 1.
                    masks[row, col, a_idx] = -1  # negative sample
                    cv2.circle(image, center=dot, radius=1,
                               color=[0, 0, 0], thickness=4)

    Image.fromarray(image).show()
    return scores, bboxes, masks
# Run the trained RPN on synthetic images 8001..8200, draw the boxes that
# survive score filtering and NMS, and save the annotated images.
import os
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from rpn import RPNplus
from utils import decode_output, plot_boxes_on_image, nms

synthetic_dataset_path = "./synthetic_dataset/synthetic_dataset"
prediction_result_path = "./prediction"
# exist_ok avoids the check-then-create race of exists() + mkdir() and also
# works if the output path is ever changed to a nested directory.
os.makedirs(prediction_result_path, exist_ok=True)

model = RPNplus()
fake_data = np.ones(shape=[1, 720, 960, 3]).astype(np.float32)
model(fake_data)  # initialize model to load weights
model.load_weights("./RPN.h5")

for idx in range(8000, 8200):
    # NOTE: the image read is number idx + 1, but the result is saved as
    # "<idx>.jpg" below; the off-by-one naming is kept for compatibility.
    image_path = os.path.join(synthetic_dataset_path, "image/%d.jpg" % (idx + 1))
    raw_image = cv2.imread(image_path)
    if raw_image is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail with a clear message instead of a TypeError on `None / 255.`.
        raise FileNotFoundError("cannot read image: %s" % image_path)

    image_data = np.expand_dims(raw_image / 255., 0)  # add batch axis, scale to [0, 1]
    pred_scores, pred_bboxes = model(image_data)
    pred_scores = tf.nn.softmax(pred_scores, axis=-1)
    # Keep boxes scoring above 0.9, then suppress overlaps with threshold 0.5.
    pred_scores, pred_bboxes = decode_output(pred_bboxes, pred_scores, 0.9)
    pred_bboxes = nms(pred_bboxes, pred_scores, 0.5)
    plot_boxes_on_image(raw_image, pred_bboxes)

    save_path = os.path.join(prediction_result_path, str(idx) + ".jpg")
    print("=> saving prediction results into %s" % save_path)
    # NOTE(review): cv2.imread yields BGR channel order while PIL treats the
    # array as RGB; unless plot_boxes_on_image converts raw_image in place the
    # saved colours are swapped -- TODO confirm.
    Image.fromarray(raw_image).save(save_path)
from PIL import Image
from rpn import RPNplus
from utils import compute_iou, plot_boxes_on_image, wandhG, load_gt_boxes, compute_regression

# Demo settings. The thresholds are not used in the statements below;
# presumably they are IoU cut-offs for labelling anchors -- TODO confirm.
pos_thresh = 0.5
neg_thresh = 0.1
iou_thresh = 0.5
# Size in pixels of one grid cell and of the input image:
# 720 / 16 = 45 rows and 960 / 16 = 60 columns, matching the arrays below.
grid_width = grid_height = 16
image_height, image_width = 720, 960

# Convert the imported anchor table to an ndarray.
wandhG = np.array(wandhG)

# Load sample image 1 and its ground-truth boxes (absolute, machine-specific paths).
image_path = "/Users/yangyun/synthetic_dataset/image/1.jpg"
gt_boxes = load_gt_boxes("/Users/yangyun/synthetic_dataset/imageAno/1.txt")
raw_image = cv2.imread(image_path)

# Draw the ground-truth boxes on a copy so raw_image is not passed to the plot call.
image_with_gt_boxes = np.copy(raw_image)
plot_boxes_on_image(image_with_gt_boxes, gt_boxes)
Image.fromarray(image_with_gt_boxes).show()

# Second, separate copy of the raw image.
encoded_image = np.copy(raw_image)

# Zero-initialised arrays indexed [grid row, grid column, anchor index, ...].
target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1

################################### ENCODE INPUT #################################

for i in range(45):
    for j in range(60):
        for k in range(9):
            # x-coordinate of the centre of grid column j
            center_x = j * grid_width + grid_width * 0.5
from utils import compute_iou, plot_boxes_on_image, wandhG, load_gt_boxes, compute_regression, decode_output

# Demo settings. The thresholds are not used in the statements below;
# presumably they are IoU cut-offs for labelling anchors -- TODO confirm.
pos_thresh = 0.5
neg_thresh = 0.1
iou_thresh = 0.5
# Size in pixels of one grid cell and of the input image:
# 720 / 16 = 45 rows and 960 / 16 = 60 columns, matching the arrays below.
grid_width = 16
grid_height = 16
image_height = 720
image_width = 960

# Load sample image 1 and its ground-truth boxes (paths relative to the working directory).
image_path = "./synthetic_dataset/image/1.jpg"
label_path = "./synthetic_dataset/imageAno/1.txt"
gt_boxes = load_gt_boxes(label_path)
raw_image = cv2.imread(image_path)

# Draw the ground-truth boxes on a copy so raw_image is not passed to the plot call.
image_with_gt_boxes = np.copy(raw_image)
plot_boxes_on_image(image_with_gt_boxes, gt_boxes)
Image.fromarray(image_with_gt_boxes).show()

# Second, separate copy of the raw image.
encoded_image = np.copy(raw_image)

# Zero-initialised arrays indexed [grid row, grid column, anchor index, ...].
target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1

################################### ENCODE INPUT #################################

for i in range(45):
    for j in range(60):
        for k in range(9):
            # x-coordinate of the centre of grid column j
            center_x = j * grid_width + grid_width * 0.5
# Greedy non-maximum suppression: repeatedly keep the highest-scoring box and
# drop every remaining box whose IoU with it is above 0.1.
selected_boxes = []
while len(pred_boxes) > 0:
    max_idx = np.argmax(pred_score)
    selected_box = pred_boxes[max_idx]
    selected_boxes.append(selected_box)
    # Remove the chosen box (and its score) from the candidate pool.
    pred_boxes = np.concatenate(
        [pred_boxes[:max_idx], pred_boxes[max_idx + 1:]])
    pred_score = np.concatenate(
        [pred_score[:max_idx], pred_score[max_idx + 1:]])
    # Keep only the candidates that barely overlap the chosen box.
    ious = compute_iou(selected_box, pred_boxes)
    iou_mask = ious <= 0.1
    pred_boxes = pred_boxes[iou_mask]
    pred_score = pred_score[iou_mask]

selected_boxes = np.array(selected_boxes)
plot_boxes_on_image(raw_image, selected_boxes)
Image.fromarray(np.uint8(raw_image)).show()

# Build a per-cell integer offset grid, reshaped to [45, 60, 1, 2] as float32.
grid_size = [45, 60]
grid_x = tf.range(grid_size[0], dtype=tf.int32)
grid_y = tf.range(grid_size[1], dtype=tf.int32)
# NOTE(review): tf.meshgrid defaults to indexing='xy', so a and b come out as
# [60, 45] while the reshape below targets [45, 60, 1, 2]; confirm that the
# resulting cell ordering is what downstream code expects.
a, b = tf.meshgrid(grid_x, grid_y)
x_offset = tf.reshape(a, (-1, 1))
y_offset = tf.reshape(b, (-1, 1))
xy_offset = tf.concat([x_offset, y_offset], axis=-1)
# Fixed: these two lines previously read the undefined name `x_y_offset`,
# which raised NameError; the tensor built above is `xy_offset`.
xy_offset = tf.reshape(xy_offset, [grid_size[0], grid_size[1], 1, 2])
xy_offset = tf.cast(xy_offset, tf.float32)
pos_thresh = 0.5 neg_thresh = 0.1 iou_thresh = 0.5 grid_width = 16 # 网格的长宽都是16,因为从原始图片到 feature map 经历了16倍的缩放 grid_height = 16 image_height = 900 image_width = 900 wnum, hnum = math.floor(image_width / grid_width), math.floor(image_height / grid_height) image_path = "IMG_1278.jpg" label_path = "test.txt" gt_boxes = load_gt_boxes(label_path) # 把 ground truth boxes 的坐标读取出来 raw_image = cv2.imread(image_path) # 将图片读取出来 (高,宽,通道数) image_with_gt_boxes = np.copy(raw_image) # 复制原始图片 plot_boxes_on_image(image_with_gt_boxes, gt_boxes) # 将 ground truth boxes 画在图片上 Image.fromarray(image_with_gt_boxes).show() # 展示画了 ground truth boxes 的图片 ## 因为得到的 feature map 的长宽都是原始图片的 1/16,所以这里 wnum=720/16,hnum=960/16。 target_scores = np.zeros(shape=[wnum, hnum, 9, 2]) # 0: background, 1: foreground, , target_bboxes = np.zeros(shape=[wnum, hnum, 9, 4]) # t_x, t_y, t_w, t_h target_masks = np.zeros(shape=[wnum, hnum, 9]) # negative_samples: -1, positive_samples: 1 ################################### ENCODE INPUT ################################# ## 将 feature map 分成 wnum*hnum 个小块 for i in range(wnum): for j in range(hnum): for k in range(9): center_x = j * grid_width + grid_width * 0.5 # 计算此小块的中心点横坐标 center_y = i * grid_height + grid_height * 0.5 # 计算此小块的中心点纵坐标 xmin = center_x - wandhG[k][