def encode_quads(y, epsilon=10e-5):
    df_bboxes = textboxes_utils.get_bboxes_from_quads(
        np.reshape(y[:, -16:-8], (-1, 4, 2)))
    y_encoded = y.copy()
    # encoded_x = (x - dx) / dw / sqrt(var(x))
    # note: epsilon was previously unused; it is applied under the square root
    # here to guard against division by zero
    y_encoded[:, [-24, -22, -20, -18]] = (
        y[:, [-24, -22, -20, -18]] - y[:, [-16, -14, -12, -10]]
    ) / np.tile(np.expand_dims(df_bboxes[:, 2], axis=-1), (1, 4)) \
      / np.sqrt(y[:, [-8, -6, -4, -2]] + epsilon)
    # encoded_y = (y - dy) / dh / sqrt(var(y))
    y_encoded[:, [-23, -21, -19, -17]] = (
        y[:, [-23, -21, -19, -17]] - y[:, [-15, -13, -11, -9]]
    ) / np.tile(np.expand_dims(df_bboxes[:, 3], axis=-1), (1, 4)) \
      / np.sqrt(y[:, [-7, -5, -3, -1]] + epsilon)
    return y_encoded
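# For reference, a minimal sketch of the inverse transform. `decode_quads` is
# a hypothetical helper, not part of this repo; it assumes the same row layout
# as encode_quads above: 8 quad coordinates, then 8 default-box quad
# coordinates, then 8 variance columns at the end of each row.
def decode_quads(y_encoded, epsilon=10e-5):
    df_bboxes = textboxes_utils.get_bboxes_from_quads(
        np.reshape(y_encoded[:, -16:-8], (-1, 4, 2)))
    y_decoded = y_encoded.copy()
    # x = encoded_x * sqrt(var(x)) * dw + dx
    y_decoded[:, [-24, -22, -20, -18]] = (
        y_encoded[:, [-24, -22, -20, -18]]
        * np.sqrt(y_encoded[:, [-8, -6, -4, -2]] + epsilon)
        * np.tile(np.expand_dims(df_bboxes[:, 2], axis=-1), (1, 4))
    ) + y_encoded[:, [-16, -14, -12, -10]]
    # y = encoded_y * sqrt(var(y)) * dh + dy
    y_decoded[:, [-23, -21, -19, -17]] = (
        y_encoded[:, [-23, -21, -19, -17]]
        * np.sqrt(y_encoded[:, [-7, -5, -3, -1]] + epsilon)
        * np.tile(np.expand_dims(df_bboxes[:, 3], axis=-1), (1, 4))
    ) + y_encoded[:, [-15, -13, -11, -9]]
    return y_decoded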
input_size = config["model"]["input_size"]
model_config = config["model"]

if model_config["name"] == "ssd_vgg16":
    model, label_maps, process_input_fn, image = inference_utils.inference_ssd_vgg16(config, args)
elif model_config["name"] == "ssd_mobilenetv1":
    model, label_maps, process_input_fn, image, bboxes, classes = inference_utils.inference_ssd_mobilenetv1(config, args)
elif model_config["name"] == "ssd_mobilenetv2":
    model, label_maps, process_input_fn, image, bboxes, classes = inference_utils.inference_ssd_mobilenetv2(config, args)
elif model_config["name"] == "tbpp_vgg16":
    model, label_maps, process_input_fn, image, quads, classes = inference_utils.inference_tbpp_vgg16(config, args)
    bboxes = textboxes_utils.get_bboxes_from_quads(quads)
elif model_config["name"] == "qssd_vgg16":
    model, label_maps, process_input_fn, image = inference_utils.inference_qssd_vgg16(config, args)
else:
    print(f"model with name {model_config['name']} has not been implemented yet")
    exit()

model.load_weights(args.weights)

for idx, input_image in enumerate(list(glob(args.images))):
    image = cv2.imread(input_image)  # read image in bgr format
    image = np.uint8(image)
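# An illustrative sketch of the config structure this script reads. Only
# "model.name" and "model.input_size" are referenced above; the full schema
# lives in the repo's config files, so treat the other details here as
# assumptions, not the actual format.
config = {
    "model": {
        "name": "tbpp_vgg16",  # selects one of the branches above
        "input_size": 384,     # square network input resolution in pixels
    },
}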
def __get_data(self, batch):
    X = []
    y = self.input_template.copy()

    for batch_idx, sample_idx in enumerate(batch):
        image_path, label_path = self.samples[sample_idx].split(" ")
        image, quads = textboxes_utils.read_sample(
            image_path=image_path, label_path=label_path)
        quads = textboxes_utils.sort_quads_vertices(quads)

        if self.perform_augmentation:
            image, quads, _ = self.__augment(
                image=image,
                quads=quads,
                classes=None,
            )

        bboxes = textboxes_utils.get_bboxes_from_quads(quads)
        image_height, image_width, _ = image.shape
        height_scale, width_scale = self.input_size / image_height, self.input_size / image_width

        input_img = cv2.resize(np.uint8(image), (self.input_size, self.input_size))
        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
        input_img = self.process_input_fn(input_img)

        gt_classes = np.zeros((quads.shape[0], self.num_classes))
        gt_textboxes = np.zeros((quads.shape[0], 12))
        default_boxes = y[batch_idx, :, -8:]

        # normalize the bounding box and quad coordinates to [0, 1]
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            quad = quads[i]
            cx = bbox[0] * width_scale / self.input_size
            cy = bbox[1] * height_scale / self.input_size
            width = bbox[2] * width_scale / self.input_size
            height = bbox[3] * height_scale / self.input_size
            q_x1 = quad[0, 0] * width_scale / self.input_size
            q_y1 = quad[0, 1] * height_scale / self.input_size
            q_x2 = quad[1, 0] * width_scale / self.input_size
            q_y2 = quad[1, 1] * height_scale / self.input_size
            q_x3 = quad[2, 0] * width_scale / self.input_size
            q_y3 = quad[2, 1] * height_scale / self.input_size
            q_x4 = quad[3, 0] * width_scale / self.input_size
            q_y4 = quad[3, 1] * height_scale / self.input_size
            gt_textboxes[i, :4] = [cx, cy, width, height]
            gt_textboxes[i, 4:] = [q_x1, q_y1, q_x2, q_y2, q_x3, q_y3, q_x4, q_y4]
            gt_classes[i] = [0, 1]  # one-hot: [background, text]

        matches, neutral_boxes = ssd_utils.match_gt_boxes_to_default_boxes(
            gt_boxes=gt_textboxes[:, :4],
            default_boxes=default_boxes[:, :4],
            match_threshold=self.match_threshold,
            neutral_threshold=self.neutral_threshold)

        # set matched ground truth boxes to default boxes with appropriate class
        y[batch_idx, matches[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[matches[:, 0]]
        y[batch_idx, matches[:, 1], 0:self.num_classes] = gt_classes[matches[:, 0]]  # set class scores label
        # set neutral ground truth boxes to default boxes with appropriate class
        y[batch_idx, neutral_boxes[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[neutral_boxes[:, 0]]
        y[batch_idx, neutral_boxes[:, 1], 0:self.num_classes] = np.zeros((self.num_classes))  # neutral boxes have a class vector of all zeros
        # encode the bounding boxes
        y[batch_idx] = textboxes_utils.encode_textboxes(y[batch_idx])
        X.append(input_img)

    X = np.array(X, dtype=np.float32)  # np.float is removed in modern NumPy; use an explicit dtype
    return X, y
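# A rough, self-contained sketch of the matching contract used above. This is
# an assumption about ssd_utils.match_gt_boxes_to_default_boxes based on how
# its return values are indexed, not the repo's actual implementation: it pairs
# ground-truth boxes with default boxes whose IoU exceeds match_threshold and
# returns [gt_idx, default_box_idx] pairs; overlaps between neutral_threshold
# and match_threshold come back as "neutral" pairs.
import numpy as np

def sketch_match(gt_boxes, default_boxes, match_threshold=0.5, neutral_threshold=0.3):
    def iou_cxcywh(a, b):
        # convert (cx, cy, w, h) to corners, then compute intersection over union
        ax1, ay1, ax2, ay2 = a[0] - a[2] / 2, a[1] - a[3] / 2, a[0] + a[2] / 2, a[1] + a[3] / 2
        bx1, by1, bx2, by2 = b[0] - b[2] / 2, b[1] - b[3] / 2, b[0] + b[2] / 2, b[1] + b[3] / 2
        iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))
        ih = max(0.0, min(ay2, by2) - max(ay1, by1))
        inter = iw * ih
        union = a[2] * a[3] + b[2] * b[3] - inter
        return inter / union if union > 0 else 0.0

    matches, neutral = [], []
    for gt_idx, gt in enumerate(gt_boxes):
        for db_idx, db in enumerate(default_boxes):
            overlap = iou_cxcywh(gt, db)
            if overlap >= match_threshold:
                matches.append([gt_idx, db_idx])
            elif overlap >= neutral_threshold:
                neutral.append([gt_idx, db_idx])
    return np.array(matches).reshape(-1, 2), np.array(neutral).reshape(-1, 2)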
def evaluate_qssd_mobilenetv2(config, args):
    print("evaluate_qssd_mobilenetv2")
    input_size = config["model"]["input_size"]
    with open(args.label_maps, "r") as file:
        label_maps = [line.strip("\n") for line in file.readlines()]

    model = QSSD_MOBILENETV2(config, label_maps, is_training=False,
                             num_predictions=args.num_predictions)
    model.load_weights(args.weights)

    images = sorted(list(glob(os.path.join(args.images_dir, "*jpg"))))
    labels = sorted(list(glob(os.path.join(args.images_dir, "*json"))))
    class_id = 5

    Xs = np.zeros((len(images), input_size, input_size, 3), dtype=np.float32)
    ys = []
    for i, (image_file, label_file) in enumerate(list(zip(images, labels))):
        print(f"reading sample: {i+1}/{len(images)}")
        image = cv2.imread(image_file)  # read image in bgr format
        input_image = cv2.resize(image, (input_size, input_size))
        width_scale = input_size / image.shape[1]
        height_scale = input_size / image.shape[0]
        input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
        input_image = mobilenet_v2.preprocess_input(input_image)
        Xs[i] = input_image
        with open(label_file, "r") as f:
            label = json.load(f)
        objects = label["shapes"]
        objs = []
        for obj in objects:
            if obj["label"] == label_maps[class_id - 1]:
                polygon = np.array(obj["points"])
                tp = polygon.copy()
                tp[:, 0] = polygon[:, 0] * width_scale
                tp[:, 1] = polygon[:, 1] * height_scale
                objs.append({"class": obj["label"], "polygon": tp})
        ys.append(objs)

    y_preds = model.predict(Xs)

    recalls, precisions = [], []
    for confidence_threshold in np.arange(start=0, stop=1, step=0.01):
        y_preds_filtered = []
        for y_pred in y_preds:
            selected_pred = []
            for p in y_pred:
                obj, score = p[0], p[1]
                if score >= confidence_threshold and obj == class_id:
                    selected_pred.append(p)
            y_preds_filtered.append(selected_pred)

        TP, FP, FN = 0, 0, 0
        for i in range(len(images)):
            y_true = textboxes_utils.get_bboxes_from_quads(
                np.array([y["polygon"] for y in ys[i]]))
            y_true = bbox_utils.center_to_corner(y_true)
            y_pred = np.array(y_preds_filtered[i])
            if len(y_true) == 0 and len(y_pred) == 0:
                continue
            if len(y_true) == 0:
                FP += len(y_pred)
                continue
            if len(y_pred) == 0:
                FN += len(y_true)
                continue
            y_pred = y_pred[:, 2:6]
            # greedily match each ground-truth box to its best-overlapping,
            # not-yet-matched prediction; unmatched ground truths count as
            # false negatives and unmatched predictions as false positives
            matched = np.zeros(len(y_pred), dtype=bool)
            for gt_box in y_true:
                overlaps = np.array([
                    bbox_utils.iou(np.expand_dims(gt_box, axis=0),
                                   np.expand_dims(pred_box, axis=0))
                    for pred_box in y_pred
                ]).flatten()
                best = int(np.argmax(overlaps))
                if overlaps[best] > 0.8 and not matched[best]:
                    TP += 1
                    matched[best] = True
                else:
                    FN += 1
            FP += int((~matched).sum())

        recall = TP / (TP + FN) if (TP + FN) > 0 else 0
        precision = TP / (TP + FP) if (TP + FP) > 0 else 1  # define precision as 1 when nothing is predicted
        print(f"-- confidence_score: {confidence_threshold}")
        print(f"---- recall: {recall}")
        print(f"---- precision: {precision}")
        recalls.append(recall)
        precisions.append(precision)

    plt.plot(recalls, precisions)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.show()
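# Optional follow-up (not part of the repo): the threshold sweep above yields a
# precision-recall curve; it can be summarized as a single average-precision
# number with the standard interpolated-precision rule.
def average_precision(recalls, precisions):
    r = np.array(recalls)
    p = np.array(precisions)
    order = np.argsort(r)                # sort points by increasing recall
    r, p = r[order], p[order]
    for i in range(len(p) - 2, -1, -1):  # precision envelope: make p non-increasing
        p[i] = max(p[i], p[i + 1])
    # integrate precision over recall increments
    return float(np.sum(np.diff(np.concatenate(([0.0], r))) * p))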
def __get_data(self, batch):
    X = []
    y = self.input_template.copy()

    for batch_idx, sample_idx in enumerate(batch):
        image_path, label_path = self.samples[sample_idx].split(" ")
        image, quads = textboxes_utils.read_sample(
            image_path=image_path, label_path=label_path)

        if self.perform_augmentation:
            image, quads, _ = self.__augment(
                image=image,
                quads=quads,
                classes=None,
            )

        quads = textboxes_utils.sort_quads_vertices(quads)
        bboxes = textboxes_utils.get_bboxes_from_quads(quads)
        image_height, image_width, _ = image.shape
        height_scale, width_scale = self.input_size / image_height, self.input_size / image_width

        input_img = cv2.resize(np.uint8(image), (self.input_size, self.input_size))
        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
        input_img = self.process_input_fn(input_img)

        gt_classes = np.zeros((quads.shape[0], self.num_classes))
        gt_textboxes = np.zeros((quads.shape[0], 12))
        default_boxes = y[batch_idx, :, -8:]

        # normalize the bounding box and quad coordinates to [0, 1]
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            quad = quads[i]
            cx = bbox[0] * width_scale / self.input_size
            cy = bbox[1] * height_scale / self.input_size
            width = bbox[2] * width_scale / self.input_size
            height = bbox[3] * height_scale / self.input_size
            q_x1 = quad[0, 0] * width_scale / self.input_size
            q_y1 = quad[0, 1] * height_scale / self.input_size
            q_x2 = quad[1, 0] * width_scale / self.input_size
            q_y2 = quad[1, 1] * height_scale / self.input_size
            q_x3 = quad[2, 0] * width_scale / self.input_size
            q_y3 = quad[2, 1] * height_scale / self.input_size
            q_x4 = quad[3, 0] * width_scale / self.input_size
            q_y4 = quad[3, 1] * height_scale / self.input_size
            gt_textboxes[i, :4] = [cx, cy, width, height]
            gt_textboxes[i, 4:] = [q_x1, q_y1, q_x2, q_y2, q_x3, q_y3, q_x4, q_y4]
            gt_classes[i] = [0, 1]  # one-hot: [background, text]

        matches, neutral_boxes = ssd_utils.match_gt_boxes_to_default_boxes(
            gt_boxes=gt_textboxes[:, :4],
            default_boxes=default_boxes[:, :4],
            match_threshold=self.match_threshold,
            neutral_threshold=self.neutral_threshold)

        # set matched ground truth boxes to default boxes with appropriate class
        y[batch_idx, matches[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[matches[:, 0]]
        y[batch_idx, matches[:, 1], 0:self.num_classes] = gt_classes[matches[:, 0]]  # set class scores label
        # set neutral ground truth boxes to default boxes with appropriate class
        y[batch_idx, neutral_boxes[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[neutral_boxes[:, 0]]
        y[batch_idx, neutral_boxes[:, 1], 0:self.num_classes] = np.zeros((self.num_classes))  # neutral boxes have a class vector of all zeros
        # encode the bounding boxes
        y[batch_idx] = textboxes_utils.encode_textboxes(y[batch_idx])
        X.append(input_img)

    X = np.array(X, dtype=np.float32)  # np.float is removed in modern NumPy; use an explicit dtype
    return X, y
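# A hypothetical usage sketch: if this __get_data lives in a
# keras.utils.Sequence subclass (called TBPPDataGenerator here; the actual
# class name may differ), training would look roughly like the following. The
# constructor arguments are guesses inferred from the attributes used above
# (self.samples, self.input_size, self.perform_augmentation, etc.).
train_generator = TBPPDataGenerator(
    samples=train_samples,        # list of "image_path label_path" strings
    input_size=384,
    batch_size=8,
    perform_augmentation=True,
)
model.fit(train_generator, epochs=100)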
def random_crop_quad(image,
                     quads,
                     classes,
                     min_size=0.1,
                     max_size=1,
                     min_ar=1,
                     max_ar=2,
                     overlap_modes=[
                         None,
                         [0.1, None],
                         [0.3, None],
                         [0.7, None],
                         [0.9, None],
                         [None, None],
                     ],
                     max_attempts=100,
                     p=0.5):
    """ Randomly crops a patch from the image.

    Args:
    - image: numpy array representing the input image.
    - quads: numpy array representing the quads.
    - classes: the list of classes associated with each quad.
    - min_size: the minimum size a crop can be.
    - max_size: the maximum size a crop can be.
    - min_ar: the minimum aspect ratio a crop can be.
    - max_ar: the maximum aspect ratio a crop can be.
    - overlap_modes: the list of overlap modes the function can randomly choose from.
    - max_attempts: the max number of attempts to generate a patch.
    - p: the probability of applying the augmentation.

    Returns:
    - image: the modified image
    - quads: the modified quads
    - classes: the modified classes
    """
    assert p >= 0, "p must be larger than or equal to zero"
    assert p <= 1, "p must be less than or equal to 1"
    assert min_size > 0, "min_size must be larger than zero."
    assert max_size <= 1, "max_size must be less than or equal to one."
    assert max_size > min_size, "max_size must be larger than min_size."
    assert max_ar > min_ar, "max_ar must be larger than min_ar."
    assert max_attempts > 0, "max_attempts must be larger than zero."

    if random.random() > p:
        return image, quads, classes

    height, width, _ = image.shape
    overlap_mode = random.choice(overlap_modes)
    if overlap_mode is None:
        return image, quads, classes

    bboxes = get_bboxes_from_quads(quads)
    min_iou, max_iou = overlap_mode
    if min_iou is None:
        min_iou = float(-np.inf)
    if max_iou is None:
        max_iou = float(np.inf)

    for _ in range(max_attempts):
        crop_w = random.uniform(min_size * width, max_size * width)
        crop_h = random.uniform(min_size * height, max_size * height)
        crop_ar = crop_h / crop_w
        if crop_ar < min_ar or crop_ar > max_ar:
            continue  # crop ar does not match criteria, next attempt

        crop_left = random.uniform(0, width - crop_w)
        crop_top = random.uniform(0, height - crop_h)
        crop_rect = np.array(
            [crop_left, crop_top, crop_left + crop_w, crop_top + crop_h],
            dtype=np.float64)
        crop_rect = np.expand_dims(crop_rect, axis=0)
        crop_rect = np.tile(crop_rect, (bboxes.shape[0], 1))

        ious = iou(crop_rect, bboxes)
        obj_coverage = object_coverage(crop_rect, bboxes)
        if (ious.min() < min_iou and ious.max() > max_iou) or (
                obj_coverage.min() < min_iou and obj_coverage.max() > max_iou):
            continue

        # keep only boxes whose centers fall inside the crop
        bbox_centers = np.zeros((bboxes.shape[0], 2), dtype=np.float64)
        bbox_centers[:, 0] = (bboxes[:, 0] + bboxes[:, 2]) / 2
        bbox_centers[:, 1] = (bboxes[:, 1] + bboxes[:, 3]) / 2
        cx_in_crop = (bbox_centers[:, 0] > crop_left) * (bbox_centers[:, 0] < crop_left + crop_w)
        cy_in_crop = (bbox_centers[:, 1] > crop_top) * (bbox_centers[:, 1] < crop_top + crop_h)
        boxes_in_crop = cx_in_crop * cy_in_crop
        if not boxes_in_crop.any():
            continue

        temp_image = image[int(crop_top):int(crop_top + crop_h),
                           int(crop_left):int(crop_left + crop_w), :].copy()
        temp_classes = np.array(classes, dtype=object)[boxes_in_crop]
        temp_bboxes = bboxes[boxes_in_crop]
        temp_quads = quads[boxes_in_crop].copy()

        crop_rect = np.array(
            [crop_left, crop_top, crop_left + crop_w, crop_top + crop_h],
            dtype=np.float64)
        crop_rect = np.expand_dims(crop_rect, axis=0)
        crop_rect = np.tile(crop_rect, (temp_bboxes.shape[0], 1))

        # clamp the bboxes to the crop, then translate them into crop coordinates
        temp_bboxes[:, :2] = np.maximum(temp_bboxes[:, :2], crop_rect[:, :2])  # if a bbox's top left is out of crop, use the crop's xmin, ymin
        temp_bboxes[:, :2] -= crop_rect[:, :2]  # translate xmin, ymin to fit crop
        temp_bboxes[:, 2:] = np.minimum(temp_bboxes[:, 2:], crop_rect[:, 2:])
        temp_bboxes[:, 2:] -= crop_rect[:, :2]  # translate xmax, ymax to fit crop

        # translate the quad vertices into crop coordinates as well, clipping
        # them to the crop bounds so no vertex lies outside the patch
        temp_quads[:, :, 0] = np.clip(temp_quads[:, :, 0] - crop_left, 0, crop_w)
        temp_quads[:, :, 1] = np.clip(temp_quads[:, :, 1] - crop_top, 0, crop_h)

        return temp_image, temp_quads, temp_classes.tolist()

    return image, quads, classes
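# A brief usage sketch under assumed shapes: `quads` is (N, 4, 2) in absolute
# pixel coordinates and `classes` is a length-N list. The file name and values
# below are placeholders, not repo fixtures. Note that even with p=1.0 the
# function may return its inputs unchanged if the None overlap mode is drawn
# or no valid crop is found within max_attempts.
image = cv2.imread("sample.jpg")
quads = np.array([[[10, 10], [60, 12], [58, 40], [8, 38]]], dtype=np.float64)
classes = ["text"]
cropped_image, cropped_quads, cropped_classes = random_crop_quad(
    image, quads, classes, p=1.0)  # p=1.0 forces the augmentation to run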