def compute_inputs(self, image_group, annotations_group):
    """
    Compute inputs for the network using an image_group.

    Args
        image_group: The images of one batch. Each image is run through
            ``self.preprocess_image`` with a square target of
            ``self.input_size``.
        annotations_group: One annotation dict per image, with a ``'bboxes'``
            array whose rows are (x1, y1, x2, y2) and a ``'labels'`` array
            holding one class id per box.

    Returns
        The list ``[batch_images, batch_hms, batch_whs, batch_regs,
        batch_reg_masks, batch_indices]`` of float32 arrays:
            batch_images: preprocessed images.
            batch_hms: one ``output_size`` x ``output_size`` heatmap per class.
            batch_whs: (w, h) of each box after mapping to output coordinates.
            batch_regs: box center minus its integer-truncated center.
            batch_reg_masks: 1 for every object slot that was filled.
            batch_indices: ``y * output_size + x`` of the integer center.

    At most ``self.max_objects`` boxes per image are encoded; the per-object
    arrays have no room for more, so any additional boxes are skipped.
    """
    batch_size = len(image_group)
    hm_shape = (batch_size, self.output_size, self.output_size, self.num_classes())

    batch_images = np.zeros((batch_size, self.input_size, self.input_size, 3), dtype=np.float32)
    batch_hms = np.zeros(hm_shape, dtype=np.float32)
    batch_whs = np.zeros((batch_size, self.max_objects, 2), dtype=np.float32)
    batch_regs = np.zeros((batch_size, self.max_objects, 2), dtype=np.float32)
    batch_reg_masks = np.zeros((batch_size, self.max_objects), dtype=np.float32)
    batch_indices = np.zeros((batch_size, self.max_objects), dtype=np.float32)

    for b, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        # center and scale are taken from the raw image, before preprocessing
        c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0

        # inputs
        batch_images[b] = self.preprocess_image(image, c, s, tgt_w=self.input_size, tgt_h=self.input_size)

        # outputs
        bboxes = annotations['bboxes']
        class_ids = annotations['labels']
        trans_output = get_affine_transform(c, s, self.output_size)

        # The per-object arrays only have self.max_objects slots; indexing
        # past that would raise IndexError.
        num_objects = min(bboxes.shape[0], self.max_objects)
        for i in range(num_objects):
            bbox = bboxes[i].copy()
            cls_id = int(class_ids[i])
            # (x1, y1)
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            # (x2, y2)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.output_size - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.output_size - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # boxes that collapse after clipping leave their slot zeroed
            if not (h > 0 and w > 0):
                continue

            radius = gaussian_radius_2((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian_2(batch_hms[b, :, :, cls_id], ct_int, radius)
            batch_whs[b, i] = 1. * w, 1. * h
            batch_indices[b, i] = ct_int[1] * self.output_size + ct_int[0]
            batch_regs[b, i] = ct - ct_int
            batch_reg_masks[b, i] = 1

    return [batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks, batch_indices]
def compute_inputs(self, image_group, annotations_group):
    """
    Compute inputs for the network using an image_group.

    Args
        image_group: The images of one batch. Each image is run through
            ``self.preprocess_image`` with a square target of
            ``self.input_size``.
        annotations_group: One annotation dict per image, with a non-empty
            ``'bboxes'`` array whose rows are (x1, y1, x2, y2) and a
            non-empty ``'labels'`` array holding one class id per box.

    Returns
        The list ``[batch_images, batch_hms, batch_whs, batch_regs,
        batch_reg_masks, batch_indices]`` of float32 arrays: the preprocessed
        images, one heatmap per class at ``self.output_size``, and per object
        slot the (w, h) of the box in output coordinates, the offset between
        the box center and its integer-truncated center, a 1/0 mask marking
        filled slots, and the flattened index ``y * output_size + x`` of the
        integer center.

    Raises
        AssertionError: If an image has no boxes or no labels.

    At most ``self.max_objects`` boxes per image are encoded; the per-object
    arrays have no room for more, so any additional boxes are skipped.
    """
    batch_size = len(image_group)
    hm_shape = (batch_size, self.output_size, self.output_size, self.num_classes())

    batch_images = np.zeros((batch_size, self.input_size, self.input_size, 3), dtype=np.float32)
    batch_hms = np.zeros(hm_shape, dtype=np.float32)
    batch_whs = np.zeros((batch_size, self.max_objects, 2), dtype=np.float32)
    batch_regs = np.zeros((batch_size, self.max_objects, 2), dtype=np.float32)
    batch_reg_masks = np.zeros((batch_size, self.max_objects), dtype=np.float32)
    batch_indices = np.zeros((batch_size, self.max_objects), dtype=np.float32)

    for b, (image, annotations) in enumerate(zip(image_group, annotations_group)):
        # center and scale are taken from the raw image, before preprocessing
        c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0

        # inputs
        batch_images[b] = self.preprocess_image(image, c, s, tgt_w=self.input_size, tgt_h=self.input_size)

        # outputs
        bboxes = annotations['bboxes']
        assert bboxes.shape[0] != 0
        class_ids = annotations['labels']
        assert class_ids.shape[0] != 0
        trans_output = get_affine_transform(c, s, self.output_size)

        # The per-object arrays only have self.max_objects slots; indexing
        # past that would raise IndexError.
        num_objects = min(bboxes.shape[0], self.max_objects)
        for i in range(num_objects):
            bbox = bboxes[i].copy()
            # labels may arrive as floats; an array index must be an integer
            cls_id = int(class_ids[i])
            # (x1, y1)
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            # (x2, y2)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.output_size - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.output_size - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # boxes that collapse after clipping leave their slot zeroed
            if not (h > 0 and w > 0):
                continue

            radius = gaussian_radius_2((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(batch_hms[b, :, :, cls_id], ct_int, radius)
            batch_whs[b, i] = 1. * w, 1. * h
            batch_indices[b, i] = ct_int[1] * self.output_size + ct_int[0]
            batch_regs[b, i] = ct - ct_int
            batch_reg_masks[b, i] = 1

    return [batch_images, batch_hms, batch_whs, batch_regs, batch_reg_masks, batch_indices]
# run network start = time.time() detections = prediction_model.predict_on_batch(inputs)[0] print(time.time() - start) scores = detections[:, 4] # select indices which have a score above the threshold indices = np.where(scores > score_threshold)[0] # select those detections detections = detections[indices] detections_copy = detections.copy() detections = detections.astype(np.float64) trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1) for j in range(detections.shape[0]): detections[j, 0:2] = affine_transform(detections[j, 0:2], trans) detections[j, 2:4] = affine_transform(detections[j, 2:4], trans) detections[:, [0, 2]] = np.clip(detections[:, [0, 2]], 0, src_image.shape[1]) detections[:, [1, 3]] = np.clip(detections[:, [1, 3]], 0, src_image.shape[0]) for detection in detections: xmin = int(round(detection[0])) ymin = int(round(detection[1])) xmax = int(round(detection[2])) ymax = int(round(detection[3])) score = '{:.4f}'.format(detection[4]) class_id = int(detection[5]) color = colors[class_id] class_name = classes[class_id] label = '-'.join([class_name, score]) ret, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, visualize=False, flip_test=False,
                    keep_resolution=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: Kept for interface compatibility; this function does
            not apply it.
        visualize: If true, draw the first five kept detections on the source
            image and show it in an OpenCV window until a key is pressed.
        flip_test: If true, feed the model a batch of two: the preprocessed
            image and the same image reversed along its second axis.
        keep_resolution: If true, derive the network input size from the
            image itself, bumping each side to the next multiple of 32 above
            it, instead of using ``generator.input_size``.

    Returns:
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(generator.num_classes()) if generator.has_label(i)]
                      for j in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()

        c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0

        if keep_resolution:
            # Parentheses are required: `+` binds tighter than `|`, so the
            # unparenthesised `x | 31 + 1` evaluates `x | 32` and does not
            # produce a multiple of 32.
            tgt_w = (image.shape[1] | 31) + 1
            tgt_h = (image.shape[0] | 31) + 1
        else:
            tgt_w = generator.input_size
            tgt_h = generator.input_size
        image = generator.preprocess_image(image, c, s, tgt_w=tgt_w, tgt_h=tgt_h)

        if flip_test:
            flipped_image = image[:, ::-1]
            inputs = np.stack([image, flipped_image], axis=0)
        else:
            inputs = np.expand_dims(image, axis=0)

        # run network
        detections = model.predict_on_batch(inputs)[0]

        # keep only the detections which have a score above the threshold
        scores = detections[:, 4]
        keep = np.where(scores > score_threshold)[0]
        detections = detections[keep].astype(np.float64)

        # map both box corners back to source-image coordinates
        trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1)
        for detection in detections:
            detection[0:2] = affine_transform(detection[0:2], trans)
            detection[2:4] = affine_transform(detection[2:4], trans)
        detections[:, [0, 2]] = np.clip(detections[:, [0, 2]], 0, src_image.shape[1])
        detections[:, [1, 3]] = np.clip(detections[:, [1, 3]], 0, src_image.shape[0])

        if visualize:
            draw_detections(src_image, detections[:5, :4], detections[:5, 4], detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name, score_threshold=score_threshold)
            window_name = '{}'.format(i)
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.imshow(window_name, src_image)
            cv2.waitKey(0)

        # copy detections to all_detections; the last column is the class id
        # and is dropped from the stored rows
        for class_id in range(generator.num_classes()):
            all_detections[i][class_id] = detections[detections[:, -1] == class_id, :-1]

    return all_detections
def main():
    """
    Run the detector on the first images of the test set and display them.

    Builds a PascalVocGenerator and a centernet model from ``cfg``, loads the
    weights from ``model_path`` and, for up to ten images, draws every
    detection scoring above ``cfg.SCORE_THRESHOLD`` (box plus a
    ``<class>-<score>`` label) and shows the image until a key is pressed.
    """
    generator = PascalVocGenerator(cfg.IMAGE_DIR, cfg.ANNOTATION_DIR, cfg.TEST_TEXT, classes=cfg.CLASSES,
                                   skip_difficult=True, train_data=False)
    num_classes = generator.num_classes()
    classes = list(generator.classes.keys())
    # one random color per class
    colors = [np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)]

    model = centernet(num_classes, score_threshold=cfg.SCORE_THRESHOLD, nms=cfg.NMS, flip_test=cfg.FLIP_TEST,
                      training=False)
    # NOTE(review): model_path is not defined in this function; presumably a
    # module-level name - confirm.
    model.load_weights(model_path, by_name=True, skip_mismatch=True)

    # do not run past the end of a dataset that has fewer than ten images
    num_images = min(10, generator.size())
    for i in range(num_images):
        image = generator.load_image(i)
        src_image = image.copy()

        c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0

        tgt_w = generator.input_size
        tgt_h = generator.input_size
        image = generator.preprocess_image(image, c, s, tgt_w=tgt_w, tgt_h=tgt_h)

        if cfg.FLIP_TEST:
            flipped_image = image[:, ::-1]
            inputs = np.stack([image, flipped_image], axis=0)
        else:
            inputs = np.expand_dims(image, axis=0)

        detections = model.predict_on_batch(inputs)[0]

        # keep only the detections which have a score above the threshold
        scores = detections[:, 4]
        keep = np.where(scores > cfg.SCORE_THRESHOLD)[0]
        detections = detections[keep].astype(np.float64)

        # map both box corners back to source-image coordinates
        trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1)
        for detection in detections:
            detection[0:2] = affine_transform(detection[0:2], trans)
            detection[2:4] = affine_transform(detection[2:4], trans)
        detections[:, [0, 2]] = np.clip(detections[:, [0, 2]], 0, src_image.shape[1])
        detections[:, [1, 3]] = np.clip(detections[:, [1, 3]], 0, src_image.shape[0])

        for detection in detections:
            xmin = int(round(detection[0]))
            ymin = int(round(detection[1]))
            xmax = int(round(detection[2]))
            ymax = int(round(detection[3]))
            score = '{:.4f}'.format(detection[4])
            class_id = int(detection[5])
            color = colors[class_id]
            class_name = classes[class_id]
            label = '-'.join([class_name, score])
            ret, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # box outline, then a filled strip at the bottom-left corner that
            # carries the label in black
            cv2.rectangle(src_image, (xmin, ymin), (xmax, ymax), color, 1)
            cv2.rectangle(src_image, (xmin, ymax - ret[1] - baseline), (xmin + ret[0], ymax), color, -1)
            cv2.putText(src_image, label, (xmin, ymax - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        cv2.imshow('image', src_image)
        cv2.waitKey(0)
def evaluate_coco(generator, model, threshold=0.05):
    """
    Use the pycocotools to evaluate a COCO model on a dataset.

    Every image of the generator is run through the model. Detections scoring
    at least ``threshold`` are mapped back to source-image coordinates,
    converted to (x, y, w, h) and written to
    ``<set_name>_bbox_results.json``; the processed image ids go to
    ``<set_name>_processed_image_ids.json``. The results file is then loaded
    into the COCO evaluation tool.

    Args
        generator: The generator for generating the evaluation data.
        model: The model to evaluate.
        threshold: The score threshold to use.

    Returns
        ``coco_eval.stats`` after summarizing, or None when no detection
        reached the threshold (in which case nothing is written).
    """
    # start collecting results
    results = []
    image_ids = []
    for index in trange(generator.size(), desc='COCO evaluation: '):
        image = generator.load_image(index)
        # only the source size is needed later, so no copy of the image is kept
        h_src, w_src = image.shape[:2]

        c = np.array([w_src / 2., h_src / 2.], dtype=np.float32)
        s = max(h_src, w_src) * 1.0

        tgt_w = generator.input_size
        tgt_h = generator.input_size
        scale = max(1.0 * w_src / tgt_w, 1.0 * h_src / tgt_h)

        trans_input = get_affine_transform(c, s, (tgt_w, tgt_h))
        image = generator.preprocess_image(image, c, s, tgt_w=tgt_w, tgt_h=tgt_h)

        # where the source origin lands in the network input, repeated so it
        # can be subtracted from both corners of a box at once
        shift = affine_transform([0, 0], trans_input)
        shift = np.r_[shift, shift]

        # run network
        detections = model.predict_on_batch(np.expand_dims(image, axis=0))[0]
        boxes = detections[:, :4]
        scores = detections[:, 4]
        class_ids = detections[:, 5].astype(np.int32)

        # compute predicted labels and scores
        for box, score, class_id in zip(boxes, scores, class_ids):
            # scores are sorted, so we can break
            if score < threshold:
                break

            # 512/128 = 4
            box = (box * 4 - shift) * scale
            box = np.clip(box, [0., 0., 0., 0.], [w_src, h_src, w_src, h_src])
            # change to (x, y, w, h) (MS COCO standard)
            box[2:] = box[2:] - box[:2]

            # append detection to results
            results.append({
                'image_id': generator.image_ids[index],
                'category_id': generator.label_to_coco_label(class_id),
                'score': float(score),
                'bbox': box.tolist(),
            })

        # append image to list of processed images
        image_ids.append(generator.image_ids[index])

    if not results:
        return None

    # write output; the files are closed before the results are read back
    results_path = '{}_bbox_results.json'.format(generator.set_name)
    with open(results_path, 'w') as results_file:
        json.dump(results, results_file, indent=4)
    with open('{}_processed_image_ids.json'.format(generator.set_name), 'w') as ids_file:
        json.dump(image_ids, ids_file, indent=4)

    # load results in COCO evaluation tool
    coco_true = generator.coco
    coco_pred = coco_true.loadRes(results_path)

    # run COCO evaluation
    coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats