def nms(bboxes, masks, scores, thresh):
    """Set up non-maximum suppression over scored boxes and their masks.

    NOTE(review): only the setup part of this function is visible in this
    file; the suppression loop and the return statement are missing, so the
    function returns ``None``.  ``masks`` and ``thresh`` are not used by the
    visible code.

    Args:
        bboxes: array of boxes; must contain at least one row.
        masks: masks associated with ``bboxes`` (unused in the visible code).
        scores: per-box scores; only ``argsort`` is called on it here.
        thresh: suppression threshold (unused in the visible code).
    """
    # The original asserted on ``boxes``, a name that does not exist in this
    # scope, so every call failed with NameError.  The parameter is ``bboxes``.
    assert bboxes.shape[0] > 0
    final_bboxes = []
    final_masks = []
    overlaps = utils.compute_overlaps(bboxes, bboxes)
    # Indices of the boxes sorted by score, highest first.
    ixs = scores.argsort()[::-1]
def compute_gt_stats(gt_bbox, gt_mask):
    """Build a per-instance table of ground-truth geometry statistics.

    Args:
        gt_bbox: 2-D array with one row per instance.  The box extents are
            taken as ``gt_bbox[:, 2:] - gt_bbox[:, :2]``.
        gt_mask: 3-D array whose last axis indexes instances; it is summed
            over its first two axes to get the mask area.

    Returns:
        (stats, names): ``stats`` has one row per instance and one column per
        entry of ``names``, in the same order.
    """
    # Box extents as floats (the multiplication promotes integer boxes).
    extents = (gt_bbox[:, 2:] - gt_bbox[:, :2]) * 1.
    min_side = extents.min(axis=1)[:, np.newaxis]
    max_side = extents.max(axis=1)[:, np.newaxis]
    aspect_ratio = max_side / min_side
    box_area = extents.prod(axis=1)[:, np.newaxis]
    sqrt_box_area = np.sqrt(box_area)
    modal_area = gt_mask.sum(axis=0).sum(axis=0)[:, np.newaxis] * 1.

    # Overlap between each instance's box and the box of the largest
    # connected component of its mask.  The buffer is only used for its
    # shape and dtype; every row is overwritten in the loop.
    ov_connected = sqrt_box_area.copy()
    for idx in range(gt_mask.shape[2]):
        labelled = skimage.measure.label(gt_mask[:, :, idx], background=0)
        # Drop label 0 (background) before looking for the biggest component.
        component_sizes = np.bincount(labelled.ravel())[1:]
        largest_label = np.argmax(component_sizes) + 1
        largest_box = utilslib.extract_bboxes(
            labelled[:, :, np.newaxis] == largest_label)
        ov_connected[idx, 0] = utilslib.compute_overlaps(
            largest_box, gt_bbox[idx:idx + 1, :])

    columns = [
        ('min_side', min_side),
        ('max_side', max_side),
        ('aspect_ratio', aspect_ratio),
        ('log_aspect_ratio', np.log(aspect_ratio)),
        ('box_area', box_area),
        ('log_box_area', np.log(box_area)),
        ('sqrt_box_area', sqrt_box_area),
        ('modal_area', modal_area),
        ('log_modal_area', np.log(modal_area)),
        ('sqrt_modal_area', np.sqrt(modal_area)),
        ('ov_connected', ov_connected),
    ]
    names = [name for name, _ in columns]
    stats = np.concatenate([values for _, values in columns], 1)
    return stats, names
def TP_FP_NF_per_score_bbox(gt_bbox, pred_bbox, scores, IoU_treshold):
    """Count TP / FP / FN box matches at a sweep of score thresholds.

    For each score threshold in ``np.arange(0.5, 1.0, 0.05)`` the predictions
    scoring strictly above it are kept and compared with the ground truth via
    ``utils.compute_overlaps(gt_bbox, kept_predictions)``.

    Args:
        gt_bbox: ground-truth boxes, one row per box.
        pred_bbox: predicted boxes, one row per box.
        scores: per-prediction scores, aligned with ``pred_bbox``.
        IoU_treshold: a pair counts as a match when its overlap is strictly
            greater than this value.

    Returns:
        (TPs, FPs, FNs, score_range): three lists with one entry per score
        threshold, plus the array of thresholds.  TP is the number of
        ground-truth boxes with at least one match, FP the number of kept
        predictions with no match, FN the number of unmatched ground-truth
        boxes.
    """
    score_range = np.arange(0.5, 1.0, 0.05)
    TPs = []
    FPs = []
    FNs = []
    for score_threshold in score_range:
        # Predictions above the current score threshold.
        keep = np.where(scores > score_threshold)[0]
        kept_boxes = np.take(pred_bbox, keep, axis=0)
        # Rows are ground-truth boxes, columns are kept predictions.
        overlaps = utils.compute_overlaps(gt_bbox, kept_boxes)
        matched = overlaps > IoU_treshold
        # Read both counts from the shape: indexing the first row to count
        # the predictions raised IndexError when there were no GT boxes.
        num_gt, num_pred = matched.shape

        TP = np.sum(np.any(matched, axis=1))
        FP = num_pred - np.sum(np.any(matched, axis=0))
        FN = num_gt - TP

        TPs.append(TP)
        FPs.append(FP)
        FNs.append(FN)
    return TPs, FPs, FNs, score_range
def compute_recall_prediction(pred_boxes, gt_boxes, iou=0.5):
    """Compute recall and a precision-like ratio at the given IoU threshold.

    Each predicted box is assigned to the ground-truth box it overlaps most;
    the assignment counts when that overlap is at least ``iou``.

    Args:
        pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates.
        gt_boxes: [M, (y1, x1, y2, x2)] in image coordinates.
        iou: minimum overlap for a prediction to count as a match.

    Returns:
        (recall, prediction): ``recall`` is the number of distinct matched
        ground-truth boxes divided by M; ``prediction`` is the same count
        divided by N.  Both are 0.0 when either input is empty.
    """
    num_pred = pred_boxes.shape[0]
    num_gt = gt_boxes.shape[0]
    # Without predictions nothing can match (the original already reported
    # 0.0 for both values).  Without ground truth the original failed on a
    # zero-size reduction and a division by zero; report 0.0 as well.
    if num_pred == 0 or num_gt == 0:
        return 0.0, 0.0

    # Measure overlaps: rows are predictions, columns are ground-truth boxes.
    overlaps = compute_overlaps(pred_boxes, gt_boxes)
    iou_max = np.max(overlaps, axis=1)
    iou_argmax = np.argmax(overlaps, axis=1)
    positive_ids = np.where(iou_max >= iou)[0]
    # Several predictions may hit the same GT box; count each box once.
    num_matched_gt = len(set(iou_argmax[positive_ids]))

    recall = num_matched_gt / num_gt
    prediction = num_matched_gt / num_pred
    return recall, prediction
def find_TPs_FPs(dataset, model, cfg, iou_threshold=0.5):
    """Label every detection in ``dataset`` as a true or false positive.

    Args:
        dataset: object exposing ``image_ids``; passed to ``load_image_gt``.
        model: object whose ``detect(batch, verbose=0)`` returns a list of
            result dicts with ``"rois"`` and ``"scores"``.
        cfg: configuration forwarded to ``load_image_gt`` and ``mold_image``.
        iou_threshold: a detection is a true positive when its best overlap
            with any ground-truth box is strictly greater than this value.

    Returns:
        Float array of shape (num_detections, 3) with rows
        ``[score, is_tp, is_fp]``, in detection order across all images.
    """
    rows = []
    for counter, image_id in enumerate(dataset.image_ids):
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(
            dataset, cfg, image_id, use_mini_mask=False)
        scaled_image = mold_image(image, cfg)
        sample = np.expand_dims(scaled_image, 0)
        r = model.detect(sample, verbose=0)[0]

        if gt_bbox.shape[0] > 0:
            overlaps = compute_overlaps(r["rois"], gt_bbox)
            iou_max = np.max(overlaps, axis=1)
        else:
            # No ground truth in this image: every detection is a false
            # positive.  The original failed here on a zero-size reduction.
            iou_max = np.zeros(len(r["rois"]))

        for score, best_iou in zip(r["scores"], iou_max):
            if best_iou > iou_threshold:
                rows.append([score, 1, 0])  # TP
            else:
                rows.append([score, 0, 1])  # FP
        # NOTE(review): the progress line is assumed to be printed once per
        # image; the flattened source does not show its nesting.
        print("Counter:", counter)

    # Build the array once instead of re-stacking after every detection.
    if not rows:
        return np.empty((0, 3))
    return np.array(rows, dtype=np.float64)
def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config):
    """Compute RPN training targets for one image.

    Given the anchors and GT boxes, compute overlaps, pick positive and
    negative anchors, and compute the deltas that refine each positive anchor
    to its corresponding GT box.

    Inputs:
    --------
    image_shape:  not used by this function.
    anchors:      [num_anchors, (y1, x1, y2, x2)]
    gt_class_ids: [num_gt_boxes] Integer class IDs; negative IDs mark crowds.
    gt_boxes:     [num_gt_boxes, (y1, x1, y2, x2)]
    config:       provides RPN_TRAIN_ANCHORS_PER_IMAGE and RPN_BBOX_STD_DEV.

    Returns:
    --------
    rpn_match: [N] (int32) matches between anchors and GT boxes.
               1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_bbox:  [RPN_TRAIN_ANCHORS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
               Anchor bbox deltas; rows past the positives stay zero.
    """
    num_anchors = anchors.shape[0]
    anchor_budget = config.RPN_TRAIN_ANCHORS_PER_IMAGE

    rpn_match = np.zeros([num_anchors], dtype=np.int32)
    rpn_bbox = np.zeros((anchor_budget, 4))

    # Crowd boxes (negative class ID) are excluded from training, and anchors
    # touching them are kept out of the negative set.
    crowd_ix = np.where(gt_class_ids < 0)[0]
    if crowd_ix.shape[0] > 0:
        non_crowd_ix = np.where(gt_class_ids > 0)[0]
        crowd_boxes = gt_boxes[crowd_ix]
        gt_class_ids = gt_class_ids[non_crowd_ix]
        gt_boxes = gt_boxes[non_crowd_ix]
        # [anchors, crowds]
        crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes)
        no_crowd_bool = np.amax(crowd_overlaps, axis=1) < 0.001
    else:
        no_crowd_bool = np.ones([num_anchors], dtype=bool)

    # [num_anchors, num_gt_boxes]
    overlaps = utils.compute_overlaps(anchors, gt_boxes)
    best_gt_per_anchor = np.argmax(overlaps, axis=1)
    best_iou_per_anchor = overlaps[np.arange(overlaps.shape[0]),
                                   best_gt_per_anchor]

    # Order matters: later assignments overwrite earlier ones.
    # 1. Low-overlap anchors outside crowd areas are negative.
    rpn_match[(best_iou_per_anchor < 0.3) & no_crowd_bool] = -1
    # 2. Every GT box gets its best anchor, whatever the IoU.
    rpn_match[np.argmax(overlaps, axis=0)] = 1
    # 3. High-overlap anchors are positive.
    rpn_match[best_iou_per_anchor >= 0.7] = 1

    # Balance the sample: positives may fill at most half of the budget, and
    # negatives fill whatever the remaining positives leave free.  Surplus
    # anchors are picked at random and reset to neutral.
    positive_ids = np.where(rpn_match == 1)[0]
    surplus = len(positive_ids) - (anchor_budget // 2)
    if surplus > 0:
        demoted = np.random.choice(positive_ids, surplus, replace=False)
        rpn_match[demoted] = 0

    negative_ids = np.where(rpn_match == -1)[0]
    surplus = len(negative_ids) - (anchor_budget - np.sum(rpn_match == 1))
    if surplus > 0:
        demoted = np.random.choice(negative_ids, surplus, replace=False)
        rpn_match[demoted] = 0

    # For the surviving positives, compute the shift and scale that map the
    # anchor onto its closest GT box (which may have IoU < 0.7).
    positive_ids = np.where(rpn_match == 1)[0]
    for row, (anchor_ix, anchor) in enumerate(
            zip(positive_ids, anchors[positive_ids])):
        gt = gt_boxes[best_gt_per_anchor[anchor_ix]]

        # Convert both boxes to centre plus height/width.
        gt_h = gt[2] - gt[0]
        gt_w = gt[3] - gt[1]
        gt_center_y = gt[0] + 0.5 * gt_h
        gt_center_x = gt[1] + 0.5 * gt_w

        a_h = anchor[2] - anchor[0]
        a_w = anchor[3] - anchor[1]
        a_center_y = anchor[0] + 0.5 * a_h
        a_center_x = anchor[1] + 0.5 * a_w

        rpn_bbox[row] = [
            (gt_center_y - a_center_y) / a_h,
            (gt_center_x - a_center_x) / a_w,
            np.log(gt_h / a_h),
            np.log(gt_w / a_w),
        ]
        # Normalize
        rpn_bbox[row] /= config.RPN_BBOX_STD_DEV

    return rpn_match, rpn_bbox
def s_benchmark(
    run_dir,
    dataset_real,
    inference_config,
    pred_mask_dir,
    pred_info_dir,
    vis_missed=False,
):
    """Runs supplementary benchmarking code.

    For every image in ``dataset_real`` the stored predictions are matched
    against the ground truth at overlap thresholds 0.25, 0.5 and 0.75.  The
    ground-truth objects missed at threshold 0.5 are saved under
    ``<run_dir>/results_supplement/vis_fn``, and one precision/recall plus
    statistics figure per threshold is saved as ``pr_stats_<thresh*100>.png``.

    Args:
        run_dir: directory under which ``results_supplement`` is created.
        dataset_real: dataset exposing ``image_ids``; passed to
            ``modellib.load_image_gt``.
        inference_config: configuration forwarded to ``load_image_gt``.
        pred_mask_dir: directory with ``image_XXXXXX.npy`` mask arrays of
            shape (n, h, w).
        pred_info_dir: directory with ``image_XXXXXX.npy`` files holding a
            pickled result dict with ``"rois"`` and ``"scores"``.
        vis_missed: accepted but not consulted; missed objects are always
            visualized.
    """
    print("Computing Supplementary's bounding box metrics")

    results_dir = os.path.join(run_dir, "results_supplement")
    mkdir_if_missing(results_dir)
    image_ids = dataset_real.image_ids
    mkdir_if_missing(os.path.join(results_dir, "vis_fn"))

    # One record per threshold: ten accumulator lists (tps, fps, scs,
    # num_insts, dup_dets, inst_ids, ovs, tp_inds, fn_inds, gt_stats)
    # followed by the threshold itself.
    thresh_all = [0.25, 0.5, 0.75]
    ms = [tuple([] for _ in range(10)) + (thresh,) for thresh in thresh_all]

    for image_id in tqdm(image_ids):
        # Load image and ground truth data
        image, _, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
            dataset_real, inference_config, image_id, use_mini_mask=False)
        gt_stat, stat_name = compute_gt_stats(gt_bbox, gt_mask)

        # The info file stores a dict inside a 0-d object array, which NumPy
        # refuses to load unless pickling is explicitly allowed.  These are
        # files written by our own inference run; do not point this at
        # untrusted data.
        r = np.load(
            os.path.join(pred_info_dir, "image_{:06}.npy".format(image_id)),
            allow_pickle=True,
        ).item()
        r_masks = np.load(
            os.path.join(pred_mask_dir, "image_{:06}.npy".format(image_id)))
        # Must transpose from (n, h, w) to (h, w, n)
        r["masks"] = np.transpose(r_masks, (1, 2, 0))

        # Make sure scores are sorted.
        sc = r["scores"]
        is_sorted = np.all(np.diff(sc) <= 0)
        assert is_sorted

        overlaps = utilslib.compute_overlaps(r["rois"], gt_bbox)
        dt = {"sc": sc[:, np.newaxis] * 1.0}
        # ``np.bool`` no longer exists in current NumPy; the builtin is the
        # same dtype.
        gt = {"diff": np.zeros((gt_bbox.shape[0], 1), dtype=bool)}

        for (
                tps,
                fps,
                scs,
                num_insts,
                dup_dets,
                inst_ids,
                ovs,
                tp_inds,
                fn_inds,
                gt_stats,
                thresh,
        ) in ms:
            tp, fp, sc, num_inst, dup_det, inst_id, ov = inst_bench_image(
                dt, gt, {"minoverlap": thresh}, overlaps)
            tp_ind = np.sort(inst_id[tp])
            fn_ind = np.setdiff1d(np.arange(num_inst), tp_ind)
            tps.append(tp)
            fps.append(fp)
            scs.append(sc)
            num_insts.append(num_inst)
            dup_dets.append(dup_det)
            inst_ids.append(inst_id)
            ovs.append(ov)
            tp_inds.append(tp_ind)
            fn_inds.append(fn_ind)
            gt_stats.append(gt_stat)

        # Visualize missing objects: record 1 is threshold 0.5, slot 8 is
        # fn_inds, and the last entry belongs to the current image.
        fn_ind = ms[1][8][-1]
        if fn_ind.size > 0:
            _, _, axes = subplot(plt, (fn_ind.size + 1, 1), sz_y_sz_x=(5, 5))
            ax = axes.pop()
            ax.imshow(image)
            ax.set_axis_off()
            class_names = {1: ""}
            for j in fn_ind:
                ax = axes.pop()
                visualize.display_instances(
                    image,
                    gt_bbox[j:j + 1, :],
                    gt_mask[:, :, j:j + 1],
                    gt_class_id[j:j + 1],
                    class_names,
                    ax=ax,
                    title="",
                )
            file_name = os.path.join(
                results_dir,
                "vis_fn",
                "vis_{:06d}.png".format(dataset_real.image_id[image_id]),
            )
            plt.savefig(file_name, bbox_inches="tight", pad_inches=0)
            plt.close()

    print("Computing AP and plotting PR curves...")
    # Compute AP
    for (
            tps,
            fps,
            scs,
            num_insts,
            dup_dets,
            inst_ids,
            ovs,
            tp_inds,
            fn_inds,
            gt_stats,
            thresh,
    ) in ms:
        ap, rec, prec, npos, _ = inst_bench(None,
                                            None,
                                            None,
                                            tp=tps,
                                            fp=fps,
                                            score=scs,
                                            numInst=num_insts)
        str_ = "mAP: {:.3f}, prec: {:.3f}, rec: {:.3f}, npos: {:d}".format(
            ap[0], np.min(prec), np.max(rec), npos)
        plt.style.use("fivethirtyeight")
        _, _, axes = subplot(plt, (3, 4), (8, 8), space_y_x=(0.2, 0.2))
        ax = axes.pop()
        ax.plot(rec, prec, "r")
        ax.set_xlim([0, 1])
        ax.set_ylim([0, 1])
        ax.set_xlabel("Recall")
        ax.set_ylabel("Precision")
        ax.set_title(str_)
        plot_stats(stat_name, gt_stats, tp_inds, fn_inds, axes)
        file_name = os.path.join(results_dir,
                                 "pr_stats_{:d}.png".format(int(thresh * 100)))
        plt.savefig(file_name, bbox_inches="tight", pad_inches=0)
        plt.close()
def s_benchmark(run_dir,
                dataset_dir,
                indices_arr,
                pred_mask_dir,
                pred_info_dir,
                gt_mask_dir,
                vis_missed=False):
    """Runs supplementary benchmarking code.

    For every index in ``indices_arr`` the stored predictions are matched
    against ground-truth boxes extracted from the stored ground-truth masks,
    at overlap thresholds 0.25, 0.5 and 0.75.  The ground-truth objects
    missed at threshold 0.5 are saved under
    ``<run_dir>/results_supplement/vis_fn``, and one precision/recall plus
    statistics figure per threshold is saved as ``pr_stats_<thresh*100>.png``.

    Args:
        run_dir: directory under which ``results_supplement`` is created.
        dataset_dir: directory containing ``depth_ims/image_XXXXXX.png``,
            looked up by the values of ``indices_arr``.
        indices_arr: array of dataset image indices.  Ground-truth and
            prediction files are looked up by position in this array.
        pred_mask_dir: directory with ``image_XXXXXX.npy`` mask arrays of
            shape (n, h, w).
        pred_info_dir: directory with ``image_XXXXXX.npy`` files holding a
            pickled result dict with ``'rois'`` and ``'scores'``.
        gt_mask_dir: directory with ``image_XXXXXX.npy`` ground-truth masks.
        vis_missed: accepted but not consulted; missed objects are always
            visualized.
    """
    print("Computing bounding box metrics")

    results_dir = os.path.join(run_dir, 'results_supplement')
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(os.path.join(results_dir, 'vis_fn'), exist_ok=True)

    # One record per threshold: ten accumulator lists (tps, fps, scs,
    # num_insts, dup_dets, inst_ids, ovs, tp_inds, fn_inds, gt_stats)
    # followed by the threshold itself.
    thresh_all = [0.25, 0.5, 0.75]
    ms = [tuple([] for _ in range(10)) + (thresh,) for thresh in thresh_all]

    image_ids = np.arange(indices_arr.size)
    for image_id in tqdm(image_ids):
        # Load image and ground truth data
        image = skimage.io.imread(
            os.path.join(dataset_dir, 'depth_ims',
                         'image_{:06}.png'.format(indices_arr[image_id])))
        image = np.transpose(image, (1, 0, 2))
        gt_mask = np.load(
            os.path.join(gt_mask_dir,
                         'image_{:06}.npy'.format(image_id))).transpose()
        # Every instance is given class id 1.
        gt_class_id = np.ones(gt_mask.shape[2], dtype=np.int32)
        gt_bbox = utilslib.extract_bboxes(gt_mask)
        gt_stat, stat_name = compute_gt_stats(gt_bbox, gt_mask)

        # The info file stores a dict inside a 0-d object array, which NumPy
        # refuses to load unless pickling is explicitly allowed.  These are
        # files written by our own inference run; do not point this at
        # untrusted data.
        r = np.load(
            os.path.join(pred_info_dir, 'image_{:06}.npy'.format(image_id)),
            allow_pickle=True,
        ).item()
        r_masks = np.load(
            os.path.join(pred_mask_dir, 'image_{:06}.npy'.format(image_id)))
        # Must transpose from (n, h, w) to (h, w, n)
        r['masks'] = np.transpose(r_masks, (1, 2, 0))

        # Make sure scores are sorted.
        sc = r['scores']
        is_sorted = np.all(np.diff(sc) <= 0)
        assert is_sorted

        overlaps = utilslib.compute_overlaps(r['rois'], gt_bbox)
        dt = {'sc': sc[:, np.newaxis] * 1.}
        # ``np.bool`` no longer exists in current NumPy; the builtin is the
        # same dtype.
        gt = {'diff': np.zeros((gt_bbox.shape[0], 1), dtype=bool)}

        for (tps, fps, scs, num_insts, dup_dets, inst_ids, ovs, tp_inds,
             fn_inds, gt_stats, thresh) in ms:
            tp, fp, sc, num_inst, dup_det, inst_id, ov = inst_bench_image(
                dt, gt, {'minoverlap': thresh}, overlaps)
            tp_ind = np.sort(inst_id[tp])
            fn_ind = np.setdiff1d(np.arange(num_inst), tp_ind)
            tps.append(tp)
            fps.append(fp)
            scs.append(sc)
            num_insts.append(num_inst)
            dup_dets.append(dup_det)
            inst_ids.append(inst_id)
            ovs.append(ov)
            tp_inds.append(tp_ind)
            fn_inds.append(fn_ind)
            gt_stats.append(gt_stat)

        # Visualize missing objects: record 1 is threshold 0.5, slot 8 is
        # fn_inds, and the last entry belongs to the current image.
        fn_ind = ms[1][8][-1]
        if fn_ind.size > 0:
            _, _, axes = subplot(plt, (fn_ind.size + 1, 1), sz_y_sz_x=(5, 5))
            ax = axes.pop()
            ax.imshow(image)
            ax.set_axis_off()
            class_names = {1: ''}
            for j in fn_ind:
                ax = axes.pop()
                visualize.display_instances(image,
                                            gt_bbox[j:j + 1, :],
                                            gt_mask[:, :, j:j + 1],
                                            gt_class_id[j:j + 1],
                                            class_names,
                                            ax=ax,
                                            title='')
            file_name = os.path.join(results_dir, 'vis_fn',
                                     'vis_{:06d}.png'.format(image_id))
            plt.savefig(file_name, bbox_inches='tight', pad_inches=0)
            plt.close()

    print('Computing AP and plotting PR curves...')
    # Compute AP
    for (tps, fps, scs, num_insts, dup_dets, inst_ids, ovs, tp_inds, fn_inds,
         gt_stats, thresh) in ms:
        ap, rec, prec, npos, _ = inst_bench(None,
                                            None,
                                            None,
                                            tp=tps,
                                            fp=fps,
                                            score=scs,
                                            numInst=num_insts)
        str_ = 'mAP: {:.3f}, prec: {:.3f}, rec: {:.3f}, npos: {:d}'.format(
            ap[0], np.min(prec), np.max(rec), npos)
        plt.style.use('fivethirtyeight')
        _, _, axes = subplot(plt, (3, 4), (8, 8), space_y_x=(0.2, 0.2))
        ax = axes.pop()
        ax.plot(rec, prec, 'r')
        ax.set_xlim([0, 1])
        ax.set_ylim([0, 1])
        ax.set_xlabel('Recall')
        ax.set_ylabel('Precision')
        ax.set_title(str_)
        plot_stats(stat_name, gt_stats, tp_inds, fn_inds, axes)
        file_name = os.path.join(results_dir,
                                 'pr_stats_{:d}.png'.format(int(thresh * 100)))
        plt.savefig(file_name, bbox_inches='tight', pad_inches=0)
        plt.close()
def recall(model, class_names):
    """Evaluate ``model`` on the validation split and write a per-class CSV.

    Reads the module-level ``args`` (``label``, ``dataset``) and ``config``.
    For every validation image the detections are matched to the ground truth
    with ``display_differences`` (IoU and score thresholds of 0.1).  A
    per-object listing is printed, then a per-class summary plus a "Total"
    row is printed and written to ``result_<timestamp>.csv`` in the current
    directory.

    Args:
        model: object exposing ``detect_molded(images, metas, verbose=0)``
            returning result dicts with ``'class_ids'``, ``'scores'``,
            ``'rois'`` and ``'masks'``.
        class_names: class names forwarded to ``display_differences``.
    """
    # Map label text -> class id (1-based, in file order); id 0 is background.
    class_dict = {}
    label_dict = ['background']
    if args.label:
        # The original never closed this file.
        with open(args.label) as label_file:
            for label_id, label_line in enumerate(label_file, start=1):
                label = label_line.replace('\n', '')
                class_dict[label] = label_id
                label_dict.append(label)

    # Validation dataset
    dataset_val = MyDataset()
    dataset_val.load_my(args.dataset, "val", class_dict)
    dataset_val.prepare()

    # Per-class accumulators keyed by class id.
    class_ids = range(1, len(class_dict) + 1)
    pre_correct_dict = {i: 0 for i in class_ids}   # matched detections
    pre_total_dict = {i: 0 for i in class_ids}     # all detections
    pre_iou_dict = {i: 0.0 for i in class_ids}     # IoU sum over matches
    pre_scores_dict = {i: 0.0 for i in class_ids}  # score sum over matches
    gt_total_dict = {i: 0 for i in class_ids}      # ground-truth objects

    backbone_shapes = modellib.compute_backbone_shapes(config, [768, 1280, 3])
    anchor_boxes = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                  config.RPN_ANCHOR_RATIOS,
                                                  backbone_shapes,
                                                  config.BACKBONE_STRIDES,
                                                  config.RPN_ANCHOR_STRIDE)

    # One record per image with its ground truth, detections and matches.
    obj_groups = []
    for image_id in dataset_val.image_ids:
        image, image_meta, gt_class_id, gt_box, gt_mask = modellib.load_image_gt(
            dataset_val, config, image_id, use_mini_mask=False)
        gt_detects = {
            'image': dataset_val.image_reference(image_id),
            'gt_class_id': gt_class_id,
            'gt_bbox': gt_box,
            'gt_bbox_area': [],
            'gt_wh_ratio': [],
            'gt_mask_area': [],
            'gt_mask_ratio': [],
            'gt_size': [],
        }
        for i, class_id in enumerate(gt_class_id):
            gt_total_dict[class_id] += 1
            wh_ratio, box_size, box_area, square_box = toSquareBox(gt_box[i])
            mask_area = np.sum(gt_mask[:, :, i] == True)  # noqa: E712
            mask_ratio = mask_area / box_area
            gt_detects['gt_bbox_area'].append(box_area)
            gt_detects['gt_wh_ratio'].append(wh_ratio)
            gt_detects['gt_mask_area'].append(mask_area)
            gt_detects['gt_mask_ratio'].append(mask_ratio)
            gt_detects['gt_size'].append(box_size)

        molded_image = modellib.mold_image(image, config)
        results = model.detect_molded(np.expand_dims(molded_image, 0),
                                      np.expand_dims(image_meta, 0),
                                      verbose=0)
        r = results[0]
        pre_class_ids = r['class_ids']
        pre_scores = r['scores']
        for class_id in pre_class_ids:
            pre_total_dict[class_id] += 1

        gt_match, pred_match, overlap = display_differences(
            image,
            gt_box,
            gt_class_id,
            gt_mask,
            r['rois'],
            pre_class_ids,
            pre_scores,
            r['masks'],
            class_names,
            title="",
            ax=None,
            show_mask=True,
            show_box=True,
            iou_threshold=0.1,
            score_threshold=0.1)
        gt_detects['rois'] = r['rois']
        gt_detects['gt_match'] = gt_match
        gt_detects['pred_match'] = pred_match

        # pred_match[i] holds the matched GT index, or a negative value when
        # detection i matched nothing.
        for i, matched_gt in enumerate(pred_match):
            if matched_gt > -1.0:
                class_id = pre_class_ids[i]
                pre_correct_dict[class_id] += 1
                pre_iou_dict[class_id] += overlap[i, int(matched_gt)]
                pre_scores_dict[class_id] += pre_scores[i]
        obj_groups.append(gt_detects)

    # Per-object listing.  The header is Chinese for: image, class, annotated
    # box, annotated w/h ratio, annotated size, detected box, detected w/h
    # ratio, detected size, max IoU.
    print("图片,类别,标注框,标注宽高比,标注尺寸,检测框,检测宽高比,检测尺寸,最大IOU")
    for det in obj_groups:
        for i in range(len(det['gt_class_id'])):
            overlaped = utils.compute_overlaps(
                anchor_boxes, np.reshape(det['gt_bbox'][i], (1, 4)))
            # Best overlap of any anchor with this GT box.  np.max replaces a
            # Python-level max() over every anchor row.
            best_anchor_overlap = np.max(overlaped)
            if best_anchor_overlap > 0.0:
                print(det['image'], end='')
                print(",",
                      label_dict[det['gt_class_id'][i]],
                      ",",
                      det['gt_bbox'][i],
                      ",",
                      det['gt_wh_ratio'][i],
                      ",",
                      det['gt_size'][i],
                      end="")
                if det['gt_match'][i] > -1.0:
                    idx = int(det['gt_match'][i])
                    whr, bsize, _, _ = toSquareBox(det['rois'][idx])
                    print(",", det['rois'][idx], ",", whr, ",", bsize, ",",
                          best_anchor_overlap)
                else:
                    print(",", 0, ",", 0, ",", 0, ",", best_anchor_overlap)

    # Per-class summary table.
    num_images = len(dataset_val.image_ids)
    row_format = '{:s},{:d},{:d},{:d},{:d},{:.2f},{:.2f}%,{:.2f},{:.2f}%,{:.2f}%\n'
    lines = [
        'Type,Number,Correct,Proposals,Total,Rps/img,Avg IOU,Avg score,Recall,Precision\n'
    ]
    tol_pre_correct = 0
    tol_pre_total = 0
    tol_pre_iou = 0
    tol_pre_scores = 0
    tol_gt_total = 0
    for key, class_id in class_dict.items():
        correct = pre_correct_dict[class_id]
        proposals = pre_total_dict[class_id]
        gt_total = gt_total_dict[class_id]
        iou_sum = pre_iou_dict[class_id]
        score_sum = pre_scores_dict[class_id]

        tol_pre_correct += correct
        tol_pre_total += proposals
        tol_pre_iou += iou_sum
        tol_pre_scores += score_sum
        tol_gt_total += gt_total

        line = row_format.format(
            key,
            num_images,
            correct,
            proposals,
            gt_total,
            _ratio_or_zero(proposals, num_images),
            _ratio_or_zero(iou_sum, correct) * 100,
            _ratio_or_zero(score_sum, correct),
            # Recall is matched detections over ground-truth objects.  The
            # original divided the number of proposals by the ground truth,
            # which can exceed 100% and ignores whether anything matched.
            _ratio_or_zero(correct, gt_total) * 100,
            _ratio_or_zero(correct, proposals) * 100)
        lines.append(line)
        print(line)

    # The original printed the last class's proposals-per-image value in the
    # "Total" row (and failed with NameError when there were no classes).
    totle_line = row_format.format(
        'Total',
        num_images,
        tol_pre_correct,
        tol_pre_total,
        tol_gt_total,
        _ratio_or_zero(tol_pre_total, num_images),
        _ratio_or_zero(tol_pre_iou, tol_pre_correct) * 100,
        _ratio_or_zero(tol_pre_scores, tol_pre_correct),
        _ratio_or_zero(tol_pre_correct, tol_gt_total) * 100,
        _ratio_or_zero(tol_pre_correct, tol_pre_total) * 100)
    print(totle_line)
    lines.append(totle_line)

    result_file_name = "result_{:%Y%m%dT%H%M%S}.csv".format(datetime.now())
    with open(result_file_name, 'w+') as result_file:
        result_file.writelines(lines)


def _ratio_or_zero(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is not positive."""
    if denominator > 0:
        return numerator / denominator
    return 0
def car_predict_image():
    """Report parking-space occupancy for an image uploaded via POST.

    The uploaded file is read into memory (it is not stored), run through
    ``car_model`` and the detected car boxes are compared with the known
    parking spaces.  The spaces come from ``./parkings/park.csv`` (columns
    ``y1, x1, y2, x2``) when that file exists; otherwise the detected cars
    themselves are used as the spaces.  A space counts as occupied when some
    detected car overlaps it by at least 0.15.

    Returns:
        The rendered ``car_occupancy_predict_result.html`` page with the
        counts, or a short plain-text message when the request is not a POST,
        carries no usable file, or the file type is not allowed.
    """
    if request.method != 'POST':
        return "POST HTTP method required!"

    if 'file' not in request.files:
        return "No file part"

    file = request.files['file']
    filename = file.filename
    if filename == "":
        return "No selected file"

    if not (file and allowed_file(filename=filename, allowed_set=allowed_set)):
        # The original fell off the end of the function here and returned
        # None, which is not a valid response for a view function.
        return "File type not allowed"

    # Read image file as numpy array of RGB dimension
    frame = io.imread(fname=file, mode='RGB')
    # Reverse the channel order.
    # NOTE(review): the image is requested as RGB above, so this reversal
    # hands the model BGR data under the name ``rgb_image`` -- confirm which
    # channel order ``car_model`` expects.
    rgb_image = frame[:, :, ::-1]

    # The model works on a batch of images; only one was passed in, so only
    # the first result is used.
    r = car_model.detect([rgb_image], verbose=0)[0]

    # Where cars are currently located in the frame.
    car_boxes = get_car_boxes(r['rois'], r['class_ids'])

    if os.path.exists('./parkings/park.csv'):
        park_slots = pd.read_csv("./parkings/park.csv")
        parked_car_boxes = park_slots[["y1", "x1", "y2", "x2"]].astype(int).to_numpy()
    else:
        parked_car_boxes = car_boxes

    if car_boxes.size > 0 and parked_car_boxes.size > 0:
        # Rows are parking spaces, columns are detected cars.
        overlaps = utils.compute_overlaps(parked_car_boxes, car_boxes)
        # For each space, the largest overlap with any detected car (it does
        # not matter which car it is).
        best_overlap_per_space = np.max(overlaps, axis=1)
        available_spaces = int(np.sum(best_overlap_per_space < 0.15))
        occupied_spaces = len(best_overlap_per_space) - available_spaces

        modelresults = ("available spaces: " + str(available_spaces) + "\n" +
                        " occupied spaces: " + str(occupied_spaces))
        return render_template('car_occupancy_predict_result.html',
                               identity=modelresults)

    return render_template(
        'car_occupancy_predict_result.html',
        identity="Operation was unsuccessful! Nothing detected."
    )
r['rois'], r['class_ids'], r['scores'], r['masks'], verbose=0) #print(r['scores']) #print(r['masks'].shape) mAP.append(ap) #compute mask IoU IoU_m = utils.compute_overlaps_masks(gt_mask, r['masks']) IoU_m = np.nan_to_num(np.mean(IoU_m)) #change nans to 0 mask_IoU.append(IoU_m) #compute bbox IoU IoU_bbox = utils.compute_overlaps(gt_bbox, r['rois']) IoU_bbox = np.nan_to_num(np.mean(IoU_bbox)) bbox_IoU.append(IoU_bbox) #compute TP, FP, FN for mask TP, FP, FN, score_range = TP_FP_NF_per_score_mask(gt_mask, r['masks'], r['scores'], IoU_treshold=0.3) #print(TP) #print(FP) #print(FN) TPs_mask.append(TP) FPs_mask.append(FP) FNs_mask.append(FN)
def gen():
    """Yield the annotated video as multipart JPEG parts.

    Reads frames from the module-level ``cap``.  Detection with ``model``
    runs on the first frame and then whenever more than
    ``dict['park_sec_to_wait']`` seconds of video have passed since the last
    detection.  Each parking space in ``parking_data`` is outlined in green
    (free) or red (occupied); the outlines found by the latest detection are
    redrawn on the frames in between.  The module-level ``count`` is set to
    the number of free spaces found by the latest detection.

    NOTE(review): the source of this function had lost its indentation.  The
    nesting below assumes that the branch which redraws the cached outlines
    is the ``else`` of the detection check, and that the motion-detection and
    show-ids steps run for every frame -- confirm against the original file.

    Yields:
        ``bytes``: one ``--frame`` part containing a JPEG-encoded frame.
    """
    global count
    global graph

    free_space_frames = 0
    # Outlines found by the latest detection, reused on later frames.
    greenpoints = []
    redpoints = []
    last_pos = 0

    with graph.as_default():
        while cap.isOpened():
            video_cur_pos = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            ret, frame_initial = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame_initial, None, fx=0.6, fy=0.6)

            if video_cur_pos - last_pos > dict['park_sec_to_wait'] or last_pos == 0:
                last_pos = video_cur_pos
                # The model works on a batch; only the first result is used.
                r = model.detect([frame], verbose=0)[0]
                greenpoints.clear()
                redpoints.clear()
                count = 0

                # Where cars are currently located in the frame.
                car_boxes = get_car_boxes(r['rois'], r['class_ids'])
                if len(car_boxes) > 0:
                    # Rows are parking spaces, columns are detected cars.
                    overlaps = utils.compute_overlaps(parking_yml_box, car_boxes)
                    # For each space, the largest overlap with any car.
                    max_overlaps = np.max(overlaps, axis=1)
                else:
                    # No cars detected: nothing overlaps any space.  The
                    # original failed here on a zero-size reduction and ended
                    # the stream.
                    max_overlaps = np.zeros(len(parking_yml_box))

                # Assume no spaces are free until we find one that is free.
                free_space = False
                # NOTE(review): this rebuilds the background model on every
                # detection pass, discarding its history -- confirm intended.
                fgbg = cv2.createBackgroundSubtractorMOG2(history=300,
                                                          varThreshold=16,
                                                          detectShadows=False)

                for max_IoU_overlap, park in zip(max_overlaps, parking_data):
                    points = np.array(park['points'])
                    ind = park['id']
                    # A space is free when no car overlaps it by 0.20 or
                    # more; ids 11, 18, 19 and 26 are never reported free.
                    if max_IoU_overlap < 0.20 and ind not in (11, 18, 19, 26):
                        cv2.drawContours(frame, [points],
                                         contourIdx=-1,
                                         color=(0, 255, 0),
                                         thickness=1,
                                         lineType=cv2.LINE_8)
                        greenpoints.append(points)
                        free_space = True
                        count += 1
                    else:
                        cv2.drawContours(frame, [points],
                                         contourIdx=-1,
                                         color=(0, 0, 255),
                                         thickness=1,
                                         lineType=cv2.LINE_8)
                        redpoints.append(points)

                # Count consecutive detection passes with a free space so a
                # single bad detection does not trigger the banner.
                if free_space:
                    free_space_frames += 1
                else:
                    free_space_frames = 0

                if free_space_frames > 100:
                    cv2.putText(frame, "SPACE AVAILABLE!", (10, 150),
                                cv2.FONT_HERSHEY_DUPLEX, 3.0, (0, 255, 0), 2,
                                cv2.FILLED)
            else:
                # Between detections, redraw the cached outlines.
                cv2.drawContours(frame, greenpoints,
                                 contourIdx=-1,
                                 color=(0, 255, 0),
                                 thickness=1,
                                 lineType=cv2.LINE_8)
                cv2.drawContours(frame, redpoints,
                                 contourIdx=-1,
                                 color=(0, 0, 255),
                                 thickness=1,
                                 lineType=cv2.LINE_8)

            if dict['motion_detection']:
                frame_blur = cv2.GaussianBlur(frame.copy(), (5, 5), 3)
                fgmask = fgbg.apply(frame_blur)
                bw = np.uint8(fgmask == 255) * 255
                bw = cv2.erode(bw, kernel_erode, iterations=1)
                bw = cv2.dilate(bw, kernel_dilate, iterations=1)
                # findContours returns (image, contours, hierarchy) in
                # OpenCV 3 but (contours, hierarchy) in OpenCV 4; the
                # contours are the second-to-last item in both, whereas the
                # original three-way unpacking fails on OpenCV 4.
                cnts = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
                # Box every moving region.
                for c in cnts:
                    (x, y, w, h) = cv2.boundingRect(c)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)

            if dict['show_ids']:
                print_parkIDs(frame)

            cv2.putText(frame, 'Total vacant ' + str(count), (5, 30),
                        cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 0, 0), 1,
                        cv2.LINE_8)
            ret, jpeg = cv2.imencode('.jpg', frame)
            if jpeg is not None:
                yield (b'--frame\r\n'
                       b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() +
                       b'\r\n')
            else:
                print("frame is none")
def Park(box1, box2):
    """Keep the boxes of ``box1`` that strongly overlap some box of ``box2``.

    A box is kept when at least one entry in its row of
    ``compute_overlaps(box1, box2)`` is greater than 0.9.

    Returns:
        ``np.array`` of the kept boxes, in their original order.
    """
    overlaps = compute_overlaps(box1, box2)
    kept = []
    for idx in range(len(box1)):
        row_has_match = (overlaps[idx, :] > 0.9).any()
        if row_has_match:
            kept.append(box1[idx])
    return np.array(kept)
def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config):
    """Label anchors for RPN training and compute their box-refinement targets.

    Args:
        image_shape: not used by this function.
        anchors: 2-D array with one row of four coordinates per anchor.
        gt_class_ids: class ids of the GT boxes; negative ids are treated as
            crowd boxes and removed from the GT set.
        gt_boxes: 2-D array with one row of four coordinates per GT box.
        config: provides ``RPN_TRAIN_ANCHORS_PER_IMAGE`` and
            ``RPN_BBOX_STD_DEV``.

    Returns:
        (rpn_match, rpn_bbox): ``rpn_match`` is an int32 array with one entry
        per anchor (1 positive, -1 negative, 0 neutral).  ``rpn_bbox`` has
        ``RPN_TRAIN_ANCHORS_PER_IMAGE`` rows of normalized deltas; the first
        rows correspond to the positive anchors in index order and the rest
        stay zero.
    """
    total_anchors = anchors.shape[0]
    max_train_anchors = config.RPN_TRAIN_ANCHORS_PER_IMAGE

    rpn_match = np.zeros([total_anchors], dtype=np.int32)
    # [max anchors per image, (dy, dx, log(dh), log(dw))]
    rpn_bbox = np.zeros((max_train_anchors, 4))

    crowd_ix = np.where(gt_class_ids < 0)[0]
    has_crowds = crowd_ix.shape[0] > 0
    if has_crowds:
        # Drop crowds from the ground truth, but remember which anchors touch
        # them so they are not used as negatives.
        keep_ix = np.where(gt_class_ids > 0)[0]
        crowd_boxes = gt_boxes[crowd_ix]
        gt_class_ids = gt_class_ids[keep_ix]
        gt_boxes = gt_boxes[keep_ix]
        crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes)
        crowd_iou_max = np.amax(crowd_overlaps, axis=1)
        no_crowd_bool = (crowd_iou_max < 0.001)
    else:
        no_crowd_bool = np.ones([total_anchors], dtype=bool)

    # [num_anchors, num_gt_boxes]
    overlaps = utils.compute_overlaps(anchors, gt_boxes)
    anchor_iou_argmax = np.argmax(overlaps, axis=1)
    anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax]

    # Later assignments overwrite earlier ones.
    is_negative = (anchor_iou_max < 0.3) & no_crowd_bool
    rpn_match[is_negative] = -1
    # Each GT box claims its best anchor regardless of the IoU value.
    gt_iou_argmax = np.argmax(overlaps, axis=0)
    rpn_match[gt_iou_argmax] = 1
    rpn_match[anchor_iou_max >= 0.7] = 1

    # Positives may take at most half of the budget; negatives take what the
    # remaining positives leave free.
    _reset_surplus_to_neutral(rpn_match, 1, max_train_anchors // 2)
    _reset_surplus_to_neutral(
        rpn_match, -1, max_train_anchors - np.sum(rpn_match == 1))

    ix = 0  # next free row of rpn_bbox
    for i in np.where(rpn_match == 1)[0]:
        a = anchors[i]
        # Closest GT box (it might have IoU < 0.7)
        gt = gt_boxes[anchor_iou_argmax[i]]

        gt_h = gt[2] - gt[0]
        gt_w = gt[3] - gt[1]
        gt_center_y = gt[0] + 0.5 * gt_h
        gt_center_x = gt[1] + 0.5 * gt_w

        a_h = a[2] - a[0]
        a_w = a[3] - a[1]
        a_center_y = a[0] + 0.5 * a_h
        a_center_x = a[1] + 0.5 * a_w

        # Refinement the RPN should predict, then normalized.
        rpn_bbox[ix] = [
            (gt_center_y - a_center_y) / a_h,
            (gt_center_x - a_center_x) / a_w,
            np.log(gt_h / a_h),
            np.log(gt_w / a_w),
        ]
        rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV
        ix += 1

    return rpn_match, rpn_bbox


def _reset_surplus_to_neutral(rpn_match, label, limit):
    """Randomly reset anchors marked ``label`` to 0 so at most ``limit`` remain."""
    ids = np.where(rpn_match == label)[0]
    extra = len(ids) - limit
    if extra > 0:
        ids = np.random.choice(ids, extra, replace=False)
        rpn_match[ids] = 0