def detect_and_save(self, img):
    boxes = self.detect_on_image(img)
    X, ratio = format_img(img, self.cfg)
    for cls_num, box in boxes.items():
        boxes_nms = roi_helpers.non_max_suppression_fast(box, overlap_thresh=0.5)
        boxes[cls_num] = boxes_nms
        print(self.class_mapping[cls_num] + ":")
        for b in boxes_nms:
            b[0], b[1], b[2], b[3] = get_real_coordinates(ratio, b[0], b[1], b[2], b[3])
            print('{} prob: {}'.format(b[0:4], b[-1]))
    img = draw_boxes_and_label_on_image_cv2(img, self.class_mapping, boxes)
    result_path = './results_images/{}.png'.format('result')
    print('result saved into ', result_path)
    cv2.imwrite(result_path, img)
    cv2.waitKey(0)
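# get_real_coordinates is assumed to map box coordinates from the resized
# network input back to the original image using the ratio returned by
# format_img (the variant further below also receives width/height so it can
# clamp to the image bounds). A minimal sketch under that assumption, not the
# repository's actual helper:
def get_real_coordinates_sketch(ratio, x1, y1, x2, y2):
    # undo the resize applied by format_img and round to pixel coordinates
    real_x1 = int(round(x1 / ratio))
    real_y1 = int(round(y1 / ratio))
    real_x2 = int(round(x2 / ratio))
    real_y2 = int(round(y2 / ratio))
    return real_x1, real_y1, real_x2, real_y2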
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg, class_mapping):
    st = time.time()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        exit(0)
    X, ratio = format_img(img, cfg)
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    # result contains all proposal boxes, each as [x1, y1, x2, y2]
    result = roi_helpers.rpn_to_roi(Y1, Y2, cfg, K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1, y1, x2, y2) to (x, y, w, h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.8

    # apply the spatial pyramid pooling to the proposed regions
    boxes = dict()
    for jk in range(result.shape[0] // cfg.num_rois + 1):
        rois = np.expand_dims(result[cfg.num_rois * jk:cfg.num_rois * (jk + 1), :], axis=0)
        if rois.shape[1] == 0:
            break
        if jk == result.shape[0] // cfg.num_rois:
            # pad the last, incomplete batch of ROIs
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            rois_padded[:, :curr_shape[1], :] = rois
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        [p_cls, p_regr] = model_classifier_only.predict([F, rois])

        for ii in range(p_cls.shape[1]):
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue

            cls_num = np.argmax(p_cls[0, ii, :])
            if cls_num not in boxes.keys():
                boxes[cls_num] = []
            (x, y, w, h) = rois[0, ii, :]
            try:
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                print(e)
                pass
            boxes[cls_num].append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                np.max(p_cls[0, ii, :])
            ])

    # apply non-max suppression to reduce duplicate boxes
    for cls_num, box in boxes.items():
        boxes_nms = roi_helpers.non_max_suppression_fast(box, overlap_thresh=0.5)
        boxes[cls_num] = boxes_nms
        print(class_mapping[cls_num] + ":")
        for b in boxes_nms:
            b[0], b[1], b[2], b[3] = get_real_coordinates(ratio, b[0], b[1], b[2], b[3])
            print('{} prob: {}'.format(b[0:4], b[-1]))
    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(time.time() - st))
    cv2.imshow('image', img)
    result_path = './results_images/{}.png'.format(os.path.basename(img_path).split('.')[0])
    print('result saved into ', result_path)
    cv2.imwrite(result_path, img)
    cv2.waitKey(0)
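# format_img is assumed to resize the image so its shorter side equals
# cfg.im_size and to apply the channel-wise preprocessing expected by the
# backbone, returning a channels-first batch tensor plus the resize ratio that
# get_real_coordinates later undoes. A minimal sketch under those assumptions
# (cfg.img_channel_mean and cfg.img_scaling_factor are assumed config fields;
# this is not the repository's actual helper):
def format_img_sketch(img, cfg):
    height, width = img.shape[:2]
    # scale so the shorter side becomes cfg.im_size
    ratio = float(cfg.im_size) / min(height, width)
    resized = cv2.resize(img, (int(width * ratio), int(height * ratio)),
                         interpolation=cv2.INTER_CUBIC)
    x = resized[:, :, (2, 1, 0)].astype(np.float32)         # BGR -> RGB
    x -= np.array(cfg.img_channel_mean, dtype=np.float32)   # per-channel mean
    x /= cfg.img_scaling_factor
    x = np.transpose(x, (2, 0, 1))                          # channels first
    x = np.expand_dims(x, axis=0)                           # add batch dimension
    return x, ratio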
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg, class_mapping):
    st = datetime.datetime.now()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        exit(0)
    width = img.shape[1]
    height = img.shape[0]

    # ==========================================================================
    # # bilateral filter & histogram equalization (now moved to another script)
    # img = cv2.bilateralFilter(img, 9, 75, 75)
    # img = cv2.equalizeHist(img)
    # img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    # ==========================================================================

    X, ratio = format_img(img, cfg)
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    [Y1, Y2, F] = model_rpn.predict(X)

    # ==========================================================================
    # # get and visualize the feature maps from an intermediate RPN layer
    # model_conv1 = Model(inputs=model_rpn.input,
    #                     outputs=model_rpn.get_layer('activation_40').output)
    # feature1 = model_conv1.predict(X)
    # print(feature1.shape)
    # # show the feature maps
    # images_per_row = int(math.sqrt(feature1.shape[-1]))
    # n_features = feature1.shape[-1]
    # feature_sizex = feature1.shape[1]
    # feature_sizey = feature1.shape[2]
    # n_cols = n_features // images_per_row
    # display_grid = np.zeros((feature_sizex * n_cols, images_per_row * feature_sizey))
    # for col in range(n_cols):
    #     for row in range(images_per_row):
    #         channel_image = feature1[0, :, :, col * images_per_row + row].copy()
    #         # post-process the feature to make it visually palatable
    #         channel_image -= channel_image.mean()
    #         channel_image /= channel_image.std()
    #         channel_image *= 64
    #         channel_image += 128
    #         channel_image = np.clip(channel_image, 0, 255).astype('uint8')
    #         display_grid[col * feature_sizex:(col + 1) * feature_sizex,
    #                      row * feature_sizey:(row + 1) * feature_sizey] = channel_image
    # # display the grid
    # scalex = 1. / feature_sizex
    # scaley = 1. / feature_sizey
    # plt.figure(figsize=(scaley * display_grid.shape[1],
    #                     scalex * display_grid.shape[0]))
    # print(display_grid.shape)
    # plt.title('feature_map')
    # plt.grid(False)
    # plt.imshow(display_grid, aspect='equal', cmap='viridis')
    # plt.show()
    # ==========================================================================

    # result contains all proposal boxes, each as [x1, y1, x2, y2]
    result = roi_helpers.rpn_to_roi(Y1, Y2, cfg, K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1, y1, x2, y2) to (x, y, w, h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.8

    # apply the spatial pyramid pooling to the proposed regions
    boxes = dict()
    for jk in range(result.shape[0] // cfg.num_rois + 1):
        rois = np.expand_dims(result[cfg.num_rois * jk:cfg.num_rois * (jk + 1), :], axis=0)
        if rois.shape[1] == 0:
            break
        if jk == result.shape[0] // cfg.num_rois:
            # pad the last, incomplete batch of ROIs
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            rois_padded[:, :curr_shape[1], :] = rois
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        [p_cls, p_regr] = model_classifier_only.predict([F, rois])
        # print(p_cls)

        for ii in range(p_cls.shape[1]):
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue

            cls_num = np.argmax(p_cls[0, ii, :])
            if cls_num not in boxes.keys():
                boxes[cls_num] = []
            (x, y, w, h) = rois[0, ii, :]
            try:
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                print(e)
                pass
            boxes[cls_num].append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                np.max(p_cls[0, ii, :])
            ])

    # apply non-max suppression to reduce duplicate boxes
    for cls_num, box in boxes.items():
        boxes_nms = roi_helpers.non_max_suppression_fast(box, overlap_thresh=0.4)
        boxes[cls_num] = boxes_nms
        print(class_mapping[cls_num] + ":")
        for b in boxes_nms:
            b[0], b[1], b[2], b[3] = get_real_coordinates(
                ratio, b[0], b[1], b[2], b[3], width, height)
            print('{} prob: {}'.format(b[0:4], b[-1]))
    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(datetime.datetime.now() - st))
    result_path = './result_images/{}.jpg'.format(
        '.'.join(os.path.basename(img_path).split('.')[:-1]))
    print('result saved into ', result_path)
    # cv2.imwrite(result_path, img)

    # draw the ground-truth boxes from the annotation file
    all_images = open('mito_simple_label_d+e.txt', 'r')
    for image in all_images:
        image = image.strip()
        [filepath, x1, y1, x2, y2, cls_name] = image.split(',')
        if img_path.split('\\')[-1] == filepath.split('\\')[-1]:
            x1 = int(x1)
            y1 = int(y1)
            x2 = int(x2)
            y2 = int(y2)
            print('ground truth bbox: [{},{},{},{}]'.format(x1, y1, x2, y2))
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 8)
            text_label = '{}'.format(cls_name)
            (ret_val, base_line) = cv2.getTextSize(text_label, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
            text_org = (x1, y1 - 0)
            cv2.rectangle(img, (text_org[0] - 1, text_org[1] + base_line - 80),
                          (text_org[0] + ret_val[0] + 120, text_org[1] - ret_val[1] + 40),
                          (0, 0, 255), 1)
            # this rectangle fills the text background
            cv2.rectangle(img, (text_org[0] - 1, text_org[1] + base_line - 80),
                          (text_org[0] + ret_val[0] + 120, text_org[1] - ret_val[1] + 40),
                          (0, 0, 255), -1)
            cv2.putText(img, text_label, text_org, cv2.FONT_HERSHEY_DUPLEX, 1.5,
                        (255, 255, 255), 3)
    cv2.imwrite(result_path, img)
    img = cv2.resize(img, (600, 621), interpolation=cv2.INTER_CUBIC)
    cv2.imshow('result', img)
    cv2.waitKey(5000)
    all_images.close()
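# roi_helpers.non_max_suppression_fast appears to take rows of
# [x1, y1, x2, y2, prob] and greedily keep the highest-scoring boxes whose
# mutual IoU stays below overlap_thresh. A rough sketch of that behaviour
# (the repository's own implementation may differ in detail):
def non_max_suppression_sketch(boxes, overlap_thresh=0.5):
    boxes = np.asarray(boxes, dtype=np.float32)
    if len(boxes) == 0:
        return boxes
    x1, y1, x2, y2, scores = boxes.T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the best remaining box with all the others
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop every box that overlaps the kept one too strongly
        order = order[1:][iou <= overlap_thresh]
    return boxes[keep]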
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg, class_mapping, Ap):
    st = time.time()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        exit(0)
    X, ratio = format_img(img, cfg)
    # with the TensorFlow backend the channel dimension must be moved to the last axis
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # run the region proposal network
    # Y1: probability that each anchor contains an object
    # Y2: regression offsets for each anchor
    # F:  convolutional feature map
    [Y1, Y2, F] = model_rpn.predict(X)

    # result contains all proposal boxes, each as [x1, y1, x2, y2]
    result = roi_helpers.rpn_to_roi(Y1, Y2, cfg, K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1, y1, x2, y2) to (x, y, w, h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.50

    # apply the spatial pyramid pooling to the proposed regions
    boxes = dict()
    # process the proposals in batches of num_rois; this takes
    # (result.shape[0] // cfg.num_rois + 1) passes in total
    for jk in range(result.shape[0] // cfg.num_rois + 1):
        # take num_rois proposals and add a batch dimension
        # (if fewer than num_rois remain, the slice stops at the last one)
        rois = np.expand_dims(result[cfg.num_rois * jk:cfg.num_rois * (jk + 1), :], axis=0)
        # no proposals left
        if rois.shape[1] == 0:
            break
        # when the last batch has fewer than num_rois proposals,
        # pad it with the first proposal so it reaches num_rois
        if jk == result.shape[0] // cfg.num_rois:
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            # zero-filled array of the target shape
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            # copy the existing rois into the front ...
            rois_padded[:, :curr_shape[1], :] = rois
            # ... and fill the remainder with the first box
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        # classification and box regression on the proposals:
        # p_cls:  per-class probability for each ROI
        # p_regr: per-class bounding-box regression offsets
        # F: feature map from the RPN, rois: the proposal boxes
        [p_cls, p_regr] = model_classifier_only.predict([F, rois])

        # iterate over every proposal (p_cls.shape[1] is the number of proposals)
        for ii in range(p_cls.shape[1]):
            # skip the proposal if its highest class probability is below the
            # threshold, or if the highest probability belongs to the background class
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue

            # otherwise the class with the highest probability is the prediction
            cls_num = np.argmax(p_cls[0, ii, :])
            # one list per class, holding its boxes and probabilities
            if cls_num not in boxes.keys():
                boxes[cls_num] = []
            # proposal ii in (x, y, w, h) form
            (x, y, w, h) = rois[0, ii, :]
            try:
                # regression offsets for the predicted class
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                # undo the regression-target normalisation
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                # refine the proposal with the regression offsets
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                print(e)
                pass
            # boxes are predicted on the feature map, so multiply by
            # cfg.rpn_stride (16, presumably) to map them back to the resized image
            boxes[cls_num].append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                np.max(p_cls[0, ii, :])
            ])
    print(boxes)  # add by me

    result_txt_filename = "./predict_labels/" + os.path.basename(img_path).split('.')[0] + ".txt"
    with open(result_txt_filename, 'w') as f:
        for cls_num, box in boxes.items():
            # apply non-max suppression to reduce duplicate boxes
            boxes_nms = roi_helpers.non_max_suppression_fast(box, overlap_thresh=0.5)
            boxes[cls_num] = boxes_nms
            print(class_mapping[cls_num] + ":")
            accall = 0
            for b in boxes_nms:
                b[0], b[1], b[2], b[3] = get_real_coordinates(ratio, b[0], b[1], b[2], b[3])
                print('{} prob: {}'.format(b[0:4], b[-1]))
                accall += b[-1]
                # print("accall: {}".format(accall))
                f.write('{} {} {} {} {} {}\n'.format(class_mapping[cls_num], b[-1],
                                                     b[0], b[1], b[2], b[3]))
            print("accall:{}".format(accall))
            avg = accall / len(boxes_nms)
            print("{} acc:{}".format(class_mapping[cls_num], avg))
            Ap[cls_num].append(avg)

    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(time.time() - st))
    # cv2.imshow('image', img)  # show the image (commented out)
    result_path = './predict_images/{}.png'.format(os.path.basename(img_path).split('.')[0])
    print('result saved into ', result_path)
    cv2.imwrite(result_path, img)
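# The Ap argument above is assumed to collect, per class index, the mean
# detection confidence of each processed image. A minimal sketch of how it
# could be initialised and summarised over a folder of test images
# (test_dir and the already-loaded model objects are assumptions here):
def summarize_confidences_sketch(test_dir, model_rpn, model_classifier_only, cfg, class_mapping):
    Ap = {cls_num: [] for cls_num in class_mapping}
    for name in sorted(os.listdir(test_dir)):
        if name.lower().endswith(('.png', '.jpg', '.bmp')):
            predict_single_image(os.path.join(test_dir, name), model_rpn,
                                 model_classifier_only, cfg, class_mapping, Ap)
    for cls_num, scores in Ap.items():
        if scores:
            print('{}: mean confidence over {} images = {:.3f}'.format(
                class_mapping[cls_num], len(scores), sum(scores) / len(scores)))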