def ctpn(sess, net, image_path):
    """Detect handwritten and printed text in one image and save an annotated copy.

    The image is resized, run through the network, and the proposals are split
    by predicted class (``cls == 1`` is treated as handwritten, ``cls == 2`` as
    printed). Each group is post-processed by ``TextDetector.detect`` and drawn
    with its own colour. The annotated image is scaled back by ``1 / scale``
    and written to ``data/results/<file name>``.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_path: Path of the image to process.
    """
    timer = Timer()
    timer.tic()

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    img = cv2.imread(image_path)
    img_name = os.path.basename(image_path)

    # Resize the image and keep the factor so the result can be scaled back.
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)

    # Per-proposal class label, score and box from the network.
    cls, scores, boxes = test_ctpn(sess, net, img)
    print('cls, scores, boxes', cls.shape, scores.shape, boxes.shape)

    handwritten_filter = np.where(cls == 1)[0]
    handwritten_scores = scores[handwritten_filter]
    handwritten_boxes = boxes[handwritten_filter, :]

    print_filter = np.where(cls == 2)[0]
    print_scores = scores[print_filter]
    print_boxes = boxes[print_filter, :]

    # The original built two detectors into the same variable and used only
    # the second; one instance serves both groups.
    detector = TextDetector()

    print('print_filter', np.array(print_filter).shape)
    print('handwritten_boxes, handwritten_scores', handwritten_boxes.shape,
          handwritten_scores[:, np.newaxis].shape)

    image_size = img.shape[:2]
    filted_handwritten_boxes = detector.detect(
        handwritten_boxes, handwritten_scores[:, np.newaxis], image_size)
    filted_print_boxes = detector.detect(
        print_boxes, print_scores[:, np.newaxis], image_size)

    draw_boxes(img, filted_handwritten_boxes, (255, 0, 0))
    draw_boxes(img, filted_print_boxes, (0, 255, 0))

    # Undo the resize before saving.
    img = cv2.resize(img, None, None, fx=1.0 / scale, fy=1.0 / scale,
                     interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(os.path.join("data/results", img_name), img)

    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
def ctpn_area(sess, net, image_name, dst, draw_img=False, show_area=False,
              area_min=-0.1, area_max=1.1):
    """Detect text lines in an image and return what ``compute_area`` reports.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to read.
        dst: Passed through to ``compute_area``.
        draw_img: Passed through to ``compute_area``.
        show_area: Passed through to ``compute_area``.
        area_min: Passed through to ``compute_area``.
        area_max: Passed through to ``compute_area``.

    Returns:
        ``0.0`` when the image cannot be read, otherwise the result of
        ``compute_area``.
    """
    image = cv2.imread(image_name)
    if image is None:
        return 0.0

    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)
    proposal_scores, proposals = test_ctpn(sess, net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    return compute_area(image, image_name, text_lines, ratio, dst,
                        draw_img=draw_img, show_area=show_area,
                        area_min=area_min, area_max=area_max)
def img_read(im_name):
    """Run the frozen-graph CTPN pipeline on one image and draw the text boxes.

    Uses the module-level ``sess``, ``input_img``, ``output_cls_prob`` and
    ``output_box_pred``. Prints a message and returns early when the image
    cannot be read.

    Args:
        im_name: Path of the image to process.
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for {:s}'.format(im_name))

    image = open_cv.imread(im_name)
    if image is None:
        print('Img not exist')
        return

    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)
    blobs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[1], data.shape[2], im_scales[0]]], dtype=np.float32)

    fetches = [output_cls_prob, output_box_pred]
    cls_prob, box_pred = sess.run(fetches, feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    # Column 0 holds the score, columns 1-4 the box.
    proposal_scores = rois[:, 0]
    proposals = rois[:, 1:5] / im_scales[0]

    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])
    draw_boxes(image, im_name, text_lines, ratio)
def ctpn(sess, net, image_name):
    """Detect text in the bottom third of an image, draw the boxes and print timing.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    rows, _ = image.shape[:2]
    # Keep only the rows from two thirds of the height down to the bottom.
    first_row = int(2 * rows / 3.0)
    image = image[first_row:rows, :]

    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)
    proposal_scores, proposals = test_ctpn(sess, net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])
    draw_boxes(image, image_name, text_lines, ratio)

    stopwatch.toc()
    message = 'Detection took {:.3f}s for {:d} object proposals'
    print(message.format(stopwatch.total_time, text_lines.shape[0]))
def ctpn(input_path_img, output_path_label, output_path_img, img_section):
    """Detect text in the top-left section of an image and pass the boxes to ``draw_boxes``.

    Uses the module-level ``sess``, ``input_img``, ``output_cls_prob`` and
    ``output_box_pred``.

    Args:
        input_path_img: Path of the image to read.
        output_path_label: Passed through to ``draw_boxes``.
        output_path_img: Passed through to ``draw_boxes``.
        img_section: Indexable pair; only ``img[:img_section[0], :img_section[1]]``
            is processed.
    """
    print('CTPN for {:s}'.format(input_path_img))

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(input_path_img)
    image = image[:img_section[0], :img_section[1]]
    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[1], data.shape[2], im_scales[0]]], dtype=np.float32)

    fetches = [output_cls_prob, output_box_pred]
    cls_prob, box_pred = sess.run(fetches, feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    proposal_scores = rois[:, 0]
    proposals = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    draw_boxes(image, text_lines, ratio, output_path_label, output_path_img)
    print('*** OCR Complete ***')
def test(im_name, sess, output_cls_prob, output_box_pred, input_img, keras_model):
    """Detect text boxes in one image and return what ``draw_boxes`` produces.

    Args:
        im_name: Path of the test image.
        sess: Session used to evaluate the graph.
        output_cls_prob: Tensor fetched for class probabilities.
        output_box_pred: Tensor fetched for box predictions.
        input_img: Placeholder fed with the image blob.
        keras_model: Passed through to ``draw_boxes``.

    Returns:
        The value returned by ``draw_boxes``.
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for {:s}'.format(im_name))

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(im_name)
    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[1], data.shape[2], im_scales[0]]], dtype=np.float32)

    fetches = [output_cls_prob, output_box_pred]
    cls_prob, box_pred = sess.run(fetches, feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    proposal_scores = rois[:, 0]
    proposals = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    return draw_boxes(image, im_name, text_lines, ratio, keras_model)
def test_net(sess, net, imdb, weights_filename):
    """Run text detection over every image of an image database and evaluate it.

    Detections for image ``i`` are accumulated in ``all_boxes[i][1]``, pickled
    to ``detections.pkl`` in the output directory, and passed to
    ``imdb.evaluate_detections``.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        imdb: Image database; ``image_index``, ``num_classes``,
            ``image_path_at`` and ``evaluate_detections`` are used.
        weights_filename: Passed to ``get_output_dir`` to pick the output directory.
    """
    timer = Timer()
    timer.tic()
    np.random.seed(cfg.RNG_SEED)

    num_images = len(imdb.image_index)
    output_dir = get_output_dir(imdb, weights_filename)

    # all_boxes[image][class] -> detections for that image and class.
    all_boxes = [[[] for _ in range(imdb.num_classes)]
                 for _ in range(num_images)]
    print(all_boxes)

    for i in range(num_images):
        image_path = imdb.image_path_at(i)
        print('***********', image_path)

        # The original reported the outer timer's total_time here, before its
        # toc() call; presumably that value is only refreshed by toc(), so each
        # image is timed with its own timer instead.
        im_timer = Timer()
        im_timer.tic()

        # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
        img = cv2.imread(image_path)
        img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        scores, boxes = test_ctpn(sess, net, img)
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])

        im_timer.toc()
        print(('Detection took {:.3f}s for '
               '{:d} object proposals').format(im_timer.total_time,
                                               boxes.shape[0]))

        boxes = check_unreasonable_box(boxes, scale)
        all_boxes[i][1] += boxes

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes, output_dir)
    timer.toc()
def ctpn(sess, net, image_name, save_path1, save_path2):
    """Detect text lines in an image and render them through two drawing routines.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to read.
        save_path1: Passed to ``draw_boxes``.
        save_path2: Passed to ``draw_boxes2``.
    """
    stopwatch = Timer()
    stopwatch.tic()

    # Read the image.
    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)

    proposal_scores, proposals = test_ctpn(sess, net, image)

    # Post-processing: per the original note, detect() both filters and merges.
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    # draw_boxes2 runs first, then draw_boxes, on the same image array.
    draw_boxes2(image, text_lines, image_name, save_path2, ratio)
    draw_boxes(image, text_lines, image_name, save_path1, ratio)

    stopwatch.toc()
    message = 'Detection took {:.3f}s for {:d} object proposals'
    print(message.format(stopwatch.total_time, text_lines.shape[0]))
def ctpn(sess, net, image_name):
    """Classify an image as text / non-text by whether any text box is detected.

    The expected label is the name of the image's parent directory (second to
    last ``/``-separated path component). One of the module-level counters
    ``true_text``, ``true_non_text``, ``false_text``, ``false_non_text`` is
    incremented, the number of boxes is printed and written to ``boxes.txt``
    (overwritten on every call), and the resized image is saved under
    ``data/results/text`` or ``data/results/non_text``.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: ``/``-separated path of the image; must contain at least
            one directory component.
    """
    global true_text, true_non_text, false_text, false_non_text
    # File name and the directory it sits in (used as the ground-truth label).
    base_name = image_name.split('/')[-1]
    label_name = image_name.split('/')[-2]
    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    print(len(boxes))
    with open('boxes.txt', 'w') as f:
        f.write(str(len(boxes)))
    # NOTE(review): the placement of the two imwrite calls relative to the
    # inner if/else is reconstructed from a whitespace-collapsed source —
    # confirm against the original file.
    if len(boxes) > 0:
        # At least one box: the image is predicted to contain text.
        if (label_name == 'non_text'):
            false_non_text += 1
        else:
            true_text += 1
        cv2.imwrite(os.path.join('data/results/text', base_name), img)
    else:
        # No box: the image is predicted to contain no text.
        if (label_name == 'text'):
            false_text += 1
        else:
            true_non_text += 1
        cv2.imwrite(os.path.join('data/results/non_text', base_name), img)
def ctpn(sess, net, image_name, boxlabel):
    """Detect text lines, draw them with the labelled boxes, and return the detections.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to read.
        boxlabel: 2-D array of labelled boxes; columns 0-3 are read as
            ``x_min, y_min, x_max, y_max`` (inferred from how the corners are
            assembled below).

    Returns:
        A copy of the detected boxes with the eight coordinate columns divided
        by the resize factor. The trailing score column is left untouched; the
        original divided it by ``scale`` as well.
    """
    timer = Timer()
    timer.tic()

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    img = draw_boxes(img, image_name, boxes, scale, None)

    # Expand each labelled rectangle into four corner points plus a constant
    # score of 1, matching the 9-column layout draw_boxes is given above.
    boxlabel2 = np.transpose(
        np.array([
            boxlabel[:, 0], boxlabel[:, 1],
            boxlabel[:, 2], boxlabel[:, 1],
            boxlabel[:, 0], boxlabel[:, 3],
            boxlabel[:, 2], boxlabel[:, 3],
            np.ones(len(boxlabel)),
        ]))
    draw_boxes(img, image_name, boxlabel2, 1, (0, 0, 0))

    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Map coordinates back to the original image size without touching the score.
    rescaled = np.array(boxes, dtype=np.float64)
    if rescaled.ndim == 2:
        rescaled[:, :8] /= scale
    return rescaled
def text_detection(img):
    """Detect text lines in an already-loaded image.

    Class probabilities and box predictions come from the module-level
    ``obj.get_text_classification``.

    Args:
        img: Image array to process.

    Returns:
        The value of ``return_blobs_tuple(boxes, scale)``.
    """
    resized, ratio = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(resized, None)
    if cfg.TEST.HAS_RPN:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[1], data.shape[2], im_scales[0]]], dtype=np.float32)

    cls_prob, box_pred = obj.get_text_classification(blobs)
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    proposal_scores = rois[:, 0]
    proposals = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], resized.shape[:2])

    return return_blobs_tuple(text_lines, ratio)
def main2(self, image_array, im_name):
    """Detect text lines in an in-memory image and draw them via ``self.draw_boxes``.

    Args:
        image_array: Image array to process.
        im_name: Name passed through to ``self.draw_boxes``.
    """
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ I"m here')

    image, ratio = self.resize_im(image_array, scale=TextLineCfg.SCALE,
                                  max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[1], data.shape[2], im_scales[0]]], dtype=np.float32)

    fetches = [self.output_cls_prob, self.output_box_pred]
    cls_prob, box_pred = self.sess.run(
        fetches, feed_dict={self.input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    proposal_scores = rois[:, 0]
    proposals = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    self.draw_boxes(image, im_name, text_lines, ratio)
def ctpn(sess, net, image_name):
    """Detect text in an image, sharpening it first when it looks blurry.

    The variance of the Laplacian is used as the sharpness measure. When it is
    at most 5000 the image is sharpened (factor 3.0) with PIL before detection.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)

    # OpenCV (BGR) -> PIL (RGB).
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Sharpness estimate of the resized image.
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()
    if sharpness <= 5000:
        pil_image = ImageEnhance.Sharpness(pil_image).enhance(3.0)

    # PIL (RGB) -> OpenCV (BGR).
    image = cv2.cvtColor(np.asarray(pil_image), cv2.COLOR_RGB2BGR)

    proposal_scores, proposals = test_ctpn(sess, net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])
    draw_boxes(image, image_name, text_lines, ratio)

    stopwatch.toc()
    message = 'Detection took {:.3f}s for {:d} object proposals'
    print(message.format(stopwatch.total_time, text_lines.shape[0]))
def run_image(self, img_arr):
    """Detect text lines in an image-like object and return ``self.draw_boxes``' result.

    Args:
        img_arr: Image data; converted with ``np.array`` before processing.

    Returns:
        The value returned by ``self.draw_boxes(img, boxes, scale)``.
    """
    image = np.array(img_arr)
    image, ratio = self.resize_im(image, scale=TextLineCfg.SCALE,
                                  max_scale=TextLineCfg.MAX_SCALE)

    blobs, im_scales = _get_blobs(image, None)
    if cfg.TEST.HAS_RPN:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[1], data.shape[2], im_scales[0]]], dtype=np.float32)

    fetches = [self.output_cls_prob, self.output_box_pred]
    cls_prob, box_pred = self.sess.run(
        fetches, feed_dict={self.input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)

    proposal_scores = rois[:, 0]
    proposals = rois[:, 1:5] / im_scales[0]
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    return self.draw_boxes(image, text_lines, ratio)
def detect_text_ctpn(image_bytes: bytes, sess: tf.Session) -> list:
    """Find text regions in an encoded image and run OCR on each of them.

    Given an image and an active TensorFlow session loaded with config/model,
    run the model to identify regions that are likely to contain text, crop
    them, and pass every crop to ``ocr``.

    :param image_bytes: encoded image to detect text / perform OCR on
    :param sess: active TensorFlow session with graph and config loaded
    :return: list with one ``ocr`` result per cropped region, in the order
        ``draw_boxes`` yields the crops
    """
    # Retrieve tensors from the graph.
    input_img = sess.graph.get_tensor_by_name('Placeholder:0')
    output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
    output_box_pred = sess.graph.get_tensor_by_name(
        'rpn_bbox_pred/Reshape_1:0')

    # Decode, resize and deskew the image.
    # NOTE(review): cv2.imdecode returns None when the bytes are not a valid
    # image; not guarded here.
    img_array = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_ANYCOLOR)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    img = rotate(img, skew_angle(image=img))

    blobs, im_scales = _get_blobs(img, None)
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                  feed_dict={input_img: blobs['data']})
    rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                             anchor_scales=cfg.ANCHOR_SCALES)
    scores = rois[:, 0]
    boxes = rois[:, 1:5] / im_scales[0]

    # Apply NMS and retain only high scoring boxes/proposals.
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])

    # Crop regions of interest indicated by the boxes.
    cropped_images = draw_boxes(img, boxes)

    # Perform OCR on each region of interest.
    return [ocr(cropped) for cropped in cropped_images]
def ctpn_batch(imglist):
    """Run frozen-graph CTPN detection over a list of in-memory images.

    A session is created for the call, the frozen graph at
    ``./ctpn/data/ctpn.pb`` is imported, every image is processed, and the
    session is closed again before returning (the original never closed it).

    Args:
        imglist: Iterable of image arrays.

    Returns:
        ``(stroutput, imgoutput)``: two lists holding, per input image, the
        first and second value returned by ``process_boxes``.
    """
    cfg_from_file('./ctpn/text.yml')

    # init session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    try:
        with gfile.FastGFile('./ctpn/data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # NOTE(review): the graph is imported into the session's graph on
        # every call; confirm repeated calls in one process do not collide on
        # node names.
        with sess.graph.as_default():
            tf.import_graph_def(graph_def, name='')
            sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        stroutput = []
        imgoutput = []
        for img in imglist:
            img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            blobs, im_scales = _get_blobs(img, None)
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)

            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'],
                                     'TEST', anchor_scales=cfg.ANCHOR_SCALES)
            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]

            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])

            strlist, img = process_boxes(img, boxes, scale)
            stroutput.append(strlist)
            imgoutput.append(img)
    finally:
        # Release the session's resources even if an image fails.
        sess.close()

    return stroutput, imgoutput
def ctpn(img):
    """Detect text lines in an already-loaded image.

    Uses the module-level ``sess`` and ``net``.

    Args:
        img: Image array to process.

    Returns:
        ``(scores, boxes, img, scale)``: the raw proposal scores, the text
        lines from ``TextDetector.detect``, the resized image and the resize
        factor.
    """
    resized, ratio = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    proposal_scores, proposals = test_ctpn(sess, net, resized)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], resized.shape[:2])
    return proposal_scores, text_lines, resized, ratio
def ctpn(self, image_name):
    """Detect text lines in an image file and return their coordinates.

    Args:
        image_name: Path of the image to read.

    Returns:
        ``(min_y_sort_list, base_name)`` exactly as produced by
        ``self.get_coordinates``.
    """
    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    # Resize targets 600 / 1000; the original comment cites the CTPN paper.
    image, ratio = self.resize_im(image, scale=600, max_scale=1000)

    proposal_scores, proposals = test_ctpn(self.sess, self.net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    return self.get_coordinates(image, image_name, text_lines, ratio)
def ctpn(sess, net, image_name):
    """Detect text lines in an image file, draw them and print the elapsed time.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)

    proposal_scores, proposals = test_ctpn(sess, net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])
    draw_boxes(image, image_name, text_lines, ratio)

    stopwatch.toc()
    message = 'Detection took {:.3f}s for {:d} object proposals'
    print(message.format(stopwatch.total_time, text_lines.shape[0]))
def predict(self, image_name):
    """Detect text lines in an image file.

    Args:
        image_name: Path of the image to read.

    Returns:
        ``(img, boxes, scale)``: the resized image, the text lines from
        ``TextDetector.detect`` and the resize factor.
    """
    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    image, ratio = self.resize_im(image, scale=TextLineCfg.SCALE,
                                  max_scale=TextLineCfg.MAX_SCALE)

    proposal_scores, proposals = test_ctpn(self.sess, self.net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    return image, text_lines, ratio
def ctpn(sess, net, image_name):
    """Detect text lines in an image file and draw them.

    The call is timed with a ``Timer``, but nothing is printed.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to process.
    """
    stopwatch = Timer()
    stopwatch.tic()

    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    image, ratio = resize_im(image, scale=TextLineCfg.SCALE,
                             max_scale=TextLineCfg.MAX_SCALE)

    proposal_scores, proposals = test_ctpn(sess, net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])
    draw_boxes(image, image_name, text_lines, ratio)

    stopwatch.toc()
def ctpn(sess, net, img):
    """Detect text lines in an in-memory image, ordered by descending last column.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        img: Image array to process.

    Returns:
        ``(im, bboxes)`` exactly as returned by ``draw_boxes``.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, ratio = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    proposal_scores, proposals = test_ctpn(sess, net, resized)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], resized.shape[:2])

    # Reorder rows so the largest value of the last column comes first.
    descending = np.argsort(text_lines[:, -1])[::-1]
    text_lines = text_lines[descending]

    result = draw_boxes(resized, text_lines, ratio)
    im, bboxes = result

    stopwatch.toc()
    message = 'Detection took {:.3f}s for {:d} object proposals'
    print(message.format(stopwatch.total_time, text_lines.shape[0]))
    return im, bboxes
def ctpn(sess, net, image_name, model):
    """Detect text lines in an image file and pass them to ``get_coordinates``.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        image_name: Path of the image to read.
        model: Passed through to ``get_coordinates``.
    """
    # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
    image = cv2.imread(image_name)
    # Resize targets 600 / 1000; the original comment cites the CTPN paper.
    image, ratio = resize_im(image, scale=600, max_scale=1000)
    print('ctpn', image.shape)

    proposal_scores, proposals = test_ctpn(sess, net, image)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], image.shape[:2])

    get_coordinates(image, image_name, text_lines, ratio, model)
def ocr():
    """HTTP handler: detect text in the image(s) named by the request's ``path``.

    The JSON request body must contain ``path``, a glob pattern. Every matching
    file is run through the frozen CTPN graph; the dictionary returned by
    ``draw_boxes`` for the last processed image is sent back as JSON.

    Returns:
        A JSON ``Response``. When the pattern matches no file a 404 response
        with an ``error`` field is returned (the original raised on the
        unbound result variable).
    """
    # get data
    jsonData = request.get_json()
    # NOTE(review): this path comes straight from the request and is used as a
    # filesystem glob without validation — restrict it to an allowed directory.
    ori_file = jsonData['path']

    im_names = glob.glob(os.path.join(ori_file))
    if not im_names:
        return Response(json.dumps({'error': 'no image matches the given path'}),
                        status=404, mimetype='application/json')

    # init session
    cfg_from_file('ctpn/text.yml')
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    try:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # NOTE(review): the graph is imported on every request; confirm
        # repeated requests in one process do not collide on node names.
        with sess.graph.as_default():
            tf.import_graph_def(graph_def, name='')
            sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        im_dict = {}
        for im_name in im_names:
            # NOTE(review): cv2.imread returns None for an unreadable file;
            # not guarded here.
            img = cv2.imread(im_name)
            img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            blobs, im_scales = _get_blobs(img, None)
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)

            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'],
                                     'TEST', anchor_scales=cfg.ANCHOR_SCALES)
            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]

            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            im_dict = draw_boxes(img, im_name, boxes, scale)
    finally:
        # A session was created for this request; release it in every case.
        sess.close()

    return Response(json.dumps(im_dict), mimetype='application/json')
def ctpn(cv_image):
    """Detect text lines in an in-memory image, in original-image coordinates.

    Detection runs with the working directory set to ``CTPN_DIR`` and with
    ``ctpn_sess`` as the default session. The working directory is switched
    back to ``ROOT_DIR`` afterwards, also when detection raises.

    Args:
        cv_image: Image array to process.

    Returns:
        The array from ``TextDetector.detect`` with columns 0-7 (the
        coordinates) divided by the resize factor.
    """
    os.chdir(CTPN_DIR)
    try:
        with ctpn_sess.as_default():
            img, scale = resize_im(cv_image, scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            scores, boxes = test_ctpn(ctpn_sess, ctpn_net, img)
            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            # Only the eight coordinate columns are rescaled.
            boxes[:, 0:8] /= scale
    finally:
        # Never leave the process in CTPN_DIR.
        os.chdir(ROOT_DIR)
    return boxes
class TextBoxDetector():
    """Lazy-loading wrapper around a CTPN checkpoint that returns text regions."""

    def __init__(self, model_path, gpu_fraction=None):
        """Store the configuration; the network is loaded on first use.

        Args:
            model_path: Path to a file inside the checkpoint directory (text or
                UTF-8 bytes); only its directory part is kept.
            gpu_fraction: Per-process GPU memory fraction. When falsy, the
                ``GPU_MEMORY`` environment variable is used, defaulting to 0.20.
        """
        self.session = None
        if gpu_fraction:
            self.gpu_fraction = gpu_fraction
        else:
            self.gpu_fraction = float(os.environ.get('GPU_MEMORY', 0.20))
        # str(path.encode('utf-8')) yields "b'...'" on Python 3 and corrupts
        # the directory name, so normalise to text instead.
        if isinstance(model_path, bytes):
            model_path = model_path.decode('utf-8')
        self.model_path = os.path.dirname(model_path)

    def load(self):
        """Create the session and network and restore the latest checkpoint.

        Raises:
            IOError: If no checkpoint is found in ``self.model_path``.
        """
        logging.info('Creating networks and loading parameters')
        cfg_from_file(os.path.join(os.path.dirname(__file__), 'text.yml'))
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self.gpu_fraction)
        config = tf.ConfigProto(allow_soft_placement=True,
                                gpu_options=gpu_options)
        session = tf.Session(config=config)
        self.net = get_network("VGGnet_test")
        self.textdetector = TextDetector()
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        if ckpt is None or not ckpt.model_checkpoint_path:
            session.close()
            raise IOError(
                'No checkpoint found in {!r}'.format(self.model_path))
        saver.restore(session, ckpt.model_checkpoint_path)
        # Publish the session only once everything is loaded, so a failed load
        # is retried by the next detect() call.
        self.session = session

    def detect(self, image_path):
        """Detect text regions in an image file.

        Args:
            image_path: Path of the image to read.

        Returns:
            A list of dicts with keys ``score``, ``x``, ``y``, ``w`` and ``h``,
            expressed in pixels of the original (un-resized) image.
        """
        if self.session is None:
            self.load()

        # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
        img = cv2.imread(image_path)
        old_h, old_w = img.shape[:2]
        img, scale = self.resize_im(img, scale=TextLineCfg.SCALE,
                                    max_scale=TextLineCfg.MAX_SCALE)
        new_h, new_w = img.shape[:2]
        # Factors that map resized-image pixels back to original pixels.
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)

        scores, boxes = test_ctpn(self.session, self.net, img)
        boxes = self.textdetector.detect(boxes, scores[:, np.newaxis],
                                         img.shape[:2])

        regions = []
        for box in boxes:
            # Columns 0/1 are read as the top-left corner, 6/7 as the
            # bottom-right corner and 8 as the score.
            left = int(int(box[0]) * mul_w)
            top = int(int(box[1]) * mul_h)
            right = int(int(box[6]) * mul_w)
            bottom = int(int(box[7]) * mul_h)
            regions.append({
                'score': float(box[8]),
                'y': top,
                'x': left,
                'w': right - left,
                'h': bottom - top,
            })
        return regions

    def resize_im(self, im, scale, max_scale=None):
        """Resize ``im`` so its shorter side is ``scale`` pixels.

        When ``max_scale`` is given and the longer side would exceed it, the
        factor is chosen so the longer side equals ``max_scale`` instead.

        Returns:
            ``(resized_image, factor)``.
        """
        shorter = min(im.shape[0], im.shape[1])
        longer = max(im.shape[0], im.shape[1])
        f = float(scale) / shorter
        if max_scale is not None and f * longer > max_scale:
            f = float(max_scale) / longer
        resized = cv2.resize(im, None, None, fx=f, fy=f,
                             interpolation=cv2.INTER_LINEAR)
        return resized, f
def ctpn(sess, net, frame, draw):
    """Detect text lines in a frame and return the cropped regions.

    Args:
        sess: Session handed to ``test_ctpn``.
        net: Network handed to ``test_ctpn``.
        frame: Image array to process.
        draw: When equal to 1, the boxes are also drawn on the resized frame
            via ``draw_boxes``.

    Returns:
        The value returned by ``crop_image`` for a copy of the resized frame.
    """
    img, scale = resize_im(frame, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])

    # Crop from a copy so that drawing below cannot bleed into the crops.
    buf = img.copy()
    crop = crop_image(buf, boxes, scale)

    # Value comparison: `draw is 1` tested object identity, which is not
    # guaranteed for ints and triggers a SyntaxWarning on Python 3.8+.
    if draw == 1:
        draw_boxes(img, boxes, scale)
    return crop
def detection(input_image):
    """Run frozen-graph CTPN detection on one image file and draw the boxes.

    A session is created for the call, the frozen graph at ``data/ctpn.pb`` is
    imported, and the session is closed again before returning (the original
    never closed it).

    Args:
        input_image: Path of the image to process.
    """
    # init session
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    try:
        with gfile.FastGFile('data/ctpn.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # NOTE(review): the graph is imported on every call; confirm repeated
        # calls in one process do not collide on node names.
        with sess.graph.as_default():
            tf.import_graph_def(graph_def, name='')
            sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        # NOTE(review): cv2.imread returns None for an unreadable path; not guarded here.
        img = cv2.imread(input_image)
        img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        blobs, im_scales = _get_blobs(img, None)
        if cfg.TEST.HAS_RPN:
            im_blob = blobs['data']
            blobs['im_info'] = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

        cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred],
                                      feed_dict={input_img: blobs['data']})
        rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST',
                                 anchor_scales=cfg.ANCHOR_SCALES)
        scores = rois[:, 0]
        boxes = rois[:, 1:5] / im_scales[0]

        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
        draw_boxes(img, input_image, boxes, scale)
    finally:
        # Release the session's resources in every case.
        sess.close()
def ctpn(img):
    """Detect text lines in an in-memory image and report the time taken.

    Uses the module-level ``sess`` and ``net``.

    Args:
        img: Image array to process.

    Returns:
        ``(scores, boxes, img, scale, total_time, num_boxes)``: the raw
        proposal scores, the text lines from ``TextDetector.detect``, the
        resized image, the resize factor, the timer's ``total_time`` and the
        number of text lines.
    """
    stopwatch = Timer()
    stopwatch.tic()

    resized, ratio = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
    proposal_scores, proposals = test_ctpn(sess, net, resized)
    text_lines = TextDetector().detect(
        proposals, proposal_scores[:, np.newaxis], resized.shape[:2])

    stopwatch.toc()
    elapsed = stopwatch.total_time
    count = text_lines.shape[0]
    return proposal_scores, text_lines, resized, ratio, elapsed, count
def decode_ctpn_output(ctpn_output, im_scales, bbox_scale, img_resized_shape):
    """Turn raw CTPN output into connected boxes in original-image coordinates.

    Args:
        ctpn_output: Sequence whose first element is the ROI array (column 0
            is the score, columns 1-4 the box).
        im_scales: Blob scale factors; exactly one is supported.
        bbox_scale: Passed to ``resize_bbox`` to map boxes back to the
            original image.
        img_resized_shape: Shape of the resized image; its first two entries
            are passed to the detector.

    Returns:
        The value returned by ``BboxConnector(...).start()``.
    """
    proposals = ctpn_output[0]
    confidences = proposals[:, 0]
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        candidate_boxes = proposals[:, 1:5] / im_scales[0]

    # These boxes are expressed on the resized image.
    _, _, boxes_on_resized = TextDetector().detect(
        candidate_boxes, confidences[:, np.newaxis], img_resized_shape[:2])

    # Absolute box positions on the original image.
    original_bbox, _ = resize_bbox(boxes_on_resized, bbox_scale)

    return BboxConnector(original_bbox).start()
sess.graph.as_default() tf.import_graph_def(graph_def, name='') sess.run(tf.global_variables_initializer()) input_img = sess.graph.get_tensor_by_name('Placeholder:0') output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0') output_box_pred = sess.graph.get_tensor_by_name('rpn_bbox_pred/Reshape_1:0') im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \ glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg')) for im_name in im_names: print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') print(('Demo for {:s}'.format(im_name))) img = cv2.imread(im_name) img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE) blobs, im_scales = _get_blobs(img, None) if cfg.TEST.HAS_RPN: im_blob = blobs['data'] blobs['im_info'] = np.array( [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred], feed_dict={input_img: blobs['data']}) rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST', anchor_scales=cfg.ANCHOR_SCALES) scores = rois[:, 0] boxes = rois[:, 1:5] / im_scales[0] textdetector = TextDetector() boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2]) draw_boxes(img, im_name, boxes, scale)