def detect(self, img):
    """Run text detection on one image and pass the result to ``draw_boxes``.

    The image is resized with ``self.resize_im``, converted to a blob with
    ``_get_blobs``, evaluated through the session's class-probability and
    box-regression outputs, turned into proposals by ``proposal_layer`` and
    finally grouped by ``TextDetector``.

    :param img: image accepted by ``self.resize_im`` / ``_get_blobs``.
    :return: the value returned by ``self.draw_boxes(img, boxes, scale)``.
    """
    img, scale = self.resize_im(
        img,
        scale=TextLineCfg.SCALE,
        max_scale=TextLineCfg.MAX_SCALE,
    )
    blobs, im_scales = _get_blobs(img, None)

    # NOTE(review): the original formatting was lost; the guard is assumed to
    # cover only the im_info setup, while proposal_layer below reads
    # blobs['im_info'] unconditionally -- confirm.
    if cfg.TEST.HAS_RPN:
        data_blob = blobs['data']
        info_row = [data_blob.shape[1], data_blob.shape[2], im_scales[0]]
        blobs['im_info'] = np.array([info_row], dtype=np.float32)

    cls_prob, box_pred = self.sess.run(
        [self.output_cls_prob, self.output_box_pred],
        feed_dict={self.input_img: blobs['data']},
    )
    proposals, _ = proposal_layer(
        cls_prob,
        box_pred,
        blobs['im_info'],
        'TEST',
        anchor_scales=cfg.ANCHOR_SCALES,
    )

    # Column 0 is used as the score, columns 1..4 as the box; the box
    # coordinates are divided by the blob scale factor.
    scores = proposals[:, 0]
    boxes = proposals[:, 1:5] / im_scales[0]

    boxes = TextDetector().detect(boxes, scores[:, np.newaxis], img.shape[:2])
    return self.draw_boxes(img, boxes, scale)
def ctpn(img):
    """Detect text lines in ``img`` and report how long detection took.

    Uses the module-level ``sess`` and ``net`` for inference via ``test_ctpn``.

    :param img: image accepted by ``resize_im``.
    :return: tuple ``(scores, boxes, img, scale)`` where ``scores`` comes from
        ``test_ctpn``, ``boxes`` is the ``TextDetector`` output, and ``img`` /
        ``scale`` are the values returned by ``resize_im``.
    """
    stopwatch = Timer()
    stopwatch.tic()

    img, scale = resize_im(
        img,
        scale=TextLineCfg.SCALE,
        max_scale=TextLineCfg.MAX_SCALE,
    )
    scores, boxes = test_ctpn(sess, net, img)
    boxes = TextDetector().detect(boxes, scores[:, np.newaxis], img.shape[:2])

    stopwatch.toc()

    print("\n----------------------------------------------")
    report = ('Detection took {:.3f}s for '
              '{:d} object proposals')
    print(report.format(stopwatch.total_time, boxes.shape[0]))

    return scores, boxes, img, scale
def ctpn(self, image_name):
    """Load an image from disk, detect text lines in it and draw the boxes.

    :param image_name: path of the image file, passed to ``cv2.imread``.
    :return: the value returned by
        ``self.draw_boxes(img, image_name, boxes, scale)``.
    :raises IOError: if ``cv2.imread`` cannot read ``image_name``.
    """
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    # cv2.imread signals failure by returning None rather than raising, so
    # fail here with the offending path instead of deeper in the pipeline.
    if img is None:
        raise IOError('cannot read image: {}'.format(image_name))

    img, scale = self.resize_im(
        img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(self.sess, self.net, img)

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    timer.toc()

    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
    return self.draw_boxes(img, image_name, boxes, scale)
def predict():
    """Detect card-number regions in the demo images, then recognise them.

    Stage 1 loads the frozen CTPN graph, runs it over every file in
    ``./demo/test_images`` and passes each detection to ``draw_boxes``.
    Stage 2 builds a ``CRNN`` from ``crnn_config`` and runs its ``predict``.

    :return: the value returned by ``CRNN.predict``.
    """
    clearData()
    cfg_from_file(r'./ctpn/ctpn/text.yml')

    # init session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.Session(config=config)
    try:
        # Build the model path with os.path.join so it resolves on POSIX as
        # well as Windows (the previous literal used backslashes).
        model_path = os.path.join('.', 'ctpn', 'ctpn', 'data', 'ctpn.pb')
        with gfile.FastGFile(model_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # as_default() only has an effect when used as a context manager.
        with sess.graph.as_default():
            tf.import_graph_def(graph_def, name='')
        sess.run(tf.global_variables_initializer())

        input_img = sess.graph.get_tensor_by_name('Placeholder:0')
        output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0')
        output_box_pred = sess.graph.get_tensor_by_name(
            'rpn_bbox_pred/Reshape_1:0')

        image_dir = './demo/test_images'
        im_names = os.listdir(image_dir)

        # Counts successfully processed images; passed to draw_boxes.
        index = 0
        for im_name in im_names:
            print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
            print(('finding card number for {:s}'.format(im_name)))

            img = cv2.imread(os.path.join(image_dir, im_name))
            # cv2.imread returns None for entries it cannot decode; skip them
            # rather than abort the whole batch.
            if img is None:
                print('skipping unreadable image {:s}'.format(im_name))
                continue

            img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                                   max_scale=TextLineCfg.MAX_SCALE)
            blobs, im_scales = _get_blobs(img, None)
            # NOTE(review): the original formatting was lost; the guard is
            # assumed to cover only the im_info setup -- confirm.
            if cfg.TEST.HAS_RPN:
                im_blob = blobs['data']
                blobs['im_info'] = np.array(
                    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                    dtype=np.float32)

            cls_prob, box_pred = sess.run(
                [output_cls_prob, output_box_pred],
                feed_dict={input_img: blobs['data']})
            rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'],
                                     'TEST', anchor_scales=cfg.ANCHOR_SCALES)

            scores = rois[:, 0]
            boxes = rois[:, 1:5] / im_scales[0]
            textdetector = TextDetector()
            boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                        img.shape[:2])
            draw_boxes(img, im_name, boxes, scale, index)
            index += 1
    finally:
        # Close the detection session once it is no longer needed, including
        # when detection fails, before the recognition stage starts.
        sess.close()

    print('recognizing card number:')
    crnn = CRNN(image_shape=crnn_config.image_shape,
                min_len=crnn_config.min_len,
                max_len=crnn_config.max_len,
                lstm_hidden=crnn_config.lstm_hidden,
                pool_size=crnn_config.pool_size,
                learning_decay_rate=crnn_config.learning_decay_rate,
                learning_rate=crnn_config.learning_rate,
                learning_decay_steps=crnn_config.learning_decay_steps,
                mode=crnn_config.mode,
                dict=crnn_config.dict,
                is_training=True,
                train_label_path=crnn_config.predict_label_path,
                train_images_path=crnn_config.predict_images_path,
                charset_path=crnn_config.charset_path)
    result = crnn.predict(epoch=crnn_config.epoch,
                          batch_size=crnn_config.batch_size,
                          train_images_path=crnn_config.cardNum_path,
                          train_label_path=crnn_config.predict_label_path,
                          restore=True,
                          fonts=crnn_config.fonts,
                          logs_path=crnn_config.logs_path,
                          models_path=crnn_config.models_path)
    return result
# Time the text-detection pass over every image in im_names.
# NOTE(review): the original formatting was lost; the timing lines after the
# loop are assumed to sit outside it -- confirm.
time5 = time.time()
for im_name in im_names:
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for {:s}'.format(im_name))

    img = cv2.imread(im_name)
    # Scale the image to a suitable size.
    img, scale = resize_im(img, scale=600, max_scale=1200)
    # Get the network input blob and its scale factors.
    blobs, im_scales = _get_blobs(img, None)

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32,
    )

    cls_prob, box_pred = sess.run(
        [output_cls_prob, output_box_pred],
        feed_dict={input_img: blobs['data']},
    )
    rois, _ = proposal_layer(
        cls_prob,
        box_pred,
        blobs['im_info'],
        'TEST',
        anchor_scales=cfg.ANCHOR_SCALES,
    )

    # Column 0 is used as the score, columns 1..4 as the box; the box
    # coordinates are divided by the blob scale factor.
    scores = rois[:, 0]
    boxes = rois[:, 1:5] / im_scales[0]

    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    generate_detect_box(img, im_name, boxes, scale)
    # sort_box()
time6 = time.time()
print("text detect", time6 - time5)
time7 = time.time()