def text_detect(text_detector, image_path, img_type):
    """Detect text lines in one image and compute its text-area ratio.

    Args:
        text_detector: object whose ``detect(image)`` returns the text-line
            boxes found in the resized image.
        image_path: path of the image file, read with ``cv2.imread``.
        img_type: image category; ``"others"`` skips detection entirely.

    Returns:
        A ``(text_lines, text_area_ratio)`` pair. ``([], 0)`` is returned
        when ``img_type`` is ``"others"`` or when the image cannot be read.
    """
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % image_path)
    if img_type == "others":
        return [], 0

    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports a missing or undecodable file by returning None;
        # use the same "nothing detected" result as the early exit above
        # instead of failing later inside resize_im.
        print("Image could not be read: %s" % image_path)
        return [], 0

    im_small, f = resize_im(im, Config.SCALE, Config.MAX_SCALE)

    timer = Timer()
    timer.tic()

    text_lines = text_detector.detect(im_small)
    text_lines = text_lines / f  # project back to size of original image
    text_lines = refine_boxes(im, text_lines,
                              expand_pixel_len=Config.DILATE_PIXEL,
                              pixel_blank=Config.BREATH_PIXEL,
                              binary_thresh=Config.BINARY_THRESH)
    text_area_ratio = calc_area_ratio(text_lines, im.shape)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Detection Time: %f" % timer.toc())
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    if Config.DEBUG_SAVE_BOX_IMG:
        # Debug aid: store a copy of the image with the boxes drawn on it.
        im_with_text_lines = draw_boxes(im, text_lines, is_display=False,
                                        caption=image_path, wait=False)
        if im_with_text_lines is not None:
            cv2.imwrite(image_path + '_boxes.jpg', im_with_text_lines)

    return text_lines, text_area_ratio
def textPredict(input_path):
    """Run the CTPN text detector on the image at ``input_path``.

    Caffe runs on the CPU when the first command-line argument is
    ``--no-gpu``; otherwise it runs on GPU ``cfg.TEST_GPU_ID``. The network
    is built from the files under ``../models/`` on every call.

    Returns:
        ``(text_lines, f)``: the detector output for the resized image and
        the second value returned by ``resize_im``.
    """
    # CPU mode setting
    cpu_requested = len(sys.argv) > 1 and sys.argv[1] == "--no-gpu"
    if cpu_requested:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)

    # initialize the detectors
    model_dir = "../models/"
    net_definition = model_dir + "deploy.prototxt"
    net_weights = model_dir + "ctpn_trained_model.caffemodel"
    proposals = TextProposalDetector(CaffeModel(net_definition, net_weights))
    detector = TextDetector(proposals)

    resized, f = resize_im(cv2.imread(input_path), cfg.SCALE, cfg.MAX_SCALE)
    return detector.detect(resized), f
def detect_text_boxes(video_pk, cpu_mode=False):
    """
    Detect Text Boxes in frames for a video using CTPN, must be run in dva_ctpn container

    For every frame of the video a ``Region`` row is saved per detected text
    line, and the matching crop of the frame is written to the video's
    ``detections`` directory as ``<region pk>.jpg``.

    :param video_pk: primary key of the video (anything ``int()`` accepts)
    :param cpu_mode: when true run Caffe on the CPU, otherwise on GPU ``cfg.TEST_GPU_ID``
    :return: None
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    # The CTPN sources are not on the default path; add them before importing.
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:  # Set this to true for CPU only mode
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, video_pk,
                                            f.frame_index)
        im = cv2.imread(path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        # Detection runs on the resized frame; these factors map the boxes
        # back to the coordinates of the frame stored on disk.
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        # The frame is opened with PIL at most once, and only when there is
        # at least one detection to crop.
        frame_img = None
        for left, top, right, bottom, score in text_lines:
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = Region()
            r.region_type = r.DETECTION
            r.confidence = int(100.0 * score)
            r.object_name = "CTPN_TEXTBOX"
            r.y = top
            r.x = left
            r.w = right - left
            r.h = bottom - top
            r.frame_id = f.pk
            r.video_id = video_pk
            # Saved before cropping because the crop file is named after r.pk.
            r.save()
            if frame_img is None:
                frame_img = Image.open(path)
            crop = frame_img.crop((left, top, right, bottom))
            crop.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                       video_pk, r.pk))
def text_detect(args, text_detector, image_path):
    """Detect, refine and rank the text lines of the image at ``image_path``.

    Args:
        args: options object; only ``args.SAVE_IMAGE_WITH_BOX`` is read.
        text_detector: object whose ``detect(image)`` returns text-line boxes.
        image_path: path of the image file, read with ``cv2.imread``.

    Returns:
        ``(im, text_lines)``: the image as loaded from disk and the ranked
        text lines.
    """
    banner = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print(banner)
    print("Image: %s" % image_path)

    im = cv2.imread(image_path)
    im_small, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)

    stopwatch = Timer()
    stopwatch.tic()

    # Detection runs on the resized image; dividing by f projects the
    # result back to the size of the original image.
    detected = text_detector.detect(im_small) / f
    text_lines = rank_boxes(refine_boxes(im, detected))

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Detection Time: %f" % stopwatch.toc())
    print(banner)

    if args.SAVE_IMAGE_WITH_BOX:
        annotated = draw_boxes(im, text_lines, is_display=False,
                               caption=image_path, wait=False)
        if annotated is not None:
            cv2.imwrite(image_path + '_boxes.jpg', annotated)

    return im, text_lines
def ctpn(img):
    """
    text box detect

    Resizes ``img`` with ``Config.SCALE`` / ``Config.MAX_SCALE`` and runs
    ``test_ctpn`` on it with the module-level ``sess`` and ``net``.
    Returns ``(scores, boxes, resized_image)``.
    """
    resized, _ = resize_im(img, scale=Config.SCALE, max_scale=Config.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, resized)
    return scores, boxes, resized
def get_text_lines(self, im, NET_DEF_FILE, MODEL_FILE):
    """Build a CTPN detector from the given model files and run it on ``im``.

    Returns the detector output divided by the second value returned by
    ``resize_im`` (presumably mapping boxes back to the input image size).
    """
    # initialize the detectors
    caffe_model = CaffeModel(NET_DEF_FILE, MODEL_FILE)
    detector = TextDetector(TextProposalDetector(caffe_model))

    resized, factor = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    detected = detector.detect(resized)
    return detected / factor
def process(self, image, bbox):
    """Crop ``image`` to ``bbox`` and detect text lines inside the crop.

    ``bbox`` is indexed as ``(x_min, y_min, x_max, y_max)``; each value is
    truncated with ``int()``. Returns ``(resized_crop, text_lines)``.
    """
    top, bottom = int(bbox[1]), int(bbox[3])
    left, right = int(bbox[0]), int(bbox[2])
    cropped = image[top:bottom, left:right, :]
    resized, _ = resize_im(cropped, cfg.SCALE, cfg.MAX_SCALE)
    return resized, self.text_detector.detect(resized)
def getTextRec(text_detector, im):
    """Detect text lines on the resized image and draw them on a copy.

    Returns ``(tmp, imsrc, text_recs)``: the copy of the resized image that
    was handed to ``draw_boxes``, followed by the two values ``draw_boxes``
    returned.
    """
    resized, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    # Copy before detection so drawing works on its own buffer.
    canvas = resized.copy()
    lines = text_detector.detect(resized)
    imsrc, text_recs = draw_boxes(canvas, lines, caption='im_name', wait=True)
    return canvas, imsrc, text_recs
def execute(self, data, batch_size):
    """Detect text lines in the first ``batch_size`` items of ``data``.

    Each item must provide ``read()`` returning encoded image bytes.
    Returns one string per item: ``str(text_lines)`` followed by a newline.
    """
    results = []
    for index in range(batch_size):
        raw_bytes = bytearray(data[index].read())
        encoded = np.asarray(raw_bytes, dtype=np.uint8)
        # Flag -1 is passed straight to cv2.imdecode, as in the original call.
        decoded = cv2.imdecode(encoded, -1)
        resized, _ = resize_im(decoded, cfg.SCALE, cfg.MAX_SCALE)
        detected = self.text_detector.detect(resized)
        results.append(str(detected) + '\n')
    return results
def getCharBlock(text_detector, im):
    """Detect text lines on the resized image and draw them on a copy.

    Returns ``(tmp, text_recs)``: the copy of the resized image handed to
    ``draw_boxes`` and the value ``draw_boxes`` returned.
    """
    resized, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    canvas = resized.copy()
    lines = text_detector.detect(resized)
    text_recs = draw_boxes(canvas, lines, caption='im_name', wait=True)
    return canvas, text_recs
def getCharBlock(text_detector,im):
    """Show the resized image, detect its text lines and draw them on a copy.

    The resized image is displayed in the "src" window before detection.
    Returns ``(tmp, text_recs)``: the copy handed to ``draw_boxes`` and the
    value ``draw_boxes`` returned.
    """
    resized, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", resized)
    canvas = resized.copy()
    lines = text_detector.detect(resized)
    text_recs = draw_boxes(canvas, lines, caption='im_name', wait=True)
    return canvas, text_recs
def detectText(self, image):
    """
    Detects text from the image given its path

    Returns ``(text_lines, f)``: the detector output for the resized image
    and the second value returned by ``resize_im`` (presumably the resize
    factor). Returns ``None`` after printing a message when the file does
    not exist or cannot be decoded as an image.
    """
    if not os.path.exists(image):
        print("Image not found")
        return None

    img = cv2.imread(image)
    if img is None:
        # The path exists but is not a readable image (cv2.imread returns
        # None); fail the same way as a missing file instead of crashing
        # inside resize_im.
        print("Image could not be read")
        return None

    self.timer.tic()
    im, f = resize_im(img, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = self.text_detector.detect(im)
    print("Time: %f" % self.timer.toc())
    return text_lines, f
def detect(self,image_path):
    """Detect text regions in the image at ``image_path``.

    Loads the session first when ``self.session`` is None. Returns a list of
    dicts with keys ``score``, ``x``, ``y``, ``w`` and ``h``, expressed in
    the coordinates of the image as read from disk.
    """
    if self.session is None:
        self.load()

    original = cv2.imread(image_path)
    old_h, old_w, _ = original.shape
    resized, _ = resize_im(original, cfg.SCALE, cfg.MAX_SCALE)
    new_h, new_w, _ = resized.shape

    # Factors that map boxes found on the resized image back to the original.
    mul_h = float(old_h) / float(new_h)
    mul_w = float(old_w) / float(new_w)

    regions = []
    for left, top, right, bottom, score in self.session.detect(resized):
        x0 = int(left * mul_w)
        y0 = int(top * mul_h)
        x1 = int(right * mul_w)
        y1 = int(bottom * mul_h)
        regions.append({
            'score': float(score),
            'y': y0,
            'x': x0,
            'w': x1 - x0,
            'h': y1 - y0,
        })
    return regions
def text_detec(img_url):
    """Detect text lines in an image and build an HTML summary of the boxes.

    Caffe is put in GPU mode and the detector is rebuilt on every call.

    Args:
        img_url: location of the image; it is passed directly to
            ``cv2.imread`` and its last ``/``-separated component names the
            output image.

    Returns:
        ``(boxstr, server_url, count)``: one ``text[i]:[x1,y1,x2,y2]<br/>``
        entry per detected line, the URL built for the annotated image, and
        the number of detected lines.
    """
    caffe.set_mode_gpu()
    caffe.set_device(cfg.TEST_GPU_ID)

    # initialize the detectors
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)

    im = cv2.imread(img_url)
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)
    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    print(text_lines)
    box_entries = []
    count = 0
    for count, bbox in enumerate(text_lines, 1):
        print(bbox)
        box_entries.append("text[%d]:[%f,%f,%f,%f]<br/>" %
                           (count, bbox[0], bbox[1], bbox[2], bbox[3]))
    boxstr = u''.join(box_entries)

    # Replace characters that carry meaning in a URL. str.replace returns a
    # new string, so the result has to be kept; it was previously discarded
    # and the unsanitized name ended up in both the file path and the URL.
    im_name = img_url.split('/')[-1]
    for special in "?%&=":
        im_name = im_name.replace(special, '_')

    write_path = ("/data1/mingmingzhao/data_sets/test/text_detect/"
                  "text_detect_%s" % im_name)
    print("write_path:" + write_path)
    # write_path is passed as the caption; presumably draw_boxes_zmm writes
    # the annotated image there -- TODO confirm.
    draw_boxes_zmm(im, text_lines, caption=write_path, wait=False)

    server_url = "http://192.168.7.37:8393/static/text_detect/%s" % (
        write_path.split('/')[-1])
    print("server_url:" + server_url)
    return boxstr, server_url, count
def detect(self, image_path):
    """Return the text regions found in the image at ``image_path``.

    The session is loaded on first use. Each region is a dict with keys
    ``score``, ``x``, ``y``, ``w`` and ``h``, scaled back to the size of the
    image as read from disk.
    """
    if self.session is None:
        self.load()

    source = cv2.imread(image_path)
    old_h, old_w, channels = source.shape
    scaled, _ = resize_im(source, cfg.SCALE, cfg.MAX_SCALE)
    new_h, new_w, channels = scaled.shape
    mul_h = float(old_h) / float(new_h)
    mul_w = float(old_w) / float(new_w)

    def to_region(line):
        # Each detection is (left, top, right, bottom, score) on the
        # resized image; convert it to an x/y/w/h dict on the original.
        left, top, right, bottom, score = line
        left, right = int(left * mul_w), int(right * mul_w)
        top, bottom = int(top * mul_h), int(bottom * mul_h)
        return {
            'score': float(score),
            'y': top,
            'x': left,
            'w': right - left,
            'h': bottom - top,
        }

    return [to_region(line) for line in self.session.detect(scaled)]
def process(self, image, bbox):
    """Crop ``image`` to ``bbox``, resize the crop and detect text in it.

    Args:
        image: array indexed as ``image[rows, cols, channels]``.
        bbox: indexed as ``(x_min, y_min, x_max, y_max)``; each value is
            truncated with ``int()``.

    Returns:
        ``(img, boxes, scale)``: the resized crop, the boxes returned by
        ``TextDetector.detect`` and the resize factor applied to the crop.
    """

    def resize_im(im, scale, max_scale=None):
        """Resize ``im`` so its shorter side is ``scale`` pixels.

        When ``max_scale`` is given and the longer side would exceed it,
        the factor is chosen so the longer side equals ``max_scale``.
        Returns ``(resized_image, factor)``.
        """
        f = float(scale) / min(im.shape[0], im.shape[1])
        if max_scale is not None and f * max(im.shape[0], im.shape[1]) > max_scale:
            f = float(max_scale) / max(im.shape[0], im.shape[1])
        return cv2.resize(im, None, None, fx=f, fy=f,
                          interpolation=cv2.INTER_LINEAR), f

    im_crop = image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2]), :]
    img, scale = resize_im(im_crop, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(self.session, self.net, img)
    textdetector = TextDetector()
    # scores gets a trailing axis added before it is handed to the detector.
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    return img, boxes, scale
def detect(self, filepath):
    """Detect text lines in the image at ``filepath``.

    The result is stored on ``self.text_lines`` and also returned.
    """
    loaded = cv2.imread(filepath)
    resized, _ = resize_im(loaded, cfg.SCALE, cfg.MAX_SCALE)
    self.text_lines = self.text_detector.detect(resized)
    return self.text_lines
# Build the CTPN detector once, then run it over every file in DEMO_IMAGE_DIR.
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)

    # The timer is started before resizing, so the reported time covers
    # resizing as well as detection.
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(im, text_lines, caption=im_name,
                                    wait=False)

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Thank you for trying our demo. Press any key to exit...")
cv2.waitKey(0)
def forward(self, bottom, top):
    """Load the next usable training sample into the four ``top`` blobs.

    Samples are taken from ``self._imnames`` in order, wrapping around at
    the end. For each sample ``<name>.jpg`` is read and resized, and
    ``<name>.txt`` supplies one ground-truth box per line as four
    comma-separated integers, used as ``x, y, width, height``. Each scaled
    box is cut into slices ``cfg.TEXT_PROPOSALS_WIDTH`` wide, aligned to
    multiples of that width. Samples that yield no slice are skipped.

    Blobs written: ``top[0]`` image data, ``top[1]`` the two values
    ``data.shape[1]`` and ``data.shape[2]``, ``top[2]`` the slices as
    ``[x1, y1, x2, y2]``, ``top[3]`` one side position per slice: the box's
    left edge for its first kept slice, the box's right edge for a later
    slice that ends at the aligned end, and -1 otherwise.

    ``bottom`` is not used.
    """
    width = cfg.TEXT_PROPOSALS_WIDTH

    while True:
        stem = self._imnames[self._image_index].split('.')[0]
        self._image_index += 1
        if self._image_index == len(self._imnames):
            self._image_index = 0

        image, scale = resize_im(cv2.imread(stem + '.jpg'),
                                 cfg.SCALE, cfg.MAX_SCALE)
        data = prepare_img(image, cfg.MEAN)
        im_info = np.array([[data.shape[1], data.shape[2]]], np.float32)
        data = data[np.newaxis, :]

        raw_boxes = []
        with open(stem + '.txt', 'r') as gt_file:
            for line in gt_file:
                fields = line.split(',')
                raw_boxes.append([int(fields[i]) for i in range(4)])
        # Ground truth refers to the original image; bring it to the
        # resized image's scale.
        scaled_boxes = [[int(v * scale) for v in box] for box in raw_boxes]

        slices = []
        side_pos = []
        bounds = im_info[0, :]
        for bx, by, bw, bh in scaled_boxes:
            x1, y1 = bx, by
            x2 = bw + bx - 1
            y2 = bh + by - 1

            # Skip boxes that are too short or too narrow to slice.
            if y2 - y1 + 1 <= width / 2:
                continue
            if x2 - x1 + 1 < width:
                continue

            # Move start up and end down to multiples of the slice width.
            start = x1
            while start % width != 0:
                start += 1
            end = x2 + 1
            while end % width != 0:
                end -= 1

            first_kept = False
            while start < end:
                slice_right = start + width - 1
                if inside_image(start, y1, bounds) and inside_image(
                        slice_right, y2, bounds):
                    slices.append([start, y1, slice_right, y2])
                    if not first_kept:
                        first_kept = True
                        side_pos.append(x1)
                    elif start + width == end:
                        side_pos.append(x2)
                    else:
                        side_pos.append(-1)
                start += width

        gt_boxes = np.array(slices)
        side_pos = np.array(side_pos)
        if slices:
            break

    for index, blob in enumerate((data, im_info, gt_boxes, side_pos)):
        top[index].reshape(*(blob.shape))
        top[index].data[...] = blob.astype(np.float32, copy=False)
# Interactive demo: repeatedly ask for a file name under ./demo_images and
# show the detected text lines until the user types "exit".
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

path = os.path.abspath(os.curdir)
timer = Timer()
print("\ninput exit break\n")

while True:
    im_name = raw_input("\nplease input file name:")
    if im_name == "exit":
        break

    im_path = path + "/demo_images/" + im_name
    im = cv2.imread(im_path)
    if im is None:
        # Unreadable or missing file: ask again.
        continue

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", im)
    # Boxes are drawn on a copy of the resized image.
    tmp = im.copy()

    timer.tic()
    text_lines = text_detector.detect(im)
    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(tmp, text_lines, caption=im_name,
                                    wait=True)