def test_net(self, net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, self.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=self.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if self.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
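# A minimal usage sketch for the method above, not taken from the source: it assumes a
# detector object `detector` that exposes canvas_size, mag_ratio and show_time, a loaded
# CRAFT `net`, and an RGB image array. The threshold values are the commonly used CRAFT
# defaults, not values confirmed by this code base.
import cv2

image = cv2.imread('sample.jpg')                    # BGR image of any size (hypothetical path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # CRAFT expects RGB input
boxes, polys, score_heatmap = detector.test_net(
    net=net, image=image,
    text_threshold=0.7, link_threshold=0.4, low_text=0.4,
    cuda=False, poly=False, refine_net=None)
print(f'{len(boxes)} text boxes detected')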
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, image_path, refine_net=None):
    t0 = time.time()
    img_h, img_w, c = image.shape

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio
    h, w, c = image.shape

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()   # region score
    score_link = y[0, :, :, 1].cpu().data.numpy()   # affinity score

    # refine link
    if refine_net is not None:
        y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing: CRAFT extracts the detection boxes here
    # (note: the low_text argument is replaced by a fixed 0.4)
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, 0.4, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    # print(scores)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    Plus_score_text = imgproc.cvMakeScores(render_img)

    filename, file_ext = os.path.splitext(os.path.basename(image_path))

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    resize_folder = './resize'    # folder for the resized original image
    if not os.path.isdir(resize_folder + '/'):
        os.makedirs(resize_folder + '/')
    resize_file = resize_folder + "/resize_" + filename + '_mask.jpg'    # original image

    # convert the CRAFT-resized image to RGB; used to build the composite image
    IMG_RGB2 = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray((IMG_RGB2 * 255).astype(np.uint8))
    images = np.array(pil_image)
    images = cv2.cvtColor(images, cv2.COLOR_BGR2GRAY)

    # binarize for image composition
    ret, thresh = cv2.threshold(images, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # scale the region score back up, then resize both maps to the original image size
    text_score = cv2.resize(Plus_score_text, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    thresh = cv2.resize(thresh, (img_w, img_h))           # binarized original image
    text_score = cv2.resize(text_score, (img_w, img_h))   # binarized region-score image
    text_score = Image.fromarray(text_score.astype(np.uint8))
    text_score = np.array(text_score)
    # thresh = img_post.img_proc(text_score, thresh)

    if not os.path.isdir('./og_bri' + '/'):    # folder for the binarized original image
        os.makedirs('./og_bri' + '/')
    if not os.path.isdir('./score/'):          # folder for the binarized score image
        os.makedirs('./score/')
    cv2.imwrite('./og_bri' + "/og_" + filename + '.jpg', thresh)         # save binarized original image
    cv2.imwrite('./score' + "/score_" + filename + '.jpg', text_score)   # save binarized score image

    img_h = thresh.shape[0]
    img_w = thresh.shape[1]
    IMG_RGB2 = cv2.resize(IMG_RGB2, (img_w, img_h))    # resize back to the original size
    cv2.imwrite(resize_file, IMG_RGB2)

    return boxes, polys, ret_score_text
# (the first three lines presumably sit inside a loop over detected contours)
    x, y, w, h = cv2.boundingRect(contour)
    coor = [[x, y], [x + w, y], [x + w, y + h], [x, y + h]]
    coordinates.append(coor)

coordinates = np.array(coordinates, np.float64)

# recompute the resize ratio used by CRAFT so the boxes can be mapped back to the original image
height, width = np_image.shape[:2]
mag_ratio = args.mag_ratio
square_size = args.canvas_size
target_size = mag_ratio * max(height, width)
if target_size > square_size:
    target_size = square_size
target_ratio = target_size / max(height, width)
ratio_h = ratio_w = 1 / target_ratio

coordinates = craft_utils.adjustResultCoordinates(coordinates, ratio_w, ratio_h)
coordinates = coordinates.astype(np.int64)

# crop each box with a margin proportional to the square root of its area
a2 = 0.22
for k, coor in enumerate(coordinates):
    x0, y0 = coor[0]
    x2, y2 = coor[2]
    mi = math.ceil(a2 * math.sqrt((x2 - x0) * (y2 - y0)))
    if 2 * h < w:
        char_image = image[:, x0 - mi:x2 + mi, :]
    elif 2 * w < h:
        char_image = image[y0 - mi:y2 + mi, :, :]
    else:
        char_image = image[y0 - mi:y2 + mi, x0 - mi:x2 + mi, :]
    if char_image.size:
def main():
    print(' -- Loading models')
    import os
    os.makedirs('result', exist_ok=True)
    text_render.prepare_renderer()
    dictionary, model_ocr = load_ocr_model()
    model_detect = load_detect_model()
    model_inpainting = load_inpainting_model()

    print(' -- Read image')
    img = cv2.imread(args.image)
    img_bbox = np.copy(img)
    img_bbox_all = np.copy(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        img, args.size, cv2.INTER_LINEAR, mag_ratio=1)
    img_to_overlay = np.copy(img_resized)
    ratio_h = ratio_w = 1 / target_ratio
    img_resized = imgproc.normalizeMeanVariance(img_resized)
    print(f'Detection resolution: {img_resized.shape[1]}x{img_resized.shape[0]}')

    print(' -- Running text detection')
    rscore, ascore, mask = run_detect(model_detect, img_resized)
    overlay = imgproc.cvt2HeatmapImg(rscore + ascore)
    boxes, polys = craft_utils.getDetBoxes(rscore, ascore, args.text_threshold, args.link_threshold, args.low_text, False)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h, ratio_net=2)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    # merge textlines
    polys = merge_bboxes(polys, can_merge_textline)
    for [tl, tr, br, bl] in polys:
        x = int(tl[0])
        y = int(tl[1])
        width = int(tr[0] - tl[0])
        height = int(br[1] - tr[1])
        cv2.rectangle(img_bbox_all, (x, y), (x + width, y + height), color=(255, 0, 0), thickness=2)

    print(' -- Running OCR')
    # run OCR for each textline
    textlines = run_ocr(img_bbox, polys, dictionary, model_ocr, 32)

    # merge textlines into text regions, filter out textlines without characters
    text_regions: List[BBox] = []
    new_textlines = []
    for (poly_regions, textline_indices, majority_dir, fg_r, fg_g, fg_b, bg_r, bg_g, bg_b) in merge_bboxes_text_region(textlines):
        [tl, tr, br, bl] = poly_regions
        x = int(tl[0]) - 5
        y = int(tl[1]) - 5
        width = int(tr[0] - tl[0]) + 10
        height = int(br[1] - tr[1]) + 10
        text = ''
        logprob_lengths = []
        for textline_idx in textline_indices:
            if not text:
                text = textlines[textline_idx].text
            else:
                last_ch = text[-1]
                cur_ch = textlines[textline_idx].text[0]
                if ord(last_ch) > 255 and ord(cur_ch) > 255:
                    text += textlines[textline_idx].text
                else:
                    text += ' ' + textlines[textline_idx].text
            logprob_lengths.append((np.log(textlines[textline_idx].prob), len(textlines[textline_idx].text)))
        vc = count_valuable_text(text)
        total_logprobs = 0.0
        for (logprob, length) in logprob_lengths:
            total_logprobs += logprob * length
        total_logprobs /= sum([x[1] for x in logprob_lengths])
        # filter text regions without characters
        if vc > 1:
            region = BBox(x, y, width, height, text, np.exp(total_logprobs), fg_r, fg_g, fg_b, bg_r, bg_g, bg_b)
            region.textline_indices = []
            region.majority_dir = majority_dir
            text_regions.append(region)
            for textline_idx in textline_indices:
                region.textline_indices.append(len(new_textlines))
                new_textlines.append(textlines[textline_idx])
    textlines = new_textlines

    print(' -- Generating text mask')
    # create mask
    from text_mask_utils import filter_masks, complete_mask
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2), interpolation=cv2.INTER_LINEAR)
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    mask_resized = cv2.resize(mask_resized, (img.shape[1] // 2, img.shape[0] // 2), interpolation=cv2.INTER_LINEAR)
    img_resized_2 = cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2), interpolation=cv2.INTER_LINEAR)
    mask_resized[mask_resized > 250] = 255
    text_lines = [(a.x // 2, a.y // 2, a.w // 2, a.h // 2) for a in textlines]
    mask_ccs, cc2textline_assignment = filter_masks(mask_resized, text_lines)
    cv2.imwrite('result/mask_filtered.png', reduce(cv2.bitwise_or, mask_ccs))
    final_mask = complete_mask(img_resized_2, mask_ccs, text_lines, cc2textline_assignment)
    final_mask = cv2.resize(final_mask, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LINEAR)
    final_mask[final_mask > 0] = 255

    print(' -- Running inpainting')
    # run inpainting
    img_inpainted, inpaint_input = run_inpainting(model_inpainting, img, final_mask, args.inpainting_size)

    print(' -- Translating')
    # translate text region texts
    texts = '\n'.join([r.text for r in text_regions])
    trans_ret = baidu_translator.translate('ja', 'zh-CN', texts)
    translated_sentences = []
    batch = len(text_regions)
    if len(trans_ret) < batch:
        translated_sentences.extend(trans_ret)
        translated_sentences.extend([''] * (batch - len(trans_ret)))
    elif len(trans_ret) > batch:
        translated_sentences.extend(trans_ret[:batch])
    else:
        translated_sentences.extend(trans_ret)

    print(' -- Rendering translated text')
    # render translated texts
    img_canvas = np.copy(img_inpainted)
    for trans_text, region in zip(translated_sentences, text_regions):
        print(region.text)
        print(trans_text)
        print(region.majority_dir, region.x, region.y, region.w, region.h)
        img_bbox = cv2.rectangle(img_bbox, (region.x, region.y), (region.x + region.w, region.y + region.h), color=(0, 0, 255), thickness=2)
        fg = (region.fg_b, region.fg_g, region.fg_r)
        for idx in region.textline_indices:
            txtln = textlines[idx]
            img_bbox = cv2.rectangle(img_bbox, (txtln.x, txtln.y), (txtln.x + txtln.w, txtln.y + txtln.h), color=fg, thickness=2)
        if region.majority_dir == 'h':
            text_render.put_text_horizontal(img_canvas, trans_text, len(region.textline_indices), region.x, region.y, region.w, region.h, fg, None)
        else:
            text_render.put_text_vertical(img_canvas, trans_text, len(region.textline_indices), region.x, region.y, region.w, region.h, fg, None)

    print(' -- Saving results')
    cv2.imwrite('result/rs.png', imgproc.cvt2HeatmapImg(rscore))
    cv2.imwrite('result/as.png', imgproc.cvt2HeatmapImg(ascore))
    cv2.imwrite('result/textline.png', overlay)
    cv2.imwrite('result/bbox.png', img_bbox)
    cv2.imwrite('result/bbox_unfiltered.png', img_bbox_all)
    cv2.imwrite(
        'result/overlay.png',
        cv2.cvtColor(
            overlay_image(
                img_to_overlay,
                cv2.resize(overlay, (img_resized.shape[1], img_resized.shape[0]), interpolation=cv2.INTER_LINEAR)),
            cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/mask.png', final_mask)
    cv2.imwrite('result/inpainted.png', cv2.cvtColor(img_inpainted, cv2.COLOR_RGB2BGR))
    if inpaint_input is not None:
        cv2.imwrite('result/inpaint_input.png', cv2.cvtColor(inpaint_input, cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/final.png', cv2.cvtColor(img_canvas, cv2.COLOR_RGB2BGR))
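# Hypothetical entry point, not shown in the source: `args` is assumed to be a global
# argparse.Namespace parsed elsewhere with fields such as image, size, text_threshold,
# link_threshold, low_text and inpainting_size.
if __name__ == '__main__':
    main()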
def get_bounding_box(self, image_file, verbose=False):
    """
    Get the bounding boxes from image_file.

    :param image_file: path to the input image
    :param verbose: if True, additionally crop each detected box from the image
    :return: boxes, cropped box images, box center points, and the image dimensions
    """
    image = cv2.imread(image_file)
    img_dim = image.shape
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, self.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=self.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if self.cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = self.net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, self.text_threshold, self.link_threshold, self.low_text, self.poly)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    center_point = []
    for i, _b in enumerate(boxes):
        b = np.array(_b, dtype=np.int16)
        xmin = np.min(b[:, 0])
        ymin = np.min(b[:, 1])
        xmax = np.max(b[:, 0])
        ymax = np.max(b[:, 1])
        x_m = xmin + (xmax - xmin) / 2
        y_m = ymin + (ymax - ymin) / 2
        center_point.append([x_m, y_m])

    list_images = get_box_img(boxes, image)

    if verbose:
        for _b in boxes:
            b = np.array(_b, dtype=np.int16)
            xmin = np.min(b[:, 0])
            ymin = np.min(b[:, 1])
            xmax = np.max(b[:, 0])
            ymax = np.max(b[:, 1])
            r = image[ymin:ymax, xmin:xmax, :].copy()

    return boxes, list_images, center_point, img_dim
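# A minimal usage sketch for get_bounding_box, not taken from the source: it assumes the
# surrounding class is instantiated as `detector` with the CRAFT net, thresholds, canvas_size
# and mag_ratio already configured, and that 'sample.jpg' exists on disk.
boxes, box_images, centers, img_dim = detector.get_bounding_box('sample.jpg', verbose=False)
print(f'found {len(boxes)} boxes in an image of shape {img_dim}')
for (cx, cy), crop in zip(centers, box_images):
    print(f'box centered at ({cx:.1f}, {cy:.1f}), crop shape {getattr(crop, "shape", None)}')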
x = x.cpu().detach().numpy()
y, feature = run_tflite_model(x)
y = torch.from_numpy(y)
feature = torch.from_numpy(feature)

# make score and link map
score_text = y[0, :, :, 0].cpu().data.numpy()
score_link = y[0, :, :, 1].cpu().data.numpy()

# Post-processing
boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

# coordinate adjustment
boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
for k in range(len(polys)):
    if polys[k] is None:
        polys[k] = boxes[k]

# render results (optional)
render_img = score_text.copy()
render_img = np.hstack((render_img, score_link))
ret_score_text = imgproc.cvt2HeatmapImg(render_img)

file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)

filename, file_ext = os.path.splitext(os.path.basename(image_path))
print("Total time taken to run CRAFT tflite model......",
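# run_tflite_model is not shown above; the sketch below is one plausible implementation
# (an assumption, not the source's code) using the standard tf.lite.Interpreter API.
# The model path, input layout, and output ordering all depend on how the CRAFT model
# was actually converted to TFLite, so treat the index choices as illustrative only.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='craft.tflite')   # hypothetical file name
interpreter.allocate_tensors()

def run_tflite_model(x: np.ndarray):
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], x.astype(np.float32))
    interpreter.invoke()
    y = interpreter.get_tensor(output_details[0]['index'])        # region/affinity score maps
    feature = interpreter.get_tensor(output_details[1]['index'])  # backbone feature map
    return y, feature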