def test(modelpara):
    """Run CRAFT detection on every path in the module-level ``image_list``.

    A fresh ``CRAFT`` network is loaded from the checkpoint ``modelpara``
    (on GPU when ``args.cuda`` is set, otherwise mapped to CPU), switched to
    eval mode, and applied to each image; the polygons are handed to
    ``file_utils.saveResult`` with ``result_folder`` as the output directory.
    The total elapsed time is printed at the end.
    """
    detector = CRAFT()

    print('Loading weights from checkpoint {}'.format(modelpara))
    if args.cuda:
        detector.load_state_dict(copyStateDict(torch.load(modelpara)))
        detector = torch.nn.DataParallel(detector.cuda())
        cudnn.benchmark = False
    else:
        detector.load_state_dict(
            copyStateDict(torch.load(modelpara, map_location='cpu')))
    detector.eval()

    started = time.time()
    for position, image_path in enumerate(image_list, start=1):
        print("Test image {:d}/{:d}: {:s}".format(position, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        _, polys, _ = test_net(
            detector, image,
            args.text_threshold, args.link_threshold, args.low_text,
            args.cuda, args.poly)

        # The score-map path is still derived, but writing it is disabled.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        mask_file = result_folder + "/res_" + stem + '_mask.jpg'

        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)

    print("elapsed time : {}s".format(time.time() - started))
def eval2015(craft, test_image_folder, result_folder, text_threshold=0.7,
             link_threshold=0.4, low_text=0.4):
    """Run detection on the IC2015 images and evaluate the written results.

    Args:
        craft: network passed straight through to ``test_net``.
        test_image_folder: folder handed to ``list_img_ic2015`` to list images.
        result_folder: root output folder; box results go to ``<root>/gt`` and
            score maps to ``<root>/mask``.
        text_threshold, link_threshold, low_text: thresholds forwarded to
            ``test_net``.

    Side effects: writes one mask image and one result per input image, calls
    ``eval_2015`` on the ``gt`` folder, and prints progress and elapsed time.
    """
    image_list = list_img_ic2015(test_image_folder)
    t = time.time()
    res_gt_folder = os.path.join(result_folder, 'gt')
    res_mask_folder = os.path.join(result_folder, 'mask')
    # cv2.imwrite does not create missing directories (it just returns False),
    # so make sure the mask folder exists before the loop.
    os.makedirs(res_mask_folder, exist_ok=True)

    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\n')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(craft, image, text_threshold,
                                             link_threshold, low_text, True, True)

        # save score text
        filename, _ = os.path.splitext(os.path.basename(image_path))
        # No leading "/" on the file name: an absolute segment makes
        # os.path.join discard res_mask_folder entirely.
        mask_file = os.path.join(res_mask_folder, "res_" + filename + '_mask.jpg')
        cv2.imwrite(mask_file, score_text)

        file_utils.saveResult(image_path, bboxes, dirname=res_gt_folder)

    eval_2015(res_gt_folder)
    print("elapsed time : {}s".format(time.time() - t))
def get_result_img(image, score_text, score_link, text_threshold=0.68,
                   link_threshold=0.4, low_text=0.08, ratio_w=1.0, ratio_h=1.0):
    """Save two box overlays of ``image`` under ``./text_image/``.

    ``getDetBoxes`` is run twice on the same score maps, first with
    ``s=False`` and then with ``s=True``; each result is rescaled with
    ``adjustResultCoordinates`` and written through ``file_utils.saveResult``.
    Judging by the output names the two passes are presumably character-level
    and word-level boxes (confirm against ``getDetBoxes``).
    """
    passes = (
        (False, './text_image/text_image_char.jpg'),
        (True, './text_image/text_image_word.jpg'),
    )
    for s_flag, target in passes:
        detected = getDetBoxes(score_text, score_link, text_threshold,
                               link_threshold, low_text, s=s_flag)
        rescaled = adjustResultCoordinates(detected, ratio_w, ratio_h)
        file_utils.saveResult(target, image, rescaled, dirname='./text_image/')
def main_generate(ini, common_info, logger=None):
    """Generate ground-truth files from image / JSON-annotation pairs.

    Args:
        ini: mapping of setting names to template strings; each value is
            expanded with ``cs.replace_string_from_dict(val, common_info)``.
            The keys ``gt_path``, ``img_path`` and ``ann_path`` are read.
        common_info: mapping used for the expansion; ``dataset_type`` selects
            which annotation class (compared lower-cased) is exported.
        logger: logger used for progress messages. When omitted, a module
            logger is used instead of failing on ``None``.

    Returns:
        True once all images have been processed.

    Images and annotations are paired by position in their sorted listings.
    A pair is skipped with a warning when the annotation is missing or its
    core name does not match the image, so stale objects from a previous
    image are never written for the wrong file.
    """
    if logger is None:
        import logging
        logger = logging.getLogger(__name__)

    # Init. local variables ("settings" avoids shadowing the builtin vars()).
    settings = {key: cs.replace_string_from_dict(val, common_info)
                for key, val in ini.items()}

    cg.folder_exists(settings['gt_path'], create_=True)

    img_fnames = sorted(
        cg.get_filenames(settings['img_path'], extensions=ig.IMG_EXTENSIONS))
    ann_fnames = sorted(
        cg.get_filenames(settings['ann_path'], extensions=jg.META_EXTENSION))
    logger.info(
        " [GENERATE] # Total file number to be processed: {:d}.".format(
            len(img_fnames)))

    for idx, img_fname in enumerate(img_fnames):
        _, img_core_name, _ = cg.split_fname(img_fname)

        if idx >= len(ann_fnames):
            logger.warning(
                " [GENERATE] # No annotation file for {}; skipped.".format(img_fname))
            continue

        # Load json
        ann_fname = ann_fnames[idx]
        _, ann_core_name, _ = cg.split_fname(ann_fname)
        ann_core_name = ann_core_name.replace('.jpg', '')
        if ann_core_name != img_core_name:
            logger.warning(
                " [GENERATE] # Annotation {} does not match image {}; skipped.".format(
                    ann_fname, img_fname))
            continue

        img = ig.imread(img_fname, color_fmt='RGB')
        with open(ann_fname) as json_file:
            json_data = json.load(json_file)
        objects = json_data['objects']

        bboxes = []
        texts = []
        for obj in objects:
            class_name = obj['classTitle']
            if class_name != common_info['dataset_type'].lower():
                continue

            [x1, y1], [x2, y2] = obj['points']['exterior']
            text = obj['description']
            x_min, y_min = int(min(x1, x2)), int(min(y1, y2))
            x_max, y_max = int(max(x1, x2)), int(max(y1, y2))
            # Drop degenerate (zero-area) rectangles.
            if x_max - x_min <= 0 or y_max - y_min <= 0:
                continue

            rect4 = ic.convert_rect2_to_rect4([x_min, x_max, y_min, y_max])
            bboxes.append(rect4)
            texts.append(text)

        file_utils.saveResult(img_file=img_core_name,
                              img=img,
                              boxes=bboxes,
                              texts=texts,
                              dirname=settings['gt_path'])
        logger.info(" [GENERATE-OCR] # Generated to {} ({:d}/{:d})".format(
            settings['gt_path'] + img_core_name + '.txt', (idx + 1), len(img_fnames)))

    logger.info(" # {} in {} mode finished.".format(_this_basename_, GENERATE))
    return True
def test(modelpara, args=None, result_folder=None):
    """Evaluate a segmentation-module checkpoint on the module-level ``image_list``.

    Builds a ``resnet50dilated`` encoder and a ``c1`` decoder (2 classes),
    wraps them in ``SegmentationModule`` and loads ``modelpara`` into it.
    Every image is run through ``test_net`` under ``torch.no_grad()`` and the
    polygons are saved into ``result_folder``. After the loop the network is
    put back into training mode and the elapsed time is printed.

    ``args`` must provide ``cuda``, ``text_threshold``, ``link_threshold``,
    ``low_text`` and ``poly``; the ``None`` default is not usable as-is.
    """
    encoder = builder.build_encoder(arch='resnet50dilated', fc_dim=2048, weights='')
    decoder = builder.build_decoder(arch='c1', weights='', fc_dim=2048, num_class=2)
    model = SegmentationModule(encoder, decoder, False)

    print('Loading weights from checkpoint {}'.format(modelpara))
    if args.cuda:
        model.load_state_dict(copyStateDict(torch.load(modelpara)))
        model = torch.nn.DataParallel(model.cuda())
        cudnn.benchmark = False
    else:
        model.load_state_dict(
            copyStateDict(torch.load(modelpara, map_location='cpu')))
    model.eval()

    started = time.time()
    for position, image_path in enumerate(image_list, start=1):
        print("Test image {:d}/{:d}: {:s}".format(position, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        with torch.no_grad():
            _, polys, _ = test_net(
                model, image,
                args.text_threshold, args.link_threshold, args.low_text,
                args.cuda, args.poly, args)

        # The score-map path is still derived, but writing it is disabled.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        mask_file = result_folder + "/res_" + stem + '_mask.jpg'

        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)

    model.train()
    print("elapsed time : {}s".format(time.time() - started))
def main():
    """Detect text in every image of ``image_list`` using the global ``args``.

    Loads the CRAFT checkpoint ``args.trained_model`` and, when
    ``args.refine`` is set, the link refiner ``args.refiner_model`` (which
    also forces ``args.poly`` to True). For each image the score map is
    written to ``<result_folder>/res_<name>_mask.jpg`` and the polygons are
    saved through ``file_utils.saveResult``.
    """
    use_cuda = args.cuda

    detector = CRAFT()
    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if use_cuda:
        detector.load_state_dict(copyStateDict(torch.load(args.trained_model)))
        detector = torch.nn.DataParallel(detector.cuda())
        cudnn.benchmark = False
    else:
        detector.load_state_dict(
            copyStateDict(torch.load(args.trained_model, map_location='cpu')))
    detector.eval()

    # Optional LinkRefiner
    refiner = None
    if args.refine:
        from refinenet import RefineNet
        refiner = RefineNet()
        print('Loading weights of refiner from checkpoint (' + args.refiner_model + ')')
        if use_cuda:
            refiner.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
            refiner = torch.nn.DataParallel(refiner.cuda())
        else:
            refiner.load_state_dict(
                copyStateDict(torch.load(args.refiner_model, map_location='cpu')))
        refiner.eval()
        args.poly = True

    print(image_list)

    for position, image_path in enumerate(image_list, start=1):
        print("Test image {:d}/{:d}: {:s}".format(position, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        _, polys, score_text = test_net(
            detector, image,
            args.text_threshold, args.link_threshold, args.low_text,
            args.cuda, args.poly, refiner)

        # Persist the score map next to the detection result.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        cv2.imwrite(result_folder + "/res_" + stem + '_mask.jpg', score_text)

        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
def detect(self, path):
    """Detect text in the image at ``path`` and return padded, sorted boxes.

    The image is run through ``self.test_net`` with fixed thresholds and no
    refiner. Each returned polygon is flattened to int32 and reduced to
    ``[p[0] - 3, p[1] - 5, p[2], p[5] + 5]``. The polygons are also saved to
    ``Detect_result/``. The list is sorted in place with ``sorting_key``
    before being returned.
    """
    image = imgproc.loadImage(path)
    _, polys, _ = self.test_net(self.net, image, 0.7, 999999, 0.5, False, None)

    flattened = (np.array(box).astype(np.int32).reshape((-1)) for box in polys)
    bbox = [[p[0] - 3, p[1] - 5, p[2], p[5] + 5] for p in flattened]

    file_utils.saveResult(path, image[:, :, ::-1], polys, dirname="Detect_result/")
    bbox.sort(key=sorting_key)
    return bbox
def PredictDetection(args, net, image_path, opt, reco):
    """Detect text in a single image and return what ``saveResult`` produces.

    ``./result/`` is created when missing. The image at ``image_path`` is run
    through ``test_net`` and the boxes, together with ``opt`` and ``reco``,
    are passed to ``file_utils.saveResult``. The elapsed time is both printed
    and logged through ``log``.
    """
    # The folder listing is kept from the original; its result is not used.
    image_list, _, _ = file_utils.get_files(args.test_folder)

    result_folder = './result/'
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)

    started = time.time()
    image = imgproc.loadImage(image_path)
    bboxes, score_text = test_net(
        net, image,
        args.text_threshold, args.link_threshold, args.low_text,
        args.cuda, args)

    fl = file_utils.saveResult(
        image_path, image[:, :, ::-1], bboxes, opt, reco, dirname=result_folder)

    print("elapsed time detecting : {}s".format(time.time() - started))
    log.info(f'elapsed time detecting : {time.time() - started}s')
    return fl
def test(pre_model, res_dir=result_folder, mode=0):
    """Evaluate CRAFT weights ``pre_model`` on one of two image lists.

    ``mode == 0`` selects ``image_list_ic15``; any other value selects
    ``image_list_ours``. Weights are loaded with
    ``load_weights(...).expect_partial()``. For each image the score map is
    written to ``<res_dir>/res_<name>_mask.jpg`` and the polygons are saved
    through ``file_utils.saveResult``.
    """
    model = CRAFT()

    # Fixed inference settings.
    text_threshold = float(0.7)
    low_text = float(0.4)
    link_threshold = float(0.4)
    cuda = True
    poly = False

    print('Loading weights from checkpoint {}'.format(pre_model))
    load_status = model.load_weights(pre_model).expect_partial()
    print(load_status)

    started = time.time()
    print("#############")
    print(model)

    image_list = image_list_ic15 if mode == 0 else image_list_ours
    print(image_list)

    for position, image_path in enumerate(image_list, start=1):
        print("Test image {:d}/{:d}: {:s}".format(position, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        stem = os.path.splitext(os.path.basename(image_path))[0]

        _, polys, score_text = test_net(
            model, image, text_threshold, link_threshold, low_text,
            cuda, poly, stem)

        # Persist the score map next to the detection result.
        cv2.imwrite(res_dir + "/res_" + stem + '_mask.jpg', score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=res_dir)

    print("Eval elapsed time : {}s".format(time.time() - started))
def inference(net):
    """Run ``net`` over the module-level ``image_list`` and save the outputs.

    Thresholds come from the global ``args``; ``refine_net`` and
    ``result_folder`` are also resolved from the enclosing scope. For each
    image the score map is written to ``<result_folder>/res_<name>_mask.jpg``
    and the polygons are saved through ``file_utils.saveResult``.
    """
    for position, image_path in enumerate(image_list, start=1):
        print("Test image {:d}/{:d}: {:s}".format(position, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        _, polys, score_text = test_net(
            net, image,
            args.text_threshold, args.link_threshold, args.low_text,
            args.cuda, args.poly, refine_net)

        # Persist the score map next to the detection result.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        cv2.imwrite(result_folder + "/res_" + stem + '_mask.jpg', score_text)

        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
def main(trained_model='weights/craft_mlt_25k.pth',
         text_threshold=0.7,
         low_text=0.4,
         link_threshold=0.4,
         cuda=True,
         canvas_size=1280,
         mag_ratio=1.5,
         poly=False,
         show_time=False,
         test_folder='/data/',
         refine=True,
         refiner_model='weights/craft_refiner_CTW1500.pth'):
    """Detect text in one image with CRAFT and an optional link refiner.

    The image path and output folder are not parameters: ``image_path`` and
    ``result_folder`` are resolved from the enclosing scope. ``canvas_size``,
    ``mag_ratio``, ``show_time`` and ``test_folder`` are accepted but not
    read by this function. Enabling ``refine`` forces ``poly`` to True.

    Writes the score map to ``<result_folder>/res_<name>_mask.jpg``, saves
    the polygons through ``file_utils.saveResult`` and prints the elapsed
    time. Nothing is returned.
    """
    detector = CRAFT()
    print('Loading weights from checkpoint (' + trained_model + ')')
    if cuda:
        detector.load_state_dict(copyStateDict(torch.load(trained_model)))
        detector = torch.nn.DataParallel(detector.cuda())
        cudnn.benchmark = False
    else:
        detector.load_state_dict(
            copyStateDict(torch.load(trained_model, map_location='cpu')))
    detector.eval()

    # Optional LinkRefiner
    refiner = None
    if refine:
        from refinenet import RefineNet
        refiner = RefineNet()
        print('Loading weights of refiner from checkpoint (' + refiner_model + ')')
        if cuda:
            refiner.load_state_dict(copyStateDict(torch.load(refiner_model)))
            refiner = torch.nn.DataParallel(refiner.cuda())
        else:
            refiner.load_state_dict(
                copyStateDict(torch.load(refiner_model, map_location='cpu')))
        refiner.eval()
        poly = True

    started = time.time()

    image = imgproc.loadImage(image_path)
    _, polys, score_text = test_net(
        detector, image, text_threshold, link_threshold, low_text,
        cuda, poly, refiner)

    # Persist the score map next to the detection result.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    cv2.imwrite(result_folder + "/res_" + stem + '_mask.jpg', score_text)

    final_img = file_utils.saveResult(
        image_path, image[:, :, ::-1], polys, dirname=result_folder)

    print("elapsed time : {}s".format(time.time() - started))
def test(modelpara):
    """Run CRAFT detection on every path in the module-level ``image_list``.

    Args:
        modelpara: path of the checkpoint to load into the network.

    The device is chosen from ``torch.cuda.is_available()``. The checkpoint
    is loaded onto that device; without this the network would run with
    freshly initialised weights even though a "Loading weights" message is
    printed. The same device decision is passed to ``test_net`` so the input
    tensor and the model cannot end up on different devices.

    For each image the raw outputs are printed and the polygons are saved
    into ``result_folder``. The elapsed time is printed at the end.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    use_cuda = device.type == "cuda"

    # load net
    net = CRAFT()  # initialize

    print('Loading weights from checkpoint {}'.format(modelpara))
    net.load_state_dict(copyStateDict(torch.load(modelpara, map_location=device)))
    net = net.to(device)

    net.eval()  # inference mode (disables dropout / batch-norm updates)

    t = time.time()

    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k+1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)

        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold, args.low_text,
                                             use_cuda, args.poly)
        print("\n bboxes = ", bboxes, "\n poly = ", polys, "\n text = ", score_text, "\n text.shape = ", score_text.shape)

        print("save in" + result_folder)
        file_utils.saveResult(image_path, image[:,:,::-1], polys, dirname=result_folder)

    print("elapsed time : {}s".format(time.time() - t))
def _test_patch_image(net, image_patch, text_threshold, link_threshold,
                      low_text, cuda, poly, draw_contour):
    """Detect text in one image patch and return ``(rendered, bboxes)``.

    The polygons from ``test_net`` are drawn by ``file_utils.saveResult``
    under a random 8-character name in ``api_result/``, with
    ``return_matrix=True`` so the rendered patch comes back to the caller.
    """
    bboxes, polys, _ = test_net(net, image_patch, text_threshold,
                                link_threshold, low_text, cuda, poly)

    # todo: here to add the ocr recognition.
    save_options = dict(
        dirname="api_result/",
        return_matrix=True,
        draw_contour=draw_contour,
        texts=None,
    )
    rendered = file_utils.saveResult(
        randomString(8), image_patch[:, :, ::-1], polys, **save_options)
    return rendered, bboxes
for k, image_path in enumerate(image_list): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r') image = imgproc.loadImage(image_path) bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net) # save score text filename, file_ext = os.path.splitext(os.path.basename(image_path)) Indx = file_utils.saveResult(Indx, image_path, image[:, :, ::-1], polys, dirname=result_folder) for k, image_path in enumerate(image_list2): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list2), image_path), end='\r') image = imgproc.loadImage(image_path) bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net) # save score text
i] + ' ' + final_result[-1][start_str:] current_line_x_index.insert(ins_index[0], i) current_line_y = [ min([current_line_y[0], min(box[:, 1])]), max([current_line_y[1], max(box[:, 1])]) ] else: final_result.append(full_text[i]) current_line_y = [min(box[:, 1]), max(box[:, 1])] current_line_x_index = [i] file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder, verticals=None, texts=text_coors_array) # text_file = result_folder + "/res_" + filename + '_text.txt' # mask_file = result_folder + "/res_" + filename + '_mask.jpg' # cv2.imwrite(mask_file, score_text) # # file_utils.saveResult(image_path, image[:,:,::-1], polys, dirname=result_folder) # full_text = [] # oem_psm_config = r'--oem 3 --psm 6' # # for i, box in enumerate(bboxes): # poly = np.array(box).astype(np.int32).reshape((-1)) # poly = poly.reshape(-1, 2)
def img_to_text(image):
    """Detect text regions in ``image``, merge them into clusters and OCR each.

    Steps, in order:
      1. ``test_net`` (with the module-level ``net``) returns boxes/polygons.
      2. The centre of each polygon is clustered with ``GDBSCAN`` using the
         module-level ``n_pred`` and ``w_card``.
      3. For every cluster one 4-vertex box is built from its first and last
         member (sorted by centre x, then y).
      4. Each box is cropped, preprocessed with OpenCV and read by Tesseract.

    Side effects: writes ``./pre_newimage.jpg`` and
    ``./dbscan_pre_newimage.jpg`` via ``file_utils.saveResult`` and prints
    the elapsed time.

    Returns:
        list of the Tesseract output strings, one per cluster box.
    """
    start = time.time()
    # Initialize CRAFT parameters
    text_threshold = 0.7
    low_text = 0.4
    link_threshold =0.4
    # cuda = True
    cuda=False
    canvas_size =1280
    mag_ratio =1.5
    # if the text in the image is curved --> poly=True
    poly=False
    refine=False
    show_time=False
    refine_net = None
    # NOTE(review): this test_net variant takes canvas_size and mag_ratio as
    # its first two positional arguments — confirm against its definition.
    bboxes, polys, score_text = test_net(canvas_size, mag_ratio, net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net)
    file_utils.saveResult("./pre_newimage.jpg", image[:,:,::-1], bboxes, dirname='./')

    # Centre of each polygon = mean of its first four vertices.
    poly_indexes = {}
    central_poly_indexes = []
    for i in range(len(polys)):
        poly_indexes[i] = polys[i]
        x_central = (polys[i][0][0] + polys[i][1][0] +polys[i][2][0] + polys[i][3][0])/4
        y_central = (polys[i][0][1] + polys[i][1][1] +polys[i][2][1] + polys[i][3][1])/4
        central_poly_indexes.append({i: [int(x_central), int(y_central)]})

    # Wrap each centre in a Point that remembers the polygon index.
    X = []
    for idx, x in enumerate(central_poly_indexes):
        point = Point(x[idx][0],x[idx][1], idx)
        X.append(point)
    # These re-assignments repeat the values set above; none is read again.
    poly=False
    refine=False
    show_time=False
    refine_net = None
    clustered = GDBSCAN(Points(X), n_pred, 1, w_card)

    cluster_values = []
    for cluster in clustered:
        # First / last member of the cluster after sorting by (x, y).
        sort_cluster = sorted(cluster, key = lambda elem: (elem.x, elem.y))
        max_point_id = sort_cluster[len(sort_cluster) - 1].id
        min_point_id = sort_cluster[0].id
        # Vertices of those two polygons, sorted by (x, y).
        max_rectangle = sorted(poly_indexes[max_point_id], key = lambda elem: (elem[0], elem[1]))
        min_rectangle = sorted(poly_indexes[min_point_id], key = lambda elem: (elem[0], elem[1]))
        # Last two vertices of the last polygon, first two of the first one.
        right_above_max_vertex = max_rectangle[len(max_rectangle) -1]
        right_below_max_vertex = max_rectangle[len(max_rectangle) -2]
        left_above_min_vertex = min_rectangle[0]
        left_below_min_vertex = min_rectangle[1]
        # Swap so that the "above" vertex is the one with the smaller y.
        if (int(min_rectangle[0][1]) > int(min_rectangle[1][1])):
            left_above_min_vertex = min_rectangle[1]
            left_below_min_vertex = min_rectangle[0]
        # NOTE(review): this swap fires when the last vertex has the SMALLER
        # y, the opposite sense of the left-hand swap — confirm intended.
        if (int(max_rectangle[len(max_rectangle) -1][1]) < int(max_rectangle[len(max_rectangle) -2][1])):
            right_above_max_vertex = max_rectangle[len(max_rectangle) -2]
            right_below_max_vertex = max_rectangle[len(max_rectangle) -1]
        cluster_values.append([left_above_min_vertex, left_below_min_vertex, right_above_max_vertex, right_below_max_vertex])

    file_utils.saveResult('./dbscan_pre_newimage.jpg', image[:,:,::-1], cluster_values, dirname='./')
    img = np.array(image[:,:,::-1])
    res = []
    py_eng = []  # never filled; the code that used it is disabled
    for i, box in enumerate(cluster_values):
        poly = np.array(box).astype(np.int32).reshape((-1))
        poly = poly.reshape(-1, 2)
        # Axis-aligned bounding rectangle of the four cluster vertices.
        rect = cv2.boundingRect(poly)
        x,y,w,h = rect
        croped = img[y:y+h, x:x+w].copy()
        # Preprocess the cropped segment: 5x upscale, grayscale, blur,
        # bilateral filter, dilate, then Otsu binarisation.
        croped = cv2.resize(croped, None, fx=5, fy=5, interpolation=cv2.INTER_CUBIC)
        croped = cv2.cvtColor(croped, cv2.COLOR_BGR2GRAY)
        croped = cv2.GaussianBlur(croped, (3, 3), 0)
        croped = cv2.bilateralFilter(croped,5,25,25)
        croped = cv2.dilate(croped, None, iterations=1)
        croped = cv2.threshold(croped, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        text = pytesseract.image_to_string(croped,lang='digits_comma',config='--psm 10 --oem 1')
        res.append(text)
    end = time.time()
    print(end - start)
    return res
#print("crop paras:\t{}\t{}\t".format(min_value, max_value)) h_0 = int(min_value[1]) h_1 = int(max_value[1]) w_0 = int(min_value[0]) w_1 = int(max_value[0]) text_crop = crop[h_0:h_1, w_0:w_1] #print("=========={}".format(type(text_crop))) print("Finding the nearest one and crop it:{}".format(time.time() - t0)) text_area_path = 'text_area_' + item + data.ImgExt io.imsave(os.path.join(result_folder, text_area_path), text_crop) mask_file = result_folder + "/res_" + item + '_mask.jpg' cv.imwrite(mask_file, score_text) file_utils.saveResult((item + data.ImgExt), crop[:, :, ::-1], polys, dirname=result_folder) ## crnn based recognition process t0 = time.time() text_crop = Image.fromarray(text_crop).convert('L') text_crop = transformer(text_crop) if torch.cuda.is_available(): #print("True") text_crop = text_crop.cuda() text_crop = text_crop.view(1, *text_crop.size()) text_crop = Variable(text_crop) preds = crnn_net(text_crop) _, preds = preds.max(2) preds = preds.transpose(1, 0).contiguous().view(-1)
def craftnet():
    """Detect text in ``image_list``, OCR the crops and classify each line.

    Workflow:
      1. Load CRAFT (and, when ``CONFIG['refine']`` is set, the link refiner,
         which also forces ``CONFIG['poly']`` to True).
      2. For every image write the score map to
         ``<result_folder>/res_<name>_mask.jpg`` and save the polygons through
         ``file_utils.saveResult``.
      3. Run Tesseract on every ``.tif`` file in ``result/temp_result``.
      4. Sort each OCR string that passes ``parse_info`` into the first
         matching category: email, phone, website, company, address, name;
         anything else goes to ``extra_information``.

    Returns:
        dict with the list-valued keys ``name``, ``phone``, ``email``,
        ``company``, ``website``, ``address`` and ``extra_information``.
    """
    use_cuda = CONFIG['cuda']

    # load net
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint (' + CONFIG['trained_model'] + ')')
    if use_cuda:
        net.load_state_dict(copyStateDict(torch.load(CONFIG['trained_model'])))
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    else:
        net.load_state_dict(
            copyStateDict(
                torch.load(CONFIG['trained_model'], map_location='cpu')))
    net.eval()

    # LinkRefiner
    refine_net = None
    if CONFIG['refine']:
        from refinenet import RefineNet
        refine_net = RefineNet()
        if use_cuda:
            refine_net.load_state_dict(
                copyStateDict(torch.load(CONFIG['refiner_model'])))
            refine_net = refine_net.cuda()
            refine_net = torch.nn.DataParallel(refine_net)
        else:
            refine_net.load_state_dict(
                copyStateDict(
                    torch.load(CONFIG['refiner_model'], map_location='cpu')))
        refine_net.eval()
        CONFIG['poly'] = True

    # load data
    for image_path in image_list:
        # This project's loadImage returns a pair; only the second is used.
        orig, image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(
            net, image, CONFIG['text_threshold'], CONFIG['link_threshold'],
            CONFIG['low_text'], CONFIG['cuda'], CONFIG['poly'], refine_net)

        # save score text
        filename, _ = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path,
                              image[:, :, ::-1],
                              polys,
                              dirname=result_folder)

    # OCR every .tif crop found in result/temp_result.
    information = []
    config = '--psm 10 --oem 3 -l vie+eng'
    for file in os.listdir('result/temp_result'):
        if os.path.splitext(file)[1] != '.tif':
            continue
        # Context manager closes the file handle once OCR is done.
        with Image.open('result/temp_result/' + file) as crop:
            raw_text = pytesseract.image_to_string(crop,
                                                   lang='eng+vie',
                                                   config=config)
        information.append(raw_text)

    X = {
        "name": [],
        "phone": [],
        "email": [],
        "company": [],
        "website": [],
        "address": [],
        "extra_information": []
    }
    # Tried in this order; the first parser returning a value wins.
    field_parsers = (
        ("email", parse_email),
        ("phone", parse_phone),
        ("website", parse_website),
        ("company", parse_company),
        ("address", parse_address),
    )
    for info in information:
        # NOTE(review): strings rejected by parse_info are dropped entirely
        # (assumed nesting of the original) — confirm.
        if not parse_info(info):
            continue
        for key, parser in field_parsers:
            parsed = parser(info)
            if parsed is not None:
                X[key].append(parsed)
                break
        else:
            # NOTE(review): for names the raw string is stored, not the
            # parse_name result, exactly as in the original.
            if parse_name(info) is not None:
                X["name"].append(info)
            else:
                X["extra_information"].append(info)
    return X
def processing(file_name, crs):
    """Extract a coordinate pair from a pre-processed image and convert it.

    Pipeline, in order:
      1. Load CRAFT on CPU and detect text in ``./imgtxtenh/pre_<file_name>``
         (thresholds and ``cuda`` come from module-level names).
      2. Cluster the polygon centres with ``GDBSCAN`` and build one 4-vertex
         box per cluster.
      3. OCR every cluster box with Tesseract and concatenate the text.
      4. Replace characters that are presumably common OCR confusions with
         digits, then pull number-like substrings out with a regex.
      5. Group the candidates by their first three characters, take the two
         largest groups, decide which is X and which is Y with ``findX``, and
         choose one value from each with ``str_similarity``.
      6. Convert the pair with ``vn2k_to_wgs84`` using ``crs``.

    Side effects: opens a blocking matplotlib window (``plt.show()``), writes
    overlays into ``./craft_result/`` and ``./cluster_result/`` and prints
    intermediate results.

    Returns:
        tuple ``(x, y)`` taken from the conversion result.
    """
    # path of the pre-trained CRAFT model file
    trained_model_path = './craft_mlt_25k.pth'
    #trained_model_path = './vgg16.ckpt'
    net = CRAFT()
    net.load_state_dict(
        copyStateDict(torch.load(trained_model_path, map_location='cpu')))
    net.eval()

    # Load image from its path
    image_path = f'./imgtxtenh/pre_{file_name}'
    image = imgproc.loadImage(image_path)

    fig2 = plt.figure(figsize=(10, 10))  # create a 10 x 10 figure
    ax3 = fig2.add_subplot(111)
    ax3.imshow(image, interpolation='none')
    ax3.set_title('larger figure')
    plt.show()

    poly = False
    refine = False
    show_time = False
    refine_net = None
    # text_threshold, link_threshold, low_text and cuda are not defined in
    # this function; they are resolved from the enclosing scope.
    bboxes, polys, score_text = test_net(net, image, text_threshold,
                                         link_threshold, low_text, cuda, poly,
                                         refine_net)
    file_utils.saveResult(image_path,
                          image[:, :, ::-1],
                          bboxes,
                          dirname='./craft_result/')

    # Compute the central point of each bounding box returned by CRAFT.
    # Purpose: makes clustering in the G-DBSCAN step easier.
    poly_indexes = {}
    central_poly_indexes = []
    for i in range(len(polys)):
        poly_indexes[i] = polys[i]
        x_central = (polys[i][0][0] + polys[i][1][0] + polys[i][2][0] +
                     polys[i][3][0]) / 4
        y_central = (polys[i][0][1] + polys[i][1][1] + polys[i][2][1] +
                     polys[i][3][1]) / 4
        central_poly_indexes.append({i: [int(x_central), int(y_central)]})

    # Convert each of these coordinates to a Point instance.
    X = []
    for idx, x in enumerate(central_poly_indexes):
        point = Point(x[idx][0], x[idx][1], idx)
        X.append(point)

    # Cluster these central points
    clustered = GDBSCAN(Points(X), n_pred, 1, w_card)

    # Create a 4-point bounding box for each cluster.
    # Purpose: merge the words of one cluster into one bounding box.
    cluster_values = []
    for cluster in clustered:
        sort_cluster = sorted(cluster, key=lambda elem: (elem.x, elem.y))
        max_point_id = sort_cluster[len(sort_cluster) - 1].id
        min_point_id = sort_cluster[0].id
        max_rectangle = sorted(poly_indexes[max_point_id],
                               key=lambda elem: (elem[0], elem[1]))
        min_rectangle = sorted(poly_indexes[min_point_id],
                               key=lambda elem: (elem[0], elem[1]))

        right_above_max_vertex = max_rectangle[len(max_rectangle) - 1]
        right_below_max_vertex = max_rectangle[len(max_rectangle) - 2]
        left_above_min_vertex = min_rectangle[0]
        left_below_min_vertex = min_rectangle[1]

        # Swap so that the "above" vertex is the one with the smaller y.
        if (int(min_rectangle[0][1]) > int(min_rectangle[1][1])):
            left_above_min_vertex = min_rectangle[1]
            left_below_min_vertex = min_rectangle[0]
        # NOTE(review): this swap fires when the last vertex has the SMALLER
        # y, the opposite sense of the left-hand swap — confirm intended.
        if (int(max_rectangle[len(max_rectangle) - 1][1]) < int(
                max_rectangle[len(max_rectangle) - 2][1])):
            right_above_max_vertex = max_rectangle[len(max_rectangle) - 2]
            right_below_max_vertex = max_rectangle[len(max_rectangle) - 1]

        cluster_values.append([
            left_above_min_vertex, left_below_min_vertex,
            right_above_max_vertex, right_below_max_vertex
        ])

    file_utils.saveResult(image_path,
                          image[:, :, ::-1],
                          cluster_values,
                          dirname='./cluster_result/')

    img = np.array(image[:, :, ::-1])
    ocr_res = []
    plain_txt = ""
    for i, box in enumerate(cluster_values):
        poly = np.array(box).astype(np.int32).reshape((-1))
        poly = poly.reshape(-1, 2)
        rect = cv2.boundingRect(poly)
        x, y, w, h = rect
        croped = img[y:y + h, x:x + w].copy()

        # Preprocess the cropped segment: 5x upscale, grayscale, blur,
        # bilateral filter, dilate, Otsu binarisation, back to 3 channels.
        croped = cv2.resize(croped,
                            None,
                            fx=5,
                            fy=5,
                            interpolation=cv2.INTER_LINEAR)
        croped = cv2.cvtColor(croped, cv2.COLOR_BGR2GRAY)
        croped = cv2.GaussianBlur(croped, (3, 3), 0)
        croped = cv2.bilateralFilter(croped, 5, 25, 25)
        croped = cv2.dilate(croped, None, iterations=1)
        croped = cv2.threshold(croped, 0, 255,
                               cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        croped = cv2.cvtColor(croped, cv2.COLOR_BGR2RGB)

        custom_oem_psm_config = r'--oem 1 --psm 12'
        # Each cluster's text is preceded by a dashed separator line.
        plain_txt += "--------\n"
        plain_txt += pytesseract.image_to_string(croped,
                                                 lang='eng',
                                                 config=custom_oem_psm_config)

    copy_plain_txt = plain_txt  # kept from the original; not read afterwards

    # Map characters that are presumably OCR misreads onto digits.
    plain_txt = re.sub(r"\$", "5", plain_txt)
    plain_txt = re.sub(r"%", "7", plain_txt)
    plain_txt = re.sub(r"Y", "5", plain_txt)
    plain_txt = re.sub(r"W", "99", plain_txt)
    plain_txt = re.sub(r"£", "1", plain_txt)
    plain_txt = re.sub(r"\)", "1", plain_txt)
    plain_txt = re.sub(r"\}", "1", plain_txt)
    plain_txt = re.sub(r"\|", "1", plain_txt)

    # Localization
    # Only coor_patterns is used below; the other three are compiled but
    # never applied in this function.
    init_patterns_1 = re.compile(r'TOA\sDO', re.IGNORECASE)
    init_patterns_2 = re.compile(r'\w{0,2}\d{5,}', re.IGNORECASE)
    term_patterns = re.compile(r'\n[^\-\d]{10,}', re.IGNORECASE)
    coor_patterns = re.compile(r'\d+\s*[\d]*\s*[\d\.]*', re.IGNORECASE)

    coordinates = coor_patterns.findall(plain_txt)
    # Strip newlines, form feeds and any remaining whitespace from matches.
    for i in range(len(coordinates)):
        coordinates[i] = re.sub('\n', '', coordinates[i])
        coordinates[i] = re.sub('\x0c', '', coordinates[i])
        coordinates[i] = re.sub(r'\s', '', coordinates[i])

    # Drop candidates of 7 characters or fewer. Iterates over a copy because
    # the list is mutated while filtering.
    temp_arr = coordinates.copy()
    for i in range(len(temp_arr)):
        try:
            # print(float(temp_arr[i]))
            if len(temp_arr[i]) <= 7:
                coordinates.remove(temp_arr[i])
        except ValueError:
            coordinates.remove(temp_arr[i])
    print(coordinates)

    # First grouping: for every candidate, collect the other candidates that
    # share its first three characters.
    cluster_arr = [[coor] for coor in coordinates]
    for i in range(len(coordinates)):
        for coor in coordinates:
            if cluster_arr[i][0] != coor and cluster_arr[i][0][0] == coor[
                    0] and cluster_arr[i][0][1] == coor[1] and cluster_arr[i][
                        0][2] == coor[2]:
                cluster_arr[i].append(coor)

    cluster_lens = []
    for cluster in cluster_arr:
        cluster_lens.append(len(cluster))
    try:
        max_len = max(cluster_lens)
    except ValueError:
        # max() of an empty list: no candidates survived.
        max_len = 0

    # The first group of maximal size becomes coor_arr_1.
    coor_arr_1 = []
    for cluster in cluster_arr:
        if max_len == len(cluster):
            coor_arr_1 = cluster
            break

    # Second grouping over the candidates not already in coor_arr_1.
    cluster_arr = []
    for coor in coordinates:
        if coor not in coor_arr_1:
            cluster_arr.append([coor])
    for i in range(len(cluster_arr)):
        for coor in coordinates:
            if coor not in coor_arr_1 and cluster_arr[i][
                    0] != coor and cluster_arr[i][0][0] == coor[
                        0] and cluster_arr[i][0][1] == coor[1] and cluster_arr[
                            i][0][2] == coor[2]:
                cluster_arr[i].append(coor)

    cluster_lens = []
    for cluster in cluster_arr:
        cluster_lens.append(len(cluster))
    try:
        max_len = max(cluster_lens)
    except ValueError:
        max_len = 0

    # coor_arr_2 ends up as the LAST group of maximal size; all such groups
    # are remembered so ties can be resolved below.
    coor_arr_2 = []
    similar_cluster_arr = []
    temp = 0
    for cluster in cluster_arr:
        if max_len == len(cluster):
            temp += 1
            coor_arr_2 = cluster
            similar_cluster_arr.append(cluster)
    if temp > 1:
        # Tie: pick the group with the smallest similar_value to coor_arr_1.
        similar_val_arr = []
        for cluster in similar_cluster_arr:
            similar_val_arr.append(similar_value(cluster, coor_arr_1))
        right_index = np.where(
            similar_val_arr == np.amin(similar_val_arr))[0][0]
        coor_arr_2 = similar_cluster_arr[right_index]

    # eliminate() / insert_point() are defined elsewhere; when eliminate
    # returns an empty result the group is passed to insert_point instead.
    temp_lst = []
    if len(eliminate(coor_arr_1, temp_lst)) != 0:
        coor_arr_1 = eliminate(coor_arr_1, temp_lst)
    else:
        insert_point(coor_arr_1)
    if len(eliminate(coor_arr_2, temp_lst)) != 0:
        coor_arr_2 = eliminate(coor_arr_2, temp_lst)
    else:
        insert_point(coor_arr_2)

    # The group with the larger findX score is treated as X, the other as Y.
    X = []
    Y = []
    if findX(coor_arr_1, coordinates) > findX(coor_arr_2, coordinates):
        X = coor_arr_1
        Y = coor_arr_2
    else:
        X = coor_arr_2
        Y = coor_arr_1
    print('X: ' + str(X))
    print('Y: ' + str(Y))

    # Keep only the entries that parse as floats.
    temp_arr = []
    for coor in X:
        try:
            float(coor)
            temp_arr.append(float(coor))
        except ValueError:
            pass
    X = temp_arr
    temp_arr = []
    for coor in Y:
        try:
            float(coor)
            temp_arr.append(float(coor))
        except ValueError:
            pass
    Y = temp_arr

    # Pick the entry with the highest str_similarity score; fall back to 0
    # when np.amax raises ValueError (empty array).
    sim_arr = str_similarity(X, coordinates)
    sim_arr = np.array(sim_arr)
    try:
        optimal_index = np.where(sim_arr == np.amax(sim_arr))[0][0]
        x = X[optimal_index]
    except ValueError:
        x = 0
    sim_arr = str_similarity(Y, coordinates)
    sim_arr = np.array(sim_arr)
    try:
        optimal_index = np.where(sim_arr == np.amax(sim_arr))[0][0]
        y = Y[optimal_index]
    except ValueError:
        y = 0
    print('Most likely to be x: ' + str(x))
    print('Most likely to be y: ' + str(y))

    #################### VN2K TO WGS84 ####################
    # The helper's result is unpacked as (y, x) and returned as (x, y).
    y, x = vn2k_to_wgs84((x, y), crs)
    print((x, y))
    return (x, y)


# processing('test_16.jpg', 9210)
def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
    """Detect text lines, recognise each one, and save annotated results.

    For every path in the module-level ``image_list`` the image is read with
    OpenCV, text boxes are obtained from ``test_net``, and each box is
    cropped, rescaled so that its shorter side is 20 px, converted to
    grayscale and decoded with ``gen_sample``. The decoded string becomes
    the text of a ``<line>`` element (under a per-image ``<page>`` element)
    and is drawn onto the image with ``cv2_putText_1``. The annotated image
    and polygons are passed to ``file_utils.saveResult``. After the last
    image, the XML tree is written to ``./data/result.xml`` as Shift_JIS.

    Reads the module-level ``args``, ``image_list`` and ``result_folder``.

    Args:
        text_detection_modelpara: Path of the CRAFT checkpoint.
        ocr_modelpara: Path of the ``Encoder_Decoder`` checkpoint.
        dictionary_target: Path given to ``load_dict``. The keys of the
            returned mapping are parsed as hexadecimal code points and the
            values are used as class indices.
    """
    use_cuda = args.cuda

    # --- text detector -----------------------------------------------------
    net = CRAFT()
    print('Loading text detection model from checkpoint {}'.format(
        text_detection_modelpara))
    if use_cuda:
        net.load_state_dict(
            copyStateDict(torch.load(text_detection_modelpara)))
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    else:
        net.load_state_dict(copyStateDict(
            torch.load(text_detection_modelpara, map_location='cpu')))

    # --- OCR model ---------------------------------------------------------
    params = {
        'n': 256,
        'm': 256,
        'dim_attention': 512,
        'D': 684,
        'K': 5748,
        'growthRate': 24,
        'reduction': 0.5,
        'bottleneck': True,
        'use_dropout': True,
        'input_channels': 3,
        'cuda': use_cuda,
    }
    OCR = Encoder_Decoder(params)
    if use_cuda:
        OCR.load_state_dict(copyStateDict(torch.load(ocr_modelpara)))
        # NOTE(review): the OCR model is wrapped in DataParallel without
        # being moved to the GPU first - confirm this is intended.
        OCR = torch.nn.DataParallel(OCR)
        cudnn.benchmark = False
    else:
        OCR.load_state_dict(
            copyStateDict(torch.load(ocr_modelpara, map_location='cpu')))

    OCR.eval()
    net.eval()

    # Invert the dictionary so that a class index maps back to its symbol.
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    t = time.time()

    fontPIL = '/usr/share/fonts/truetype/fonts-japanese-gothic.ttf'  # japanese font
    size = 40
    colorBGR = (0, 0, 255)

    paper = ET.Element('paper')
    paper.set('xmlns', "http://codh.rois.ac.jp/modern-magazine/")

    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing/undecodable file by returning
            # None; skip it instead of failing inside cvtColor.
            print('Skipping unreadable image: {}'.format(image_path))
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        page_h, page_w = image.shape[0], image.shape[1]
        print(image_path)

        page = ET.SubElement(paper, "page")
        page.set('file', os.path.basename(image_path).replace('.jpg', ''))
        page.set('height', str(page_h))
        page.set('width', str(page_w))
        page.set('dpi', str(100))
        page.set('number', str(1))

        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold,
                                             args.low_text, args.cuda,
                                             args.poly)

        for box in bboxes:
            poly = np.array(box).astype(np.int32)
            # Axis-aligned bounds of the box; the lower bounds are clamped
            # to the image origin.
            min_x = max(int(np.min(poly[:, 0])), 0)
            max_x = int(np.max(poly[:, 0]))
            min_y = max(int(np.min(poly[:, 1])), 0)
            max_y = int(np.max(poly[:, 1]))
            if max_x <= min_x or max_y <= min_y:
                # Degenerate box, or one lying left of/above the image: the
                # crop would be empty (or wrap around via a negative index).
                continue
            input_img = image[min_y:max_y, min_x:max_x]
            if input_img.size == 0:
                # Box lies entirely past the right/bottom edge of the image.
                continue

            box_w = max_x - min_x + 1
            box_h = max_y - min_y + 1
            line = ET.SubElement(page, "line")
            line.set("x", str(min_x))
            line.set("y", str(min_y))
            line.set("height", str(box_h))
            line.set("width", str(box_w))

            # Scale the shorter side to 20 px and round the longer side to a
            # multiple of 20 px.
            if box_w < box_h:
                rate = 20.0 / box_w
                new_w = int(round(box_w * rate))
                new_h = int(round(box_h * rate / 20.0) * 20)
            else:
                rate = 20.0 / box_h
                new_w = int(round(box_w * rate / 20.0) * 20)
                new_h = int(round(box_h * rate))
            input_img = cv2.resize(input_img, (new_w, new_h))

            # Weighted channel sum (image is RGB here) stored as uint8, then
            # scaled to [0, 1] float32.
            mat = np.zeros([1, new_h, new_w], dtype='uint8')
            mat[0, :, :] = (0.299 * input_img[:, :, 0]
                            + 0.587 * input_img[:, :, 1]
                            + 0.114 * input_img[:, :, 2])
            xx_pad = mat.astype(np.float32) / 255.
            xx_pad = torch.from_numpy(xx_pad[None, :, :, :])  # (1,1,H,W)
            if use_cuda:
                # NOTE(review): Tensor.cuda() returns a copy and the result
                # is discarded, so xx_pad stays on the CPU. Left unchanged
                # because the OCR model is not moved to the GPU either -
                # confirm the intended device before assigning the result.
                xx_pad.cuda()

            with torch.no_grad():
                sample, score, _ = gen_sample(OCR, xx_pad, params, args.cuda,
                                              k=10, maxlen=600)
            # Length-normalise the scores and keep the lowest-scoring sample.
            score = score / np.array([len(s) for s in sample])
            best = sample[score.argmin()]

            chars = []
            for vv in best:
                if vv == 0:  # <eol>
                    break
                chars.append(chr(int(worddicts_r[vv], 16)))
            result = ''.join(chars)

            line.text = result
            image = cv2_putText_1(img=image,
                                  text=result,
                                  org=(min_x, max_x, min_y, max_y),
                                  fontFace=fontPIL,
                                  fontScale=size,
                                  color=colorBGR)

        print('save image')
        file_utils.saveResult(image_path, image, polys, dirname=result_folder)

    xml_string = ET.tostring(paper, 'Shift_JIS')
    # Context manager guarantees the file is closed even if the write fails.
    with codecs.open('./data/result.xml', 'w', 'shift_jis') as fout:
        fout.write(xml_string.decode('shift_jis'))

    print("elapsed time : {}s".format(time.time() - t))
else: refine_net.load_state_dict( copyStateDict( torch.load(args.refiner_model, map_location='cpu'))) refine_net.eval() args.poly = True t = time.time() # load data for k, image_path in enumerate(image_list): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r') image = imgproc.loadImage(image_path) bboxes, polys, score_text, score_link, cc_mask = test_net( net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net) file_utils.saveResult(image_path, image[:, :, ::-1], polys, score_text, score_link, cc_mask, dirname=result_folder) print("elapsed time : {}s".format(time.time() - t))
loss_value / 2) ) loss_time = 0 loss_value = 0 st = time.time() # if loss < compare_loss: # print('save the lower loss iter, loss:',loss) # compare_loss = loss # torch.save(net.module.state_dict(), # './output/real_weights/lower_loss.pth') print('Saving state, iter:', epoch) torch.save(net.module.state_dict(), 'weights/clr_' + repr(epoch) + '.pth') for k, image_path in enumerate(image_list): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r') image = imgproc.loadImage(image_path) bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, args.ocr_type) # save score text filename, file_ext = os.path.splitext(os.path.basename(image_path)) # mask_file = result_folder + "/res_" + filename + '_mask.jpg' # cv2.imwrite(mask_file, score_text) file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname='weights/' + repr(epoch) + '/') # test('./output/clr_' + repr(epoch) + '.pth') # test('./output/mlt_25k.pth') # getresult()
refine_net.eval() args.poly = True t = time.time() # load data for k, image_path in enumerate(image_list): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r') image = imgproc.loadImage(image_path) bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net) # save score text filename, file_ext = os.path.splitext(os.path.basename(image_path)) #mask_file = result_folder + "/res_" + filename + '_mask.jpg' #cv2.imwrite(mask_file, score_text) file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder, write_image=False) print("elapsed time : {}s".format(time.time() - t))
torch.load(args.refiner_model, map_location='cpu'))) refine_net.eval() args.poly = True t = time.time() # load data for k, image_path in enumerate(image_list): print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r') image = imgproc.loadImage(image_path) bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold, args.low_text, args.cuda, args.poly, refine_net) # save score text filename, file_ext = os.path.splitext(os.path.basename(image_path)) mask_file = result_folder + "/res_" + filename + '_mask.jpg' cv2.imwrite(mask_file, score_text) file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder) print("elapsed time : {}s".format(time.time() - t))
# Start the wall-clock timer for the whole detection pass.
t = time.time()

# Run the detector on every listed image and store its outputs in
# result_folder.
for k, image_path in enumerate(image_list):
    progress = "Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path)
    print(progress, end='\r')

    image = imgproc.loadImage(image_path)
    detection = test_net(net, image, args.text_threshold, args.link_threshold,
                         args.low_text, args.cuda, args.poly)
    bboxes, polys, score_text = detection

    # Base name of the image without its extension; used in the mask name.
    filename, file_ext = os.path.splitext(os.path.basename(image_path))

    # The score map is written only when the text-only flag equals False.
    text_only = args.text_result_only
    if text_only == False:
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)

    # The image is handed over with its last axis reversed.
    file_utils.saveResult(image_path,
                          image[:, :, ::-1],
                          polys,
                          dirname=result_folder,
                          textFileOnly=text_only)

print("elapsed time : {}s".format(time.time() - t))