def eval2015(net, test_folder, result_folder, text_threshold=0.7, link_threshold=0.4, low_text=0.4):
    image_list, _, _ = file_utils.get_files(test_folder)
    t = time.time()
    res_gt_folder = os.path.join(result_folder, 'gt')
    res_mask_folder = os.path.join(result_folder, 'mask')
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\n')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, text_threshold, link_threshold, low_text,
                                             True, False, 2240, 1.5, False)
        # save score text; note: os.path.join discards everything before a component
        # that starts with '/', so the second argument must not have a leading slash
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = os.path.join(res_mask_folder, "res_" + filename + '_mask.jpg')
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult15(image_path, polys, dirname=res_gt_folder)
    eval_2015(os.path.join(result_folder, 'gt'))
    print("elapsed time : {}s".format(time.time() - t))
def test_net(model=None, mapper=None, spaces=None, load_from=None, save_to=None):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        image_name_nums = []
        res = []
        img_lists, _, _, name_list = file_utils.get_files(load_from)
        for name in name_list:
            image_name_nums.append(name.split('_')[0])
        for k, in_path in enumerate(img_lists):
            # pre-process the data before passing it through the net
            image = imgproc.loadImage(in_path)
            image = imgproc.cvtColorGray(image)
            image = imgproc.tranformToTensor(image, opt.RECOG_TRAIN_SIZE).unsqueeze(0)
            image = image.to(device)
            y = model(image)
            _, pred = torch.max(y.data, 1)
            res.append(mapper[0][pred])
        # save the result, MODE: file | stdout | all
        ltr_utils.display_stdout(chars=res, space=spaces, img_name=image_name_nums,
                                 MODE='file', save_to=save_to)
def PredictDetectionFrame(args, net, image_path):
    """ Detect text boxes in a single frame image """
    result_folder = './result/'
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)
    t = time.time()
    image = imgproc.loadImage(image_path)
    bboxes, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                  args.low_text, args.cuda, args)
    filename, file_ext = 'test', '.jpg'
    fl = file_utils.saveResultFrame(image_path, image[:, :, ::-1], bboxes, dirname=result_folder)
    print("elapsed time : {}s".format(time.time() - t))
    return fl
def PredictDetection(args, net, image_path, opt, reco):
    """ Detect text in a single image and pass the boxes on to recognition """
    image_list, _, _ = file_utils.get_files(args.test_folder)  # note: unused below
    result_folder = './result/'
    if not os.path.isdir(result_folder):
        os.mkdir(result_folder)
    t = time.time()
    image = imgproc.loadImage(image_path)
    bboxes, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                  args.low_text, args.cuda, args)
    fl = file_utils.saveResult(image_path, image[:, :, ::-1], bboxes, opt, reco,
                               dirname=result_folder)
    print("elapsed time detecting : {}s".format(time.time() - t))
    log.info(f'elapsed time detecting : {time.time() - t}s')
    return fl
def test(modelpara):
    # load net
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint {}'.format(modelpara))
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(modelpara)))
    else:
        net.load_state_dict(copyStateDict(torch.load(modelpara, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    t = time.time()
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                             args.low_text, args.cuda, args.poly)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        # cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
    print("elapsed time : {}s".format(time.time() - t))
def process(img_path, city_id):
    pre_img_path = preprocessing(img_path)
    time.sleep(1.5)
    image = imgproc.loadImage(pre_img_path)
    if city_id == 1:  # for example, TP.HCM
        crs = 9210  # note: crs is only set for city_id == 1 in this snippet
    digitscomma = img_to_text(image)
    x, y = x_y_re(digitscomma)
    if x != 0 or y != 0:
        # convert VN-2000 plane coordinates to WGS-84 lat/lng
        lat, lng = vn2k_to_wgs83((x, y), crs)
        return lat, lng
    else:
        return 0, 0
def infer_detection(impath, net, refine_net, args):  # CRAFT
    """ For test images in a folder """
    image_list, _, _ = file_utils.get_files(impath)
    image_paths = []
    image_names = []
    # CUSTOMISE START
    start = impath
    result_folder = './Results/'
    data = {}
    t = time.time()
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        image_name = os.path.relpath(image_path, start)
        bboxes, polys, score_text, det_scores = test.test_net(
            net, image, args.text_threshold, args.link_threshold, args.low_text,
            args.cuda, args.poly, args, refine_net)
        # collect per-box detection confidence and corner coordinates, keyed by box index
        bbox_score = {}
        index = 0
        for box, conf in zip(bboxes, det_scores):
            bbox_score[str(index)] = {}
            bbox_score[str(index)]['detconf'] = str(conf)
            bbox_score[str(index)]['box'] = []
            for coors in box:
                temp = [str(coors[0]), str(coors[1])]
                bbox_score[str(index)]['box'].append(temp)
            index += 1
        data[image_name] = bbox_score
    if not os.path.isdir('./Results'):
        os.mkdir('./Results')
    # the with-statement closes the file, so no explicit close() is needed
    with open('./Results/data.json', 'w') as jsonfile:
        json.dump(data, jsonfile)
    print("elapsed time : {}s".format(time.time() - t))
def test_data_transform(self, index):
    image = imgproc.loadImage(self.images[index])
    image = imgproc.cvtColorGray(image)
    image = imgproc.tranformToTensor(img=image, size=self.size)
    label = self.labels[index]
    label_num = self.labels_num[index]
    return image, label_num
def test(modelpara, args=None, result_folder=None):
    # load net
    net_encoder = builder.build_encoder(
        arch='resnet50dilated',
        fc_dim=2048,
        weights='',
    )
    net_decoder = builder.build_decoder(
        arch='c1',
        weights='',
        fc_dim=2048,
        num_class=2,
    )
    net = SegmentationModule(net_encoder, net_decoder, False)
    print('Loading weights from checkpoint {}'.format(modelpara))
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(modelpara)))
    else:
        net.load_state_dict(copyStateDict(torch.load(modelpara, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    t = time.time()
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        with torch.no_grad():
            bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                                 args.link_threshold, args.low_text,
                                                 args.cuda, args.poly, args)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        # cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
    net.train()
    print("elapsed time : {}s".format(time.time() - t))
def detect(image_path):
    result_folder = "result/" + str(time.time()) + "/"
    if not os.path.isdir(result_folder):
        # makedirs also creates the parent "result/" directory when it is missing
        os.makedirs(result_folder)
    image = imgproc.loadImage(image_path)
    bboxes, polys, score_text = test_net(result_folder, net, image, text_threshold,
                                         link_threshold, low_text, cuda, args.poly, refine_net)
    return result_folder
def __getitem__(self, idx):
    image = imgproc.loadImage(self.image_list[idx])
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, self.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=self.mag_ratio)
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    # x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    return x, 1
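A minimal usage sketch for a dataset exposing this __getitem__; the class name CraftInferenceDataset and its constructor arguments are placeholders (the snippet does not show them), and a loaded CRAFT `net` as in the other snippets here is assumed.

from torch.utils.data import DataLoader

# hypothetical: dataset class name and constructor are not shown above
dataset = CraftInferenceDataset(image_list, canvas_size=1280, mag_ratio=1.5)
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)
for x, _ in loader:
    with torch.no_grad():
        y, feature = net(x)  # assumes the standard CRAFT forward returning (y, feature)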
def main(trained_model='weights/craft_mlt_25k.pth', text_threshold=0.7, low_text=0.4,
         link_threshold=0.4, cuda=True, canvas_size=1280, mag_ratio=1.5, poly=False,
         show_time=False, test_folder='/data/', refine=True,
         refiner_model='weights/craft_refiner_CTW1500.pth'):
    # load net
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint (' + trained_model + ')')
    if cuda:
        net.load_state_dict(copyStateDict(torch.load(trained_model)))
    else:
        net.load_state_dict(copyStateDict(torch.load(trained_model, map_location='cpu')))
    if cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    # LinkRefiner
    refine_net = None
    if refine:
        from refinenet import RefineNet
        refine_net = RefineNet()
        print('Loading weights of refiner from checkpoint (' + refiner_model + ')')
        if cuda:
            refine_net.load_state_dict(copyStateDict(torch.load(refiner_model)))
            refine_net = refine_net.cuda()
            refine_net = torch.nn.DataParallel(refine_net)
        else:
            refine_net.load_state_dict(copyStateDict(torch.load(refiner_model, map_location='cpu')))
        refine_net.eval()
        poly = True
    t = time.time()
    # load data; note: image_path and result_folder are not defined in this function,
    # so they must exist at module scope
    image = imgproc.loadImage(image_path)
    bboxes, polys, score_text = test_net(net, image, text_threshold, link_threshold, low_text,
                                         cuda, poly, refine_net)
    # save score text
    filename, file_ext = os.path.splitext(os.path.basename(image_path))
    mask_file = result_folder + "/res_" + filename + '_mask.jpg'
    cv2.imwrite(mask_file, score_text)
    final_img = file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
    print("elapsed time : {}s".format(time.time() - t))
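A possible driver for main() above, assuming image_path and result_folder are module-level globals (they are not defined inside the function); the paths are placeholders.

# hypothetical driver; main() reads image_path and result_folder from module scope
image_path = 'images/sample.jpg'
result_folder = './result'
os.makedirs(result_folder, exist_ok=True)
main(trained_model='weights/craft_mlt_25k.pth', cuda=False, refine=False)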
def main():
    # load net
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model)))
    else:
        net.load_state_dict(copyStateDict(torch.load(args.trained_model, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    # LinkRefiner
    refine_net = None
    if args.refine:
        from refinenet import RefineNet
        refine_net = RefineNet()
        print('Loading weights of refiner from checkpoint (' + args.refiner_model + ')')
        if args.cuda:
            refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
            refine_net = refine_net.cuda()
            refine_net = torch.nn.DataParallel(refine_net)
        else:
            refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model,
                                                                map_location='cpu')))
        refine_net.eval()
        args.poly = True
    t = time.time()
    print(image_list)
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                             args.low_text, args.cuda, args.poly, refine_net)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
    # print("elapsed time : {}s".format(time.time() - t))
def detect(self, path):
    image = imgproc.loadImage(path)
    refine_net = None
    # link_threshold is set very high (999999), effectively disabling link-based merging
    bboxes, polys, score_text = self.test_net(self.net, image, 0.7, 999999, 0.5, False, refine_net)
    bbox = []
    for i, box in enumerate(polys):
        poly = np.array(box).astype(np.int32).reshape((-1))
        # pad each box slightly: 3 px on the left, 5 px on top and bottom
        bbox.append([poly[0] - 3, poly[1] - 5, poly[2], poly[5] + 5])
    file_utils.saveResult(path, image[:, :, ::-1], polys, dirname="Detect_result/")
    bbox.sort(key=sorting_key)  # sorting_key is defined elsewhere in this module
    return bbox
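The sorting_key helper used above is not shown in this snippet; a minimal sketch (an assumption about the intended reading order: top-to-bottom, then left-to-right) might look like this.

def sorting_key(box, line_height=10):
    # hypothetical helper: quantize y so boxes on roughly the same text line
    # sort together, then order left-to-right within a line
    x1, y1, x2, y2 = box
    return (y1 // line_height, x1)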
def test(pre_model, res_dir=result_folder, mode=0):
    ## mode: 0 = ic15, 1 = ours
    # load net
    net = CRAFT()  # initialize
    text_threshold = float(0.7)
    low_text = float(0.4)
    link_threshold = float(0.4)
    cuda = True
    poly = False
    print('Loading weights from checkpoint {}'.format(pre_model))
    # load_weights(...).expect_partial() is the TensorFlow checkpoint API,
    # so this CRAFT is a TensorFlow implementation rather than the PyTorch one
    # loaded_model = tf.keras.models.load_model(pre_model)
    loaded_model = net.load_weights(pre_model).expect_partial()
    print(loaded_model)
    t = time.time()
    print("#############")
    print(net)
    if mode != 0:
        image_list = image_list_ours
    else:
        image_list = image_list_ic15
    print(image_list)
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        save_file_name = filename
        bboxes, polys, score_text = test_net(net, image, text_threshold, link_threshold,
                                             low_text, cuda, poly, filename)
        # save score text
        mask_file = res_dir + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=res_dir)
    print("Eval elapsed time : {}s".format(time.time() - t))
def representative_data_gen():
    for file in os.listdir(dataset_path)[:10]:
        file_path = dataset_path + file
        image = imgproc.loadImage(file_path)
        image = cv2.resize(image, dsize=(800, 1280), interpolation=cv2.INTER_LINEAR)
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
        ratio_h = ratio_w = 1 / target_ratio
        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
        x = x.cpu().detach().numpy()
        yield [x]
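A generator with this name and yield shape is typically wired into TensorFlow Lite post-training quantization; the converter setup below is a sketch under that assumption, and the SavedModel path is a placeholder.

import tensorflow as tf

# hypothetical: calibrate int8 quantization of an exported detector with the generator above
converter = tf.lite.TFLiteConverter.from_saved_model('craft_saved_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
tflite_model = converter.convert()
open('craft_int8.tflite', 'wb').write(tflite_model)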
def runCraftNet(image_list):
    # image_list: a list of image file paths
    args = argparse.Namespace(
        canvas_size=1280, cuda=False, link_threshold=0.4, low_text=0.4, mag_ratio=1.5,
        poly=False, refine=False, refiner_model='weights/craft_refiner_CTW1500.pth',
        show_time=False, test_folder='images', text_threshold=0.7,
        trained_model='craft_mlt_25k.pth')
    net = CRAFT()  # initialize
    net.load_state_dict(copyStateDict(torch.load(args.trained_model, map_location='cpu')))
    net.eval()
    # image_list, _, _ = file_utils.get_files(args.test_folder)
    t = time.time()
    # load data
    refine_net = None
    for k, image_path in enumerate(image_list):
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                             args.low_text, args.cuda, args.poly, refine_net)
    # note: only the detections of the last image in the list are converted and returned
    img = np.array(image[:, :, ::-1])
    txt = []
    for i, box in enumerate(polys):
        poly = np.array(box).astype(np.int32).reshape((-1))
        strResult = ','.join([str(p) for p in poly])
        txt.append(strResult)
    return [img, txt]
def __init__(self, args):
    filelist, _, _ = file_utils.list_files('./data/train/data')
    self.images = []
    self.confmaps = []
    self.scores_region = []
    self.scores_link = []
    for filename in filelist:
        # get datapath
        dataset = os.path.dirname(filename).split(os.sep)[-1]
        # splitext returns a (root, ext) tuple; keep only the root for the directory name
        filenum = os.path.splitext(os.path.basename(filename))[0]
        label_dir = './data/train/ground_truth/{}/gt_{}/'.format(dataset, filenum)
        # If the ground truth does not exist, skip this sample
        if not os.path.exists(label_dir):
            continue
        image = imgproc.loadImage(filename)
        score_region = torch.load(label_dir + 'region.pt')
        score_link = torch.load(label_dir + 'link.pt')
        conf_map = torch.load(label_dir + 'conf.pt')
        # resize
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
        # Image Preprocess
        x = imgproc.normalizeMeanVariance(img_resized)
        x = x.transpose((2, 0, 1))  # [h, w, c] to [c, h, w]
        h, w, _ = img_resized.shape
        # GT reshape; cv2.resize expects an integer (width, height) pair
        score_region = cv2.resize(score_region, dsize=(w // 2, h // 2))
        score_link = cv2.resize(score_link, dsize=(w // 2, h // 2))
        conf_map = cv2.resize(conf_map, dsize=(w // 2, h // 2))
        self.scores_region.append(score_region)
        self.scores_link.append(score_link)
        self.confmaps.append(conf_map)
        self.images.append(x)
def runLineCut(imagePath, net, resultsPath):
    image = imgproc.loadImage(imagePath)
    maxsize = 500
    # cv2.resize takes (width, height); scale the longer side down to maxsize
    if image.shape[0] > maxsize:  # height too large
        image = cv2.resize(image, (int(image.shape[1] * maxsize / image.shape[0]), maxsize))
    if image.shape[1] > maxsize:  # width too large
        image = cv2.resize(image, (maxsize, int(image.shape[0] * maxsize / image.shape[1])))
    originImage = getOriginImage(imagePath)
    bboxes, polys, score_text = test_net(net, image, 0.7, 0.4, 0.4, True, False)
    lines = []
    while True:
        is_exit = False
        boxInLineTotal = []
        # scan every row (y coordinate) of the image; shape[0] is the height
        for y in range(image.shape[0]):
            boxInLine = []
            for i, box in enumerate(polys):
                box = np.array(box).astype(np.int32).tolist()
                if is_Box_In_Lines(box, lines):
                    firstPoint = box[0]
                    endPoint = box[2]
                    if y >= firstPoint[1] and y <= endPoint[1]:
                        boxInLine.append(box)
            if is_SubList(boxInLine, boxInLineTotal):
                boxInLineTotal = boxInLine
        if boxInLineTotal not in lines:
            is_exit = True
            lines.append(boxInLineTotal)
        if not is_exit:
            break
    for i, line in enumerate(lines):
        line = sorted(line, key=lambda box: box[0])
        for ii, box in enumerate(line):
            firstPoint = box[0]
            endPoint = box[2]
            imageCroped = image[firstPoint[1]:endPoint[1], firstPoint[0]:endPoint[0]]
            cv2.imwrite(resultsPath + '/' + str(i) + '_' + str(ii) + '.png', imageCroped)
    return bboxes, polys, score_text
def inference(net):
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                             args.low_text, args.cuda, args.poly, refine_net)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
def train_data_transform(self, index):
    image = imgproc.loadImage(self.images[index])
    image = imgproc.cvtColorGray(image)
    # Data augmentation: elastic distortion and image blur, each applied with 50% probability
    if self.distort:
        if random.randint(0, 1):
            image = self.distort_image(image)
    if self.blur:
        if random.randint(0, 1):
            blur_extent = 1
            image = self.blur_image(image, blur_extent)
    image = imgproc.tranformToTensor(img=image, size=self.size)
    label = self.labels[index]
    label_num = self.labels_num[index]
    return image, label_num
def detectByImagePath(self, image_path, targetBoxes, qualityProfile=[1, 0, 0], debug=False):
    """ TODO: describe method """
    image = imgproc.loadImage(image_path)
    for targetBox in targetBoxes:
        x = min(targetBox['x1'], targetBox['x2'])
        w = abs(targetBox['x2'] - targetBox['x1'])
        y = min(targetBox['y1'], targetBox['y2'])
        h = abs(targetBox['y2'] - targetBox['y1'])
        image_part = image[y:y + h, x:x + w]
        points = self.detectInBbox(image_part)
        propablyPoints = addCoordinatesOffset(points, x, y)
        targetBox['points'] = []
        targetBox['imgParts'] = []
        if len(propablyPoints):
            targetPointsVariants = makeRectVariants2(propablyPoints, h, w, qualityProfile)
            if len(targetPointsVariants) > 1:
                imgParts = [getCvZoneRGB(image, reshapePoints(rect, 1))
                            for rect in targetPointsVariants]
                idx = detectBestPerspective(normalizePerspectiveImages(imgParts))
                print('--------------------------------------------------')
                print('idx={}'.format(idx))
                targetBox['points'] = targetPointsVariants[idx]
                targetBox['imgParts'] = imgParts
            else:
                targetBox['points'] = targetPointsVariants[0]
    return targetBoxes, image
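A usage sketch for detectByImagePath; the dict keys match what the method reads, while `detector` (an instance of the class this method belongs to) and the image path are placeholders.

# hypothetical usage; each targetBox is a dict with x1/x2/y1/y2 pixel coordinates
boxes = [{'x1': 100, 'x2': 300, 'y1': 50, 'y2': 120}]
targetBoxes, image = detector.detectByImagePath('samples/car.jpg', boxes)
print(targetBoxes[0]['points'])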
def test(modelpara):
    # load net
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint {}'.format(modelpara))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = net.to(device)
    net.eval()  # evaluation mode: the weights are not updated
    t = time.time()
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
        image = imgproc.loadImage(image_path)
        bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                             args.low_text, args.cuda, args.poly)
        print("\n bboxes = ", bboxes, "\n poly = ", polys,
              "\n text = ", score_text, "\n text.shape = ", score_text.shape)
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        # cv2.imwrite(mask_file, score_text)
        print("save in " + result_folder)
        file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
    print("elapsed time : {}s".format(time.time() - t))
def reformat_input(image):
    if type(image) == str:
        if image.startswith('http://') or image.startswith('https://'):
            # download the remote image to a temporary file for the grayscale read
            tmp, _ = urlretrieve(image, reporthook=printProgressBar(
                prefix='Progress:', suffix='Complete', length=50))
            img_cv_grey = cv2.imread(tmp, cv2.IMREAD_GRAYSCALE)
            os.remove(tmp)
        else:
            img_cv_grey = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
            image = os.path.expanduser(image)
        img = loadImage(image)  # can accept URL
    elif type(image) == bytes:
        nparr = np.frombuffer(image, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_cv_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    elif type(image) == np.ndarray:
        if len(image.shape) == 2:  # grayscale
            img_cv_grey = image
            img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        elif len(image.shape) == 3 and image.shape[2] == 3:  # BGRscale
            img = image
            img_cv_grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        elif len(image.shape) == 3 and image.shape[2] == 4:  # RGBAscale
            img = image[:, :, :3]
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            img_cv_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        LOGGER.warning('Invalid input type. Supporting format = string(file path or url), '
                       'bytes, numpy array')
    return img, img_cv_grey
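A quick sketch of the three input types this function accepts; the file path is a placeholder.

# hypothetical inputs; each call returns (color image, grayscale image)
img, img_cv_grey = reformat_input('samples/page.jpg')                      # file path
img, img_cv_grey = reformat_input(open('samples/page.jpg', 'rb').read())   # raw bytes
img, img_cv_grey = reformat_input(np.zeros((64, 64, 3), dtype=np.uint8))   # BGR ndarray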
def __getitem__(self, i):
    # Image loading
    image = imgproc.loadImage(self.images[i])
    # Preprocess image
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, self.args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=self.args.mag_ratio)
    img_resized = imgproc.fill_canvas(img_resized, self.args.canvas_size)
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.tensor(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    # Load labels
    label_dir = self.labels[i]
    region = torch.tensor(torch.load(label_dir + 'region.pt'), dtype=torch.float64)
    link = torch.tensor(torch.load(label_dir + 'link.pt'), dtype=torch.float64)
    conf = torch.tensor(torch.load(label_dir + 'conf.pt'), dtype=torch.float64)
    return x, region, link, conf
# refiner loading; this snippet starts at the else branch, so the GPU branch is
# reconstructed from the identical pattern used elsewhere in this file
if args.cuda:
    refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
    refine_net = refine_net.cuda()
    refine_net = torch.nn.DataParallel(refine_net)
else:
    refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model, map_location='cpu')))
refine_net.eval()
args.poly = True
t = time.time()
# load data
for k, image_path in enumerate(image_list):
    print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\r')
    image = imgproc.loadImage(image_path)
    bboxes, polys, score_text = test_net(net, image, args.text_threshold, args.link_threshold,
                                         args.low_text, args.cuda, args.poly, refine_net)
    # save score text
    filename, file_ext = os.path.splitext(os.path.basename(image_path))
    mask_file = result_folder + "/res_" + filename + '_mask.jpg'
    cv2.imwrite(mask_file, score_text)
    file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
def Detection(net, urlFilepath):
    try:
        # CRAFT
        cuda_stats = False
        device = torch.device('cpu')
        # device = torch.device('cuda')
        if device.type == 'cpu':
            cuda_stats = False
        else:
            cuda_stats = True
        args = {"trained_model": '/data/OCR_code/Pipeline/craft_mlt_25k.pth',
                "text_threshold": 0.7,
                "low_text": 0.4,
                "link_threshold": 0.4,
                "cuda": cuda_stats,  # changed from True to False
                "canvas_size": 1280,
                "mag_ratio": 1.5,
                "poly": False,
                "show_time": False,
                "test_folder": "/data/OCR_dir/",
                "filepath": 'Data//FoodDetection/data/text_detection/RDProject/ocr_1000056.jpg',
                "refine": False,
                "refiner_model": 'weights/craft_refiner_CTW1500.pth'}
        filename = urlFilepath.split("/")[-1]
        # check the saved image
        filepath = urlFilepath
        if os.path.isfile(filepath):
            image_list = [filepath]
            image_names = []
            image_paths = []
            # CUSTOMISE START
            start = filepath.split(filename)[0]  # determined flexibly from the file path
            for num in range(len(image_list)):
                image_names.append(os.path.relpath(image_list[num], start))
            crop_path = start + '%s_crop' % (filename.split('.')[0])
            if not os.path.isdir(crop_path):
                os.mkdir(crop_path)
            data = pd.DataFrame(columns=['image_name', 'word_bboxes', 'pred_words', 'align_text'])
            data['image_name'] = image_names
            box_idx = 0
            bbox_dict = {}
            # load data
            for k, image_path in enumerate(image_list):
                image = imgproc.loadImage(image_path)
                bboxes, polys, score_text, det_scores = test_.test_net(
                    net, image, args["text_threshold"], args["link_threshold"],
                    args["low_text"], args["cuda"], args["poly"], args)  # refinenet = None
                bbox_score = {}
                bbox_list = []
                for box_num in range(len(bboxes)):
                    if det_scores[box_num] < 0.85:  # score filtering
                        continue
                    key = str(det_scores[box_num])
                    item = bboxes[box_num]
                    bbox_dict[box_idx] = item.tolist()
                    box_idx += 1
                    bbox_score[key] = item
                data['word_bboxes'][k] = bbox_score
            csv_file = start + '%s_data.csv' % (filename.split('.')[0])  # <image name>_data.csv
            data.to_csv(csv_file, sep=',', na_rep='Unknown')
            del data
            data = pd.read_csv(csv_file)
            # Crop
            for image_num in range(data.shape[0]):
                image = cv2.imread(os.path.join(start, data['image_name'][image_num]))
                # splitext drops the extension safely; str.strip('.jpg') would strip
                # characters rather than the suffix
                image_name = os.path.splitext(data['image_name'][image_num])[0]
                score_bbox = data['word_bboxes'][image_num].split('),')
                cropped_imgs = crop_words_.generate_words(image_name, score_bbox, image,
                                                          crop_path, bbox_show=False)
            print("=========Text Detection and Crop Ends ============")
        # else:
        #     raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filepath)
    except Exception as e:  # catch Exception so every error message can be reported
        traceback.print_exc()
        return str(e), 400
    return [bbox_dict, cropped_imgs], 200
def test(modelpara):
    # load net
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint {}'.format(modelpara))
    if args.cuda:
        net.load_state_dict(copyStateDict(torch.load(modelpara)))
    else:
        net.load_state_dict(copyStateDict(torch.load(modelpara, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    t = time.time()
    # load data
    for k, image_path in enumerate(image_list):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list), image_path), end='\n')
        image = imgproc.loadImage(image_path)
        res = image.copy()
        gh_pred, bboxes_pred, polys_pred, size_heatmap = test_net(
            net, image, args.text_threshold, args.link_threshold, args.low_text,
            args.cuda, args.poly)
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        result_dir = os.path.join(result_folder, filename)
        os.makedirs(result_dir, exist_ok=True)
        # save one heatmap image per field class
        for gh_img, field in zip(gh_pred, CLASSES):
            img = imgproc.cvt2HeatmapImg(gh_img)
            img_path = os.path.join(result_dir, 'res_{}_{}.jpg'.format(filename, field))
            cv2.imwrite(img_path, img)
        h, w = image.shape[:2]
        img = cv2.resize(image, size_heatmap)[::, ::, ::-1]
        img_path = os.path.join(result_dir, 'res_{}.jpg'.format(filename))
        cv2.imwrite(img_path, img)
        # draw labelled boxes per field class
        res = cv2.resize(res, size_heatmap)
        for polys, field in zip(polys_pred, CLASSES):
            TEXT_WIDTH = 10 * len(field) + 10
            TEXT_HEIGHT = 15
            polys = np.int32([poly.reshape((-1, 1, 2)) for poly in polys])
            res = cv2.polylines(res, polys, True, (0, 0, 255), 2)
            # reshape each polygon into a label tab to the left of its box;
            # the corner using the original coordinates must be overwritten last
            for poly in polys:
                poly[1, 0] = [poly[0, 0, 0] - 10, poly[0, 0, 1]]
                poly[2, 0] = [poly[0, 0, 0] - 10, poly[0, 0, 1] + TEXT_HEIGHT]
                poly[3, 0] = [poly[0, 0, 0] - TEXT_WIDTH, poly[0, 0, 1] + TEXT_HEIGHT]
                poly[0, 0] = [poly[0, 0, 0] - TEXT_WIDTH, poly[0, 0, 1]]
            res = cv2.fillPoly(res, polys, (224, 224, 224))
            for poly in polys:
                res = cv2.putText(res, field, tuple(poly[3, 0] + [+5, -5]),
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), thickness=1)
        res_file = os.path.join(result_dir, 'res_{}_bbox.jpg'.format(filename))
        cv2.imwrite(res_file, res[::, ::, ::-1])
    print("elapsed time : {}s".format(time.time() - t))
def ground_truth(args):
    # initiate pretrained network
    net = CRAFT()  # initialize
    print('Loading weights from checkpoint (' + args.trained_model + ')')
    if args.cuda:
        net.load_state_dict(test.copyStateDict(torch.load(args.trained_model)))
    else:
        net.load_state_dict(test.copyStateDict(torch.load(args.trained_model,
                                                          map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False
    net.eval()
    filelist, _, _ = file_utils.list_files('/home/ubuntu/Kyumin/Autotation/data/IC13/images')
    for img_name in filelist:
        # get datapath
        if 'train' in img_name:
            label_name = img_name.replace('images/train/', 'labels/train/gt_').replace('jpg', 'txt')
        else:
            label_name = img_name.replace('images/test/', 'labels/test/gt_').replace('jpg', 'txt')
        label_dir = img_name.replace('Autotation', 'craft').replace('images', 'labels').replace('.jpg', '/')
        os.makedirs(label_dir, exist_ok=True)
        image = imgproc.loadImage(img_name)
        gt_boxes = []
        gt_words = []
        with open(label_name, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
        for line in lines:
            if 'IC13' in img_name:
                # IC13: box coordinates and transcription are separated by double quotes
                gt_box, gt_word, _ = line.split('"')
                if 'train' in img_name:
                    x1, y1, x2, y2 = [int(a) for a in gt_box.strip().split(' ')]
                else:
                    x1, y1, x2, y2 = [int(a.strip()) for a in gt_box.split(',') if a.strip().isdigit()]
                gt_boxes.append(np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]]))
                gt_words.append(gt_word)
            elif 'IC15' in img_name:
                # IC15: 8 comma-separated coordinates, then the transcription
                # (which may itself contain commas)
                gt_data = line.strip().split(',')
                gt_box = gt_data[:8]
                if len(gt_data) > 9:
                    gt_word = ','.join(gt_data[8:])
                else:
                    gt_word = gt_data[-1]
                gt_box = [int(a) for a in gt_box]
                gt_box = np.reshape(np.array(gt_box), (4, 2))
                gt_boxes.append(gt_box)
                gt_words.append(gt_word)
        score_region, score_link, conf_map = generate_gt(net, image, gt_boxes, gt_words, args)
        torch.save(score_region, label_dir + 'region.pt')
        torch.save(score_link, label_dir + 'link.pt')
        torch.save(conf_map, label_dir + 'conf.pt')
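A possible way to drive ground_truth, reusing the argparse.Namespace style from runCraftNet above; the flags are placeholders, and generate_gt may read further fields from args that this sketch does not set.

# hypothetical flags; extend the Namespace if generate_gt needs more fields
args = argparse.Namespace(trained_model='weights/craft_mlt_25k.pth', cuda=False,
                          canvas_size=1280, mag_ratio=1.5)
ground_truth(args)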
def applyCraft(image_file):
    # Initialize CRAFT parameters
    text_threshold = 0.7
    low_text = 0.4
    link_threshold = 0.4
    cuda = False
    canvas_size = 1280
    mag_ratio = 1.5
    poly = False  # set poly=True if the text in the image is curved
    refine = False
    show_time = False
    refine_net = None
    trained_model_path = './app/CRAFT/craft_mlt_25k.pth'
    net = CRAFT()
    net.load_state_dict(copyStateDict(torch.load(trained_model_path, map_location='cpu')))
    net.eval()
    image = imgproc.loadImage(image_file)
    bboxes, polys, score_text = test_net(net, canvas_size, mag_ratio, image, text_threshold,
                                         link_threshold, low_text, cuda, poly, refine_net)
    # Compute the central point of each bounding box returned by CRAFT;
    # this makes clustering easier in the G-DBSCAN step
    poly_indexes = {}
    central_poly_indexes = []
    for i in range(len(polys)):
        poly_indexes[i] = polys[i]
        x_central = (polys[i][0][0] + polys[i][1][0] + polys[i][2][0] + polys[i][3][0]) / 4
        y_central = (polys[i][0][1] + polys[i][1][1] + polys[i][2][1] + polys[i][3][1]) / 4
        central_poly_indexes.append({i: [int(x_central), int(y_central)]})
    # Convert each central coordinate to a Point instance
    X = []
    for idx, x in enumerate(central_poly_indexes):
        point = Point(x[idx][0], x[idx][1], idx)
        X.append(point)
    # Cluster the central points
    clustered = GDBSCAN(Points(X), n_pred, 1, w_card)
    cluster_values = []
    for cluster in clustered:
        # merge each cluster into one rectangle spanning its leftmost and rightmost boxes
        sort_cluster = sorted(cluster, key=lambda elem: (elem.x, elem.y))
        max_point_id = sort_cluster[len(sort_cluster) - 1].id
        min_point_id = sort_cluster[0].id
        max_rectangle = sorted(poly_indexes[max_point_id], key=lambda elem: (elem[0], elem[1]))
        min_rectangle = sorted(poly_indexes[min_point_id], key=lambda elem: (elem[0], elem[1]))
        right_above_max_vertex = max_rectangle[len(max_rectangle) - 1]
        right_below_max_vertex = max_rectangle[len(max_rectangle) - 2]
        left_above_min_vertex = min_rectangle[0]
        left_below_min_vertex = min_rectangle[1]
        if int(min_rectangle[0][1]) > int(min_rectangle[1][1]):
            left_above_min_vertex = min_rectangle[1]
            left_below_min_vertex = min_rectangle[0]
        if int(max_rectangle[len(max_rectangle) - 1][1]) < int(max_rectangle[len(max_rectangle) - 2][1]):
            right_above_max_vertex = max_rectangle[len(max_rectangle) - 2]
            right_below_max_vertex = max_rectangle[len(max_rectangle) - 1]
        cluster_values.append([left_above_min_vertex, left_below_min_vertex,
                               right_above_max_vertex, right_below_max_vertex])
    image = imgproc.loadImage(image_file)
    img = np.array(image[:, :, ::-1])
    img = img.astype('uint8')
    ocr_res = []
    for i, box in enumerate(cluster_values):
        poly = np.array(box).astype(np.int32).reshape((-1))
        poly = poly.reshape(-1, 2)
        rect = cv2.boundingRect(poly)
        x, y, w, h = rect
        cropped = img[y:y + h, x:x + w].copy()
        # Preprocess the cropped segment before OCR: upscale, grayscale,
        # denoise, dilate, then Otsu-binarize
        cropped = cv2.resize(cropped, None, fx=5, fy=5, interpolation=cv2.INTER_LINEAR)
        cropped = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
        cropped = cv2.GaussianBlur(cropped, (3, 3), 0)
        cropped = cv2.bilateralFilter(cropped, 5, 25, 25)
        cropped = cv2.dilate(cropped, None, iterations=1)
        cropped = cv2.threshold(cropped, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        ocr_res.append(pytesseract.image_to_string(cropped, lang='eng'))
    return ocr_res
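A quick driver for applyCraft; the input path is a placeholder.

# hypothetical input image; prints the OCR text recovered from each clustered region
if __name__ == '__main__':
    for text in applyCraft('samples/receipt.jpg'):
        print(text.strip())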