def main(args):
    """Recognize the text in one image and print the predicted string.

    Reads ``seed``, ``cuda``, ``height``, ``width``, ``voc_type``, ``arch``,
    ``decoder_sdim``, ``attDim``, ``max_len``, ``STN_ON``, ``resume`` and
    ``image_path`` from ``args``.  ``args.cuda``, ``args.height`` and
    ``args.width`` may be overwritten in place.

    Args:
        args: Parsed command-line namespace carrying the attributes above.
    """
    # Seed every RNG that is touched so repeated runs start from the same state.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    # Only use CUDA when it was requested *and* is actually available.
    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths: it is needed below even on CPU, where the
    # previous code left the name undefined and crashed with a NameError.
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input size when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()
    img = image_process(args.image_path)

    # The model expects label tensors even at inference time, so build a
    # placeholder target: all ones, terminated by the EOS id.
    rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
    rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]

    with torch.no_grad():
        img = img.to(device)
        input_dict = {
            'images': img.unsqueeze(0),
            'rec_targets': rec_targets,
            'rec_lengths': [args.max_len],
        }
        output_dict = model(input_dict)

    pred_rec = output_dict['output']['pred_rec']
    pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'],
                               dataset=dataset_info)
    print('Recognition result: {0}'.format(pred_str[0]))
def main(args):
    """Recognize one image, time the forward pass and dump the rectified image.

    Besides printing the recognition result, this variant pickles the whole
    model to ``model.pth``, writes the rectified image from the model output
    to ``rec.png`` and prints the elapsed forward-pass time in seconds.

    Reads ``seed``, ``cuda``, ``height``, ``width``, ``voc_type``, ``arch``,
    ``decoder_sdim``, ``attDim``, ``max_len``, ``STN_ON``, ``resume`` and
    ``image_path`` from ``args``.  ``args.cuda``, ``args.height`` and
    ``args.width`` may be overwritten in place.

    Args:
        args: Parsed command-line namespace carrying the attributes above.
    """
    # Seed every RNG that is touched so repeated runs start from the same state.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    # Only use CUDA when it was requested *and* is actually available.
    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths: it is needed below even on CPU, where the
    # previous code left the name undefined and crashed with a NameError.
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input size when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON,
                         encoder_block=4,
                         decoder_block=4)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Save model (the full module object, not just its state dict).
    torch.save(model, "model.pth")

    # Evaluation
    model.eval()
    img = image_process(args.image_path)

    # The model expects label tensors even at inference time, so build a
    # placeholder target: all ones, terminated by the EOS id.
    rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
    rec_targets[:, args.max_len - 1] = dataset_info.char2id[dataset_info.EOS]

    with torch.no_grad():
        img = img.to(device)
        input_dict = {
            'images': img.unsqueeze(0),
            'rec_targets': rec_targets,
            'rec_lengths': [args.max_len],
        }

        # timeit.timeit() with no arguments benchmarks a ``pass`` statement and
        # returns *that* duration, so it cannot serve as a clock.  Use the
        # module's wall-clock timer instead, and synchronise around the call
        # because CUDA kernels are launched asynchronously.
        if args.cuda:
            torch.cuda.synchronize()
        start = timeit.default_timer()
        output_dict = model(input_dict)
        if args.cuda:
            torch.cuda.synchronize()
        end = timeit.default_timer()

    pred_rec = output_dict['output']['pred_rec']

    # Imported locally, as before, so cv2 is only required by this variant.
    import cv2

    # Two axis swaps move dimension 0 to the end: (d0, d1, d2) -> (d1, d2, d0).
    # NOTE(review): presumably CHW -> HWC for OpenCV - confirm the model's
    # output layout.
    rec_im = output_dict['output']['rectified_images'].squeeze()
    rec_im = rec_im.transpose(2, 0).transpose(1, 0)
    # NOTE(review): looks like this undoes a [-1, 1] normalisation - confirm.
    rec_im = (rec_im * 0.5 + 0.5) * 255
    rec_im = rec_im.cpu().detach().numpy()
    print(rec_im.shape)
    cv2.imwrite("rec.png", rec_im)

    pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'],
                               dataset=dataset_info)
    print('Recognition result: {0}'.format(pred_str[0]))
    print('{:f}'.format(end - start))
def main(args):
    """Run ``recognizer`` on every file in a directory of images.

    For each entry of ``args.images_path`` the matching ground-truth file
    (same base name, ``.txt`` extension) is looked up in ``args.box_path`` and
    both paths are handed to ``recognizer`` together with the loaded model.
    Results are written by ``recognizer`` under ``outputs/``.

    Reads ``seed``, ``cuda``, ``height``, ``width``, ``voc_type``, ``arch``,
    ``decoder_sdim``, ``attDim``, ``max_len``, ``STN_ON``, ``resume``,
    ``images_path`` and ``box_path`` from ``args``.  ``args.cuda``,
    ``args.height`` and ``args.width`` may be overwritten in place.

    Args:
        args: Parsed command-line namespace carrying the attributes above.
    """
    # Seed every RNG that is touched so repeated runs start from the same state.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    # Only use CUDA when it was requested *and* is actually available.
    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths: it is passed to recognizer() below even on
    # CPU, where the previous code left the name undefined (NameError).
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input size when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()
    images_path = args.images_path
    box_path = args.box_path

    # Sorted so the processing order does not depend on the filesystem.
    for img_name in sorted(os.listdir(images_path)):
        image_path = os.path.join(images_path, img_name)
        print("Image path:", image_path)
        # Swap only the extension.  A plain str.replace('jpg', 'txt') would
        # also rewrite 'jpg' elsewhere in the name and would leave any other
        # extension untouched, pointing the ground-truth path at the image.
        gt_name = os.path.splitext(img_name)[0] + '.txt'
        gt_path = os.path.join(box_path, gt_name)
        recognizer(image_path, gt_path, model, device, dataset_info,
                   savedir="outputs/", only_price=False)
def detect_NSyolov3(save_txt=False, save_img=True):
    """Detect text-based traffic signs, locate their text and recognize it.

    Three models are chained for every input image:

    1. a ``Darknet`` detector proposes sign boxes,
    2. ``detect_TSEAST`` returns text quadrilaterals inside each sign crop,
    3. an ASTER recognizer reads each perspective-rectified text patch.

    Side effects: the output directory ``opt.output`` is deleted and
    recreated, annotated images are saved there when ``save_img`` is set, and
    one line per recognized text box is appended to ``results.txt`` there when
    ``save_txt`` is set.  Progress and results are printed to stdout.

    Args:
        save_txt: Ignored - immediately overwritten by ``opt.save_txt``.
        save_img: Ignored - immediately overwritten by ``opt.save_img``.
            Both parameters are kept only for interface compatibility.
    """
    # (height, width), e.g. (320, 192) or (416, 256) or (608, 352)
    img_size = (960, 960) if ONNX_EXPORT else opt.img_size
    out, source, weights, half, view_img, save_img, save_txt = (
        opt.output, opt.source, opt.weights, opt.half, opt.view_img,
        opt.save_img, opt.save_txt)
    webcam = (source == '0' or source.startswith('rtsp')
              or source.startswith('http') or source.endswith('.txt'))

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)
    print('Load NSYOLOv3 Model ...')

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()
    print('NSYOLOv3 加载成功!')

    model_TSEAST = EAST_PVANet(inception_mid=False, inception_end=True,
                               version=1, conv1_5=False, acb_block=False,
                               dcn=False, with_modulated_dcn=True).to(device)
    print('Load TSEAST Model ...')
    model_TSEAST.load_state_dict(torch.load('pths/TSEAST.pth'))
    model_TSEAST.to(device).eval()
    print('TSEAST 加载成功!')

    np.random.seed(1001)
    torch.manual_seed(1001)
    torch.cuda.manual_seed(1001)
    torch.cuda.manual_seed_all(1001)

    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    dataset_info = DataInfo('Traffic_Sign')
    print('Load ASTER Model ...')

    # Create model
    model_ASTER = ModelBuilder(arch='ResNet_ASTER',
                               rec_num_classes=dataset_info.rec_num_classes,
                               sDim=512, attDim=512, max_len_labels=22,
                               eos=dataset_info.char2id[dataset_info.EOS],
                               STN_ON=True)
    model_ASTER.load_state_dict(torch.load('pths/ASTER.pth'))
    # NOTE(review): this rebinds ``device`` to CUDA regardless of what
    # select_device() chose above, so the detector input below is always sent
    # to CUDA.  Kept as-is; confirm whether CPU execution should be supported.
    device = torch.device("cuda")
    model_ASTER = model_ASTER.to(device)
    model_ASTER = nn.DataParallel(model_ASTER)
    model_ASTER.eval()
    print('ASTER 加载成功!')

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model, img, 'pths/export.onnx', verbose=True)
        return

    # Half precision
    half = half and device.type != 'cpu'  # only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    if webcam:
        view_img = True  # currently unused: stream display is not implemented
        # set True to speed up constant image size inference
        torch.backends.cudnn.benchmark = True
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        dataset = LoadImages(source, img_size=img_size, half=half)

    results_path = str(Path(out)) + '/' + 'results.txt'
    # Pre-bound so the final macOS ``open`` call cannot hit an undefined name
    # when the dataset yields nothing.
    save_path = ''

    # Run inference
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred, _ = model(img)

        if opt.half:
            pred = pred.float()

        detections = non_max_suppression(pred, opt.conf_thres, opt.nms_thres)
        for i, det in enumerate(detections):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s
            print(s)

            image_ori_PIL = Image.fromarray(
                cv2.cvtColor(im0, cv2.COLOR_BGR2RGB))
            plot_img = image_ori_PIL
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '检测到 %g %s' % (n, '个文字类型交通标志')
                print(s)

                # Write results
                for *xyxy, conf, _, cls in det:
                    sign_crop = image_ori_PIL.crop(list(map(int, xyxy)))
                    boxes = detect_TSEAST(sign_crop, model_TSEAST, device)
                    plot_img = plot_boxes(plot_img, xyxy, boxes)  # draw
                    if boxes is None:
                        continue

                    # The loop variables below deliberately do NOT reuse the
                    # names ``i`` and ``img``: rebinding them here used to
                    # clobber the per-image index and the detector input that
                    # scale_coords() reads on the next detection.
                    for box in boxes:
                        # Text quad is relative to the sign crop; shift x by
                        # the crop's left edge and y by its top edge to get
                        # full-image coordinates.
                        shifted = [box[k] + xyxy[k % 2] for k in range(8)]
                        pts1 = np.float32([shifted[0:2], shifted[2:4],
                                           shifted[4:6], shifted[6:8]])

                        # Patch size: mean of the two horizontal extents and
                        # of the two vertical extents of the quad.
                        w1 = np.sqrt(np.sum((box[2] - box[0]) ** 2))
                        w2 = np.sqrt(np.sum((box[6] - box[4]) ** 2))
                        h1 = np.sqrt(np.sum((box[7] - box[1]) ** 2))
                        h2 = np.sqrt(np.sum((box[5] - box[3]) ** 2))
                        w = int((w1 + w2) // 2)
                        h = int((h1 + h2) // 2)

                        pts2 = np.float32(([0, 0], [w, 0], [w, h], [0, h]))
                        M = cv2.getPerspectiveTransform(pts1, pts2)
                        dst = cv2.warpPerspective(im0, M, (w, h))
                        text_img = image_process(dst)

                        # The recognizer expects label tensors even at
                        # inference time: all ones, terminated by the EOS id.
                        rec_targets = torch.IntTensor(1, 22).fill_(1)
                        rec_targets[:, 22 - 1] = \
                            dataset_info.char2id[dataset_info.EOS]

                        with torch.no_grad():
                            text_img = text_img.cuda()
                            input_dict = {
                                'images': text_img.unsqueeze(0),
                                'rec_targets': rec_targets,
                                'rec_lengths': [22],
                            }
                            output_dict = model_ASTER(input_dict)

                        pred_rec = output_dict['output']['pred_rec']
                        pred_str, _ = get_str_list(
                            pred_rec, input_dict['rec_targets'],
                            dataset=dataset_info)
                        print('Recognition result: {0} '.format(pred_str[0]),
                              end=' ')
                        quad = list(map(int, shifted))
                        print(quad, sep=',')

                        if save_txt:  # Write to file
                            with open(results_path, 'a') as file:
                                file.write(
                                    ('%s %s %g %g %g %g %g %g %g %g ' + '\n')
                                    % (path, pred_str[0], *quad))

                if save_img:
                    plot_img.save(save_path)
            else:
                print('图片中 未检测 到文字型交通标志 !', end = ' ')

            print('Done. (%.3fs)' % (time.time() - t))
            # NOTE: on-screen streaming and video-file output are not
            # implemented in this variant; only still images are saved above.

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + out + ' ' + save_path)

    print('All Done. (%.3fs)' % (time.time() - t0))
def main(args):
    """Compare ASTER and Tesseract exact-match accuracy on up to 100 images.

    The image list comes from ``annotation_test.txt`` inside
    ``args.image_path`` when that file can be opened, otherwise from a plain
    directory listing.  The ground-truth string of each image is the
    second-to-last ``_``-separated field of its path.  Per-image results and
    the final accuracy of both recognizers are printed to stdout.

    Reads ``seed``, ``cuda``, ``height``, ``width``, ``voc_type``, ``arch``,
    ``decoder_sdim``, ``attDim``, ``max_len``, ``STN_ON``, ``resume`` and
    ``image_path`` from ``args``.  ``args.cuda``, ``args.height`` and
    ``args.width`` may be overwritten in place.

    Args:
        args: Parsed command-line namespace carrying the attributes above.
    """
    # Seed every RNG that is touched so repeated runs start from the same state.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    # Only use CUDA when it was requested *and* is actually available.
    args.cuda = args.cuda and torch.cuda.is_available()
    # Bind the device on both paths: it is needed below even on CPU, where the
    # previous code left the name undefined and crashed with a NameError.
    device = torch.device("cuda" if args.cuda else "cpu")
    if args.cuda:
        print('using cuda.')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Fall back to the default input size when none was given.
    if args.height is None or args.width is None:
        args.height, args.width = (32, 100)

    dataset_info = DataInfo(args.voc_type)
    print(dataset_info.char2id)

    # Create model
    model = ModelBuilder(arch=args.arch,
                         rec_num_classes=dataset_info.rec_num_classes,
                         sDim=args.decoder_sdim,
                         attDim=args.attDim,
                         max_len_labels=args.max_len,
                         eos=dataset_info.char2id[dataset_info.EOS],
                         STN_ON=args.STN_ON)

    # Load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])

    if args.cuda:
        model = model.to(device)
        model = nn.DataParallel(model)

    # Evaluation
    model.eval()

    annotation_path = os.path.join(args.image_path, 'annotation_test.txt')
    try:
        # ``with`` closes the file even if reading fails part-way.
        with open(annotation_path, 'r') as test_list_file:
            test_list = test_list_file.read().splitlines()
    except IOError:
        test_list = os.listdir(args.image_path)

    data_n = min(100, len(test_list))
    if data_n == 0:
        # Nothing to evaluate; bail out instead of dividing by zero below.
        print('No test images found.')
        return

    aster_correct_cnt = 0
    tesseract_correct_cnt = 0
    custom_oem_psm_config = '--oem 3 --psm 7'

    for test_name in tqdm(test_list[:data_n]):
        # An annotation line may carry extra space-separated fields after the
        # relative path; keep only the path.  Split the entry itself, not the
        # joined path, so a space in args.image_path does not truncate it.
        img_path = os.path.join(args.image_path, test_name.split(' ')[0])
        target_str = img_path.split('_')[-2]
        print(img_path, target_str)

        img = image_process(img_path)

        # The model expects label tensors even at inference time, so build a
        # placeholder target: all ones, terminated by the EOS id.
        rec_targets = torch.IntTensor(1, args.max_len).fill_(1)
        rec_targets[:, args.max_len - 1] = \
            dataset_info.char2id[dataset_info.EOS]

        with torch.no_grad():
            img = img.to(device)
            input_dict = {
                'images': img.unsqueeze(0),
                'rec_targets': rec_targets,
                'rec_lengths': [args.max_len],
            }
            output_dict = model(input_dict)

        pred_rec = output_dict['output']['pred_rec']
        pred_str, _ = get_str_list(pred_rec, input_dict['rec_targets'],
                                   dataset=dataset_info, lower_flag=False)
        if pred_str[0] == target_str:
            aster_correct_cnt += 1

        pil_img = load_image_in_PIL(img_path).convert('RGB')
        # Strip surrounding whitespace: Tesseract output can end with a
        # newline / form feed, which would make every exact match fail.
        detected_str = pytesseract.image_to_string(
            pil_img, config=custom_oem_psm_config).strip()
        tesseract_correct = detected_str == target_str
        if tesseract_correct:
            tesseract_correct_cnt += 1

        print(f'GT: {target_str}, ASTER: {pred_str[0]}, Tesseract: {detected_str}')
        if tesseract_correct:
            print('===================== correct')

    print(f'Aster acc: {aster_correct_cnt} / {data_n}. {aster_correct_cnt/data_n}')
    print(f'Tesseract acc: {tesseract_correct_cnt} / {data_n}. {tesseract_correct_cnt/data_n}')