def save_label(self, img_path, label):
    """Draw ``label`` on the image at ``img_path`` and save the result.

    The output path is ``img_path`` with every occurrence of ``'img'``
    replaced by ``'save'``. Missing parent directories are created.

    Args:
        img_path: Path of the source image; forwarded to ``draw_bbox``.
        label: Box annotations; forwarded to ``draw_bbox`` unchanged.

    Returns:
        The annotated image returned by ``draw_bbox``.
    """
    save_path = img_path.replace('img', 'save')
    save_dir = os.path.dirname(save_path)
    # A bare file name has no directory component, and os.makedirs('')
    # would raise; exist_ok also removes the check-then-create race.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    img = draw_bbox(img_path, label)
    cv2.imwrite(save_path, img)
    return img
def run_detection_from_image(self, filenames, save_faces=True, outdir='./',
                             plot_landmarks=False, show_frame=False):
    """Run face detection on each image file and optionally display the result.

    Args:
        filenames: Iterable of image paths; wrapped in ``tqdm`` for progress.
        save_faces: Forwarded to ``self.fast_mtcnn``.
        outdir: Forwarded to ``self.fast_mtcnn``.
        plot_landmarks: When true, also overlay landmarks on the image.
        show_frame: When true, show each annotated image in an OpenCV window
            and block until a key is pressed.

    Returns:
        None.
    """
    for filename in tqdm(filenames):
        # Read the image; the context manager closes the underlying file
        # once the RGB copy has been made.
        with Image.open(filename) as source:
            image = source.convert('RGB')

        # Array copy so that OpenCV functions can be used on it.
        image_array = np.array(image, dtype=np.float32)
        # OpenCV works in BGR channel order.
        image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

        faces, probs, bounding_boxes, landmarks = self.fast_mtcnn(
            image_array, save_faces=save_faces, id=self.uuid,
            outdir=outdir, return_all=self.return_all)

        # Draw the bounding boxes around the faces. This stays best-effort
        # (presumably it fails when nothing was detected -- TODO confirm),
        # but no longer swallows KeyboardInterrupt / SystemExit.
        try:
            image_array = utils.draw_bbox(bounding_boxes, image_array, probs[0])
            if plot_landmarks:
                image_array = utils.plot_landmarks(landmarks, image_array)
        except Exception:
            pass

        # Show the image; the float array is scaled to [0, 1] for imshow.
        if show_frame:
            cv2.imshow('Image', image_array / 255.0)
            cv2.waitKey(0)
def detect(image_path=''):
    """Run CTPN detection on one image and save the boxes and an annotated copy.

    Outputs go under the current working directory:
    ``result/step2/image/<image name>`` for the annotated image and
    ``result/step2/label/<image stem>.txt`` for the box coordinates, one
    comma-separated box per CRLF-terminated line.

    Args:
        image_path: Path of the image to process.

    Returns:
        None.
    """
    main_path = os.getcwd()
    image_result_dir = os.path.join(main_path, 'result/step2/image/')
    label_result_dir = os.path.join(main_path, 'result/step2/label/')
    start = time.time()
    model_path = os.path.join(main_path, 'detection/output/best_loss0.000151.pth')

    image_name = os.path.basename(image_path)
    image_result_path = os.path.join(image_result_dir, image_name)
    # splitext handles extensions of any length ('.jpeg', '.png', none);
    # slicing off the last three characters did not.
    label_result_path = os.path.join(
        label_result_dir, os.path.splitext(image_name)[0] + '.txt')

    # Initialise the network.
    net = CTPN_Model(pretrained=False)
    model = Pytorch_model(model_path, net=net, gpu_id=None)
    boxes_list, t = model.predict(image_path)
    cost_time = time.time() - start
    print("cost time: {:.2f}s".format(cost_time))

    # Make sure both output folders exist before writing into them.
    os.makedirs(image_result_dir, exist_ok=True)
    os.makedirs(label_result_dir, exist_ok=True)

    # newline='' writes the explicit "\r\n" verbatim on every platform
    # (text mode on Windows would otherwise turn it into "\r\r\n").
    with open(label_result_path, "w", newline='') as f:
        for index, point in enumerate(boxes_list):
            box = point[0].astype(int)
            # Shift the x coordinate of points 0 and 3 ten pixels towards
            # zero, clamped at the image border.
            box[0][0] = max(box[0][0] - 10, 0)
            box[3][0] = max(box[3][0] - 10, 0)
            boxes_list[index][0] = box
            f.write(",".join(str(k) for lst in box for k in lst) + "\r\n")

    image = draw_bbox(image_path, boxes_list, color=(0, 0, 255), mode=1)
    cv2.imwrite(image_result_path, image)
    print('Detect Finished.')
    print('Created Image: ', image_result_path)
    print('Created Text: ', label_result_path)
def gen_img(self):
    """Render one word image and return it together with its text.

    Pipeline: pick a word and font, draw the word on a generated background,
    optionally add lines, apply a perspective transform, crop (or, in debug
    mode, draw the boxes instead of cropping), then optionally apply noise,
    blur or prydown, and colour reversal.

    Returns:
        Tuple ``(word_img, word)``.
    """
    word, font, word_size = self.pick_font()

    # The background must be much larger than the raw word image so the
    # full word can still be cropped after the perspective transform.
    bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)

    draw_text = (self.draw_vertical_text_on_bg if self.vertical
                 else self.draw_horizontal_text_on_bg)
    word_img, text_box_pnts, word_color = draw_text(word, font, bg)

    if apply(self.cfg.line):
        word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts, word_color)

    word_img, img_pnts_transformed, text_box_pnts_transformed = \
        self.apply_perspective_transform(
            word_img,
            text_box_pnts,
            max_x=self.cfg.perspective_transform.max_x,
            max_y=self.cfg.perspective_transform.max_y,
            max_z=self.cfg.perspective_transform.max_z,
            gpu=self.gpu,
        )

    if not self.debug:
        word_img, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
    else:
        # Debug mode keeps the uncropped image and overlays every box on it.
        word_img = draw_box(word_img, img_pnts_transformed, (0, 255, 0))
        word_img = draw_box(word_img, text_box_pnts_transformed, (0, 0, 255))
        _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
        word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))

    if apply(self.cfg.noise):
        word_img = np.clip(word_img, 0., 255.)
        word_img = self.noiser.apply(word_img)

    # Prydown is only considered when blur was not applied.
    if apply(self.cfg.blur):
        word_img = self.apply_blur_on_output(word_img)
    elif apply(self.cfg.prydown):
        word_img = self.apply_prydown(word_img)

    word_img = np.clip(word_img, 0., 255.)

    if apply(self.cfg.reverse_color):
        word_img = self.reverse_img(word_img)

    return word_img, word
def predict():
    """Flask handler: detect and recognise text in a base64-encoded image.

    Expects a POST whose body is JSON of the form
    ``{"imgData": <base64 image string>}``.

    Returns:
        A JSON string. On success ``code`` is 1 and ``data`` holds one entry
        per detected box (``id``, ``box``, ``recognition``, ``prob``). On any
        failure, or for a non-POST request, ``code`` is 0 and ``message``
        describes the problem.
    """
    time_str = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(time.time()))
    separator = '========================================================================'

    if flask.request.method != 'POST':
        out = {'code':0, 'message':'request method must be post', 'getImageTime':time_str}
        logger.error('request method must be post')
        logger.info(separator)
        return json.dumps(out, ensure_ascii=False)

    try:
        start = time.time()
        # Request body: {"imgData": img_base64_str}
        data = json.loads(flask.request.data)
        t1 = time.time()
        # BGR
        img = read_base64(data['imgData'], mode='opencv')
        t2 = time.time()
        preds, boxes_list, score_list, det_time = det_model.predict(
            img, is_output_polygon=False, short_size=args.det_short_size)
        if args.debug:
            draw_img = draw_bbox(img, boxes_list)

        result = []
        rec_time = 0
        for i, box in enumerate(boxes_list):
            rec_img = CropWordBox.crop_image_by_bbox(img, box, args.rec_crop_ratio)
            text, prob, t = rec_model.predict(rec_img)
            # Coerce to a built-in float so json.dumps can serialise the
            # value even if the model hands back a NumPy scalar.
            prob = round(float(prob), 3)
            rec_time += t
            result.append({'id':time_str+ '_' + str(i),
                           'box':box.tolist(),
                           'recognition':text,
                           'prob':prob})
            if args.debug:
                draw_img = cv2ImgAddText(draw_img, text, (box[0][0], box[0][1]-40),
                                         textColor=(255, 255, 0), textSize=40)
                draw_img = cv2ImgAddText(draw_img, f'{prob:.3f}', (box[3][0], box[3][1]+5),
                                         textColor=(255, 255, 0), textSize=40)

        if args.debug:
            # cv2.imwrite does not create missing folders, so make them first.
            os.makedirs('debug/draw_img', exist_ok=True)
            os.makedirs('debug/org_img', exist_ok=True)
            cv2.imwrite(os.path.join('debug/draw_img', 'draw_' + time_str + '.jpg'), draw_img)
            cv2.imwrite(os.path.join('debug/org_img', time_str + '.png'), img)

        logger.info(f'get img time: {(t1-start)*1000: .1f}ms \n'
                    f'read base64 img time: {(t2-t1)*1000: .1f}ms \n'
                    f'det preprocess time: {det_time[0]*1000: .1f}ms \n'
                    f'det inference time: {det_time[1]*1000: .1f}ms \n'
                    f'det postprocess time: {det_time[2]*1000: .1f}ms \n'
                    f'det total time: {det_time[3]*1000: .1f}ms \n'
                    f'rec total time: {rec_time*1000: .1f}ms \n')
        end = time.time()
        out = {'data':result, 'code':1, 'message':'', 'getImageTime':time_str}
        logger.info(f'total cost time: {(end - start)*1000: .1f}ms')
        logger.info(pprint.pformat(out))
        logger.info(separator)
        if args.file_record:
            file_record.write(str(out))
        return json.dumps(out, ensure_ascii=False)
    except Exception:
        # Request boundary: report every ordinary failure to the caller, but
        # let KeyboardInterrupt / SystemExit propagate.
        # NOTE(review): the full traceback is sent back to the client; confirm
        # this endpoint is not exposed to untrusted callers.
        trace = traceback.format_exc()
        out = {'code':0, 'message':trace, 'getImageTime':time_str}
        logger.error(trace)
        logger.info(separator)
        return json.dumps(out, ensure_ascii=False)
def run_detection_from_video_file(self, video_file, outdir='./', save_faces=True,
                                  profiling=False, plot_landmarks=False):
    """Run face detection on every frame of a video file and display it.

    Playback stops at the end of the video or when ``q`` is pressed.

    Args:
        video_file: Path passed to ``cv2.VideoCapture``.
        outdir: Forwarded to ``self.fast_mtcnn``.
        save_faces: Forwarded to ``self.fast_mtcnn``.
        profiling: When true, print the average FPS after the run.
        plot_landmarks: When true, also overlay landmarks on each frame.

    Returns:
        None.
    """
    print("[INFO] Loading video file")
    cap = cv2.VideoCapture(video_file)

    if not cap.isOpened():
        print('Error while trying to read video. Please check path again')
        cap.release()
        # Nothing to process; bail out instead of reporting FPS for a
        # profiler that was never started.
        return

    # Start the profiler once. The original restarted it on every frame,
    # which (assuming an imutils-style FPS whose start() resets the
    # reference time) made the average cover only the last frame.
    profiler = FPS()
    profiler.start()

    try:
        # Read until the end of the video.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces, probs, bounding_boxes, landmarks = self.fast_mtcnn(
                frame, save_faces=save_faces, id=self.uuid,
                outdir=outdir, return_all=self.return_all)
            profiler.update(1)

            # Colour conversion for OpenCV.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # Draw the bounding boxes around the faces. Best-effort, but no
            # longer swallows KeyboardInterrupt / SystemExit.
            try:
                frame = utils.draw_bbox(bounding_boxes, frame, probs[0])
                if plot_landmarks:
                    frame = utils.plot_landmarks(landmarks, frame)
            except Exception:
                pass

            cv2.imshow('Face detection frame', frame)
            # Press `q` to exit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        profiler.stop()
        # Release the capture and windows even if a frame raised.
        print("[INFO] cleaning up...")
        cap.release()
        cv2.destroyAllWindows()

    # Calculate and print the average FPS.
    if profiling:
        print(f"Average FPS: {profiler.fps():.3f}")
def detect(image_path='', label_path=''):
    """Run CTPN detection on an image and write the boxes to a label file.

    Each box is written to ``label_path`` as one comma-separated,
    CRLF-terminated line, and an annotated copy of the image is saved as
    ``result.jpg`` in the current working directory.

    Args:
        image_path: Image to process; an empty value selects the built-in
            default test image.
        label_path: File to write the boxes to; an empty value selects the
            built-in default label file.

    Returns:
        Tuple ``(image_path, label_path)`` with the paths actually used.
    """
    # Only the names this function uses are imported here.
    from model import CTPN_Model
    from utils.utils import draw_bbox

    main_path = os.getcwd()
    output_path = os.path.join(main_path, 'media/image/CEIR/result/step2/image/')
    print(output_path)

    model_path = 'output/ctpn_1_gpu1111/best_loss0.000151.pth'
    if not image_path:
        image_path = '/home/dong/Downloads/receipt/blog/CEIR/result/step2/image/test.jpg'
    if not label_path:
        label_path = '/home/dong/Downloads/receipt/blog/CEIR/result/step2/label/test.txt'

    # Initialise the network.
    net = CTPN_Model(pretrained=False)
    model = Pytorch_model(model_path, net=net, gpu_id=None)
    boxes_list, t = model.predict(image_path)

    # newline='' writes the explicit "\r\n" verbatim on every platform
    # (text mode on Windows would otherwise turn it into "\r\r\n").
    with open(label_path, "w", newline='') as f:
        for index, point in enumerate(boxes_list):
            box = point[0].astype(int)
            # Shift the x coordinate of points 0 and 3 ten pixels towards
            # zero, clamped at the image border.
            box[0][0] = max(box[0][0] - 10, 0)
            box[3][0] = max(box[3][0] - 10, 0)
            print(box)
            line = ",".join(str(k) for lst in box for k in lst)
            boxes_list[index][0] = box
            print(line)
            f.write(line + "\r\n")

    image = draw_bbox(image_path, boxes_list, color=(0, 0, 255), mode=1)
    cv2.imwrite('result.jpg', image)
    print('Finished.')
    return image_path, label_path
def run_detection_from_webcam_stream(self, save_faces=True, outdir='./',
                                     profiling=False, plot_landmarks=False):
    """Run face detection on the live webcam stream until ``q`` is pressed.

    Also stores the current frame size on ``self.frame_height`` and
    ``self.frame_width``.

    Args:
        save_faces: Forwarded to ``self.fast_mtcnn``.
        outdir: Forwarded to ``self.fast_mtcnn``.
        profiling: When true, print the FPS statistics after the run.
        plot_landmarks: When true, also overlay landmarks on each frame.

    Returns:
        None.
    """
    # Initialise the video stream and allow the camera sensor to warm up.
    print("[INFO] starting video stream...")
    v_cap = VideoStream().start()
    profiler = FPS().start()
    time.sleep(2.0)

    try:
        while True:
            # Grab the frame from the threaded video stream and resize it
            # to a width of 600 pixels.
            frame = v_cap.read()
            frame = imutils.resize(frame, width=600)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Grab the frame dimensions.
            (self.frame_height, self.frame_width) = frame.shape[:2]

            faces, probs, bounding_boxes, landmarks = self.fast_mtcnn(
                frame, save_faces=save_faces, id=self.uuid,
                outdir=outdir, return_all=self.return_all)

            # Colour conversion for OpenCV.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            profiler.update(1)

            # Draw the bounding boxes around the faces. Best-effort, but no
            # longer swallows KeyboardInterrupt / SystemExit.
            try:
                frame = utils.draw_bbox(bounding_boxes, frame, probs[0])
                if plot_landmarks:
                    frame = utils.plot_landmarks(landmarks, frame)
            except Exception:
                pass

            # Show the output frame.
            cv2.imshow("Output", frame)

            # If the `q` key was pressed, break from the loop.
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
    finally:
        # Do a bit of cleanup; runs even if a frame raised, so the camera
        # stream is always stopped.
        profiler.stop()
        print("[INFO] cleaning up...")
        cv2.destroyAllWindows()
        v_cap.stop()

    if profiling:
        print(f"Average FPS: {profiler.fps(), profiler._numFrames}")
def predict():
    """Flask handler: run detection and recognition on an uploaded image.

    Expects a POST with a file field named ``input_image``. The upload is
    saved under ``static/images``, an annotated copy is written under
    ``static``, and the response carries the annotated image URL.

    Returns:
        ``jsonify(src=<url of annotated image>, count=<recognised texts>)``
        on success, or a JSON error with HTTP status 405 / 400 when the
        request is not a POST, carries no usable file, or the file cannot
        be decoded as an image.
    """
    time_str = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(time.time()))

    # The original fell through and returned None in these cases, which
    # Flask rejects as an invalid response.
    if request.method != 'POST':
        return jsonify(error='request method must be post'), 405

    start = time.time()
    received_file = request.files.get('input_image')
    if not received_file:
        return jsonify(error='no input_image file received'), 400

    # The file name comes from the client: keep only its last path component
    # so it cannot escape the upload folder.
    imageFileName = os.path.basename((received_file.filename or '').replace('\\', '/'))
    if not imageFileName:
        return jsonify(error='invalid file name'), 400

    # Save the received image to the upload folder.
    received_dirPath = 'static/images'
    os.makedirs(received_dirPath, exist_ok=True)
    imageFilePath = os.path.join(received_dirPath, time_str + '_' + imageFileName)
    received_file.save(imageFilePath)
    print('receive image and save: %s' % imageFilePath)
    usedTime = time.time() - start
    print('receive image and save cost time: %f' % usedTime)

    # cv2.imread returns None for files it cannot decode.
    img = cv2.imread(imageFilePath)
    if img is None:
        return jsonify(error='uploaded file is not a readable image'), 400

    preds, boxes_list, score_list, det_time = det_model.predict(
        imageFilePath, is_output_polygon=False, short_size=args.det_short_size)
    draw_img = draw_bbox(img, boxes_list)
    drawed_imageFileName = time_str + '_draw_' + os.path.splitext(imageFileName)[0] + '.jpg'
    drawed_imageFilePath = os.path.join('static', drawed_imageFileName)

    result = []
    for i, box in enumerate(boxes_list):
        rec_img = CropWordBox.crop_image_by_bbox(img, box, args.rec_crop_ratio)
        text, prob, t = rec_model.predict(rec_img)
        prob = round(prob, 3)
        draw_img = cv2ImgAddText(draw_img, text, (box[0][0], box[0][1] - 40),
                                 textColor=(255, 255, 0), textSize=40)
        draw_img = cv2ImgAddText(draw_img, f'{prob:.3f}', (box[3][0], box[3][1] + 5),
                                 textColor=(255, 255, 0), textSize=40)
        result.append(text)

    print(f'draw image save: {drawed_imageFilePath}')
    cv2.imwrite(drawed_imageFilePath, draw_img)
    image_source_url = url_for('static', filename=drawed_imageFileName)
    return jsonify(src=image_source_url, count=f'{result}')
# Script section: run PSENet on one randomly chosen test image, save the
# annotated result as 'result.jpg' and display it.
# `model_path` is not assigned in this section; it must already be defined.
# model_path = 'output/psenet_icd2015_new_loss/final.pth'
image_root = 'Test Set/Image'
annotation_root = 'Test Set/Annotation'
Images = os.listdir(image_root)
Labels = os.listdir(annotation_root)

# randint's upper bound is exclusive, so [0, len(Images)) covers every valid
# index. The previous range [1, len(Images) + 1) could index one past the end
# and never selected the first image.
image_id = Images[np.random.randint(0, len(Images))].split('.jpg')[0]
img_path = os.path.join(image_root, '{}.jpg'.format(image_id))
label_path = os.path.join(annotation_root, '{}.txt'.format(image_id))
print('Predicting for image', img_path)
label = _get_annotation(label_path)

# Initialise the network.
net = PSENet(backbone='resnet18', pretrained=False, result_num=config.n)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Pytorch_model(model_path, net=net, scale=1, device=device)
# for i in range(100):
#     models.predict(img_path)
preds, boxes_list, t = model.predict(img_path)
print(boxes_list)
show_img(preds)
img = draw_bbox(img_path, boxes_list, color=(0, 0, 255))
cv2.imwrite('result.jpg', img)
# img = draw_bbox(img, label,color=(0,0,255))
show_img(img, color=True)
plt.show()
if __name__ == '__main__':
    # Batch prediction: run the detector on every '.jpg' in the input folder
    # and write, per image, '<stem>_result.jpg' (boxes drawn on the image),
    # '<stem>_pred.jpg' (prediction map scaled by 255) and '<stem>.txt'
    # (boxes and scores) into the output folder.
    import pathlib
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    from utils.utils import show_img, draw_bbox, save_result, get_file_list

    args = init_args()
    print(args)

    # Initialise the network.
    model = DetModel(args.model_path, post_p_thre=args.thre, gpu_id=0)

    # The output folder does not change between images, so create it once.
    os.makedirs(args.output_folder, exist_ok=True)

    for img_path in tqdm(get_file_list(args.input_folder, p_postfix=['.jpg'])):
        preds, boxes_list, score_list, t = model.predict(
            img_path, is_output_polygon=args.polygon)
        # The channel axis is reversed for drawing/display and reversed back
        # before writing with OpenCV below.
        img = draw_bbox(cv2.imread(img_path)[:, :, ::-1], boxes_list)
        if args.show:
            show_img(preds)
            show_img(img, title=os.path.basename(img_path))
            plt.show()

        # Save the results. All three paths are built from the stem instead
        # of string-replacing inside the full output path.
        stem = pathlib.Path(img_path).stem
        output_path = os.path.join(args.output_folder, stem + '_result.jpg')
        pred_path = os.path.join(args.output_folder, stem + '_pred.jpg')
        txt_path = os.path.join(args.output_folder, stem + '.txt')
        cv2.imwrite(output_path, img[:, :, ::-1])
        cv2.imwrite(pred_path, preds * 255)
        save_result(txt_path, boxes_list, score_list, args.polygon)
print('load label failed on {}'.format(label_path)) return np.array(boxes, dtype=np.float32) if __name__ == '__main__': from configs import config_tips as config from models import PSENet from utils.utils import show_img, draw_bbox #model_path = 'outp ut/psenet_icd2015_resnet152_author_crop_adam_warm_up_myloss/best_r0.714011_p0.708214_f10.711100.pth' #model_path = 'output/psenet_icd2015_new_loss/final.pth' model_path = 'checkpoint/pse_epoch_30.pth' #image_files = Path("/home/peizhao/data/icdar/2019/tips/test/img").rglob('*.jpg') #image_files = Path("/home/peizhao/data/temp/doc").rglob("*.jpg") image_files = Path("/home/peizhao/data/temp").rglob("*.png") # image_files = Path("/home/peizhao/data/temp").rglob("*.jpg") #image_files = Path("/home/peizhao/data/temp/test").rglob("*.jpg") # 初始化网络 #net = PSENet(backbone='resnet152', pretrained=False, result_num=config.n) net = PSENet(backbone='resnet50', pretrained=False, result_num=3) model = Pytorch_model2(model_path, net=net, scale=1, gpu_id=0) for item in image_files: preds, boxes_list, t = model.predict(str(item)) img = draw_bbox(str(item), boxes_list, color=(0, 0, 255)) h, w = img.shape[:2] scale = 640 / max(h, w) img_size = cv2.resize(img, None, fx=scale, fy=scale) cv2.imshow("result", img_size) cv2.waitKey(0)
x = tf.Session().run( yolo_boxes_and_scores(features, anchors[anchor_mask[0]], nb_classes, model_image_size, org_image_shape)) boxes = np.concatenate( [x[0], np.reshape(x[2][0], (n_shape[1] * n_shape[1] * 3, 1)), x[1]], axis=1) all_boxes.extend(boxes) boxes_, scores_, classes_ = postprocess_boxes_tf(all_boxes, score_threshold=.3) image = draw_boxes_tf(boxes_, scores_, classes_, classes, org_image) image.show() ######################################################################################################### bboxes = postprocess_boxes(all_boxes, org_image, model_image_size[0], 0.3) bboxes = nms(bboxes, 0.45, method='nms') image = draw_bbox(org_image, bboxes, classes) image = fromarray(image) image.show() ######################################################################################################### pred_bbox = np.concatenate([ np.reshape(predictions[0], (-1, 5 + nb_classes)), np.reshape(predictions[0], (-1, 5 + nb_classes)), np.reshape(predictions[0], (-1, 5 + nb_classes)) ], axis=0) bboxes = postprocess_boxes(pred_bbox, org_image, model_image_size[0], 0.3) bboxes = nms(bboxes, 0.45, method='nms') image = draw_bbox(org_image, bboxes, classes) image = fromarray(image)
# grab the frame dimensions and convert it to a blob (frame_height, frame_width) = frame.shape[:2] out = cv2.VideoWriter( '/Users/igkinis/Desktop/projects/faceBIO/data/output5.avi', fourcc, 20.0, (frame_width, frame_height)) faces, probs, bounding_boxes = fast_mtcnn( pil_image, save_faces=True, id="test_video", outdir='/Users/igkinis/Desktop/projects/faceBIO/data', return_prob=True) # color conversion for OpenCV frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # draw the bounding boxes around the faces try: frame = utils.draw_bbox(bounding_boxes, frame, probs[0]) except: pass # if the `q` key was pressed, break from the loop # show the output frame cv2.imshow("Output", frame) output = frame out.write(output) key = cv2.waitKey(1) & 0xFF if key == ord("q"): break # do a bit of cleanup print("[INFO] cleaning up...") cv2.destroyAllWindows() v_cap.stop()
if __name__ == '__main__':
    # Demo: run PSENet on a single image and display the prediction map
    # followed by the image with the detected boxes drawn on it.
    import config
    from model import PSENet
    import matplotlib.pyplot as plt
    from utils.utils import show_img, draw_bbox

    os.environ['CUDA_VISIBLE_DEVICES'] = '2'

    model_path = 'output/psenet_icd2015_resnet152/best_r0.642754_p0.614924_f10.628531.pth'
    # model_path = 'output/psenet_icd2015_new_loss/final.pth'
    # img_path = '/data2/dataset/ICD15/img/img_1.jpg'
    img_path = '0.jpg'
    label_path = '/data2/dataset/ICD15/test/gt/gt_img_130.txt'
    # label = _get_annotation(label_path)

    # Initialise the network.
    net = PSENet(
        backbone='resnet152',
        pretrained=False,
        result_num=config.n,
    )
    model = Pytorch_model(model_path, net=net, scale=1, gpu_id=0)
    # for i in range(100):
    #     model.predict(img_path)

    preds, boxes_list = model.predict(img_path)
    show_img(preds)

    img = draw_bbox(img_path, boxes_list)
    # img = draw_bbox(img, label,color=(0,0,255))
    show_img(img, color=True)

    plt.show()