def ocr_result_visualization(_image_info, _box_info_list, _text_list):
    """
    Merge the detection result and the recognition result into a single image.

    Args:
        _image_info:    image info; its 'bucket_name' and 'path' entries locate the image in OSS
        _box_info_list: list of all detected boxes
        _text_list:     recognition results, in the same order as the boxes

    Returns:
        dict with the 'bucket_name' and 'path' of the uploaded, annotated image
    """
    result_bucket = 'result'
    storage = get_oss_handler()
    source_image = storage.download_image_file(_image_info['bucket_name'], _image_info['path'])
    # NOTE(review): (0, 0, 255) and 3 are presumably the drawing colour and line
    # thickness -- confirm against annotate_detect_rotated_bbox_and_text_result.
    annotated_image = annotate_detect_rotated_bbox_and_text_result(
        source_image, _box_info_list, _text_list, (0, 0, 255), 3)
    # Object key layout: <date string>/<uuid name>
    object_key = os.path.join(get_date_string(), get_uuid_name())
    uploaded_path = storage.upload_image_file(result_bucket, object_key, annotated_image, True, 50)
    return {'bucket_name': result_bucket, 'path': uploaded_path}
def face_parsing(_image_info, _face_box_info, _face_landmark_info):
    """
    Run semantic face parsing on one face of an image.

    Args:
        _image_info:          the full image to analyse; its 'bucket_name' and 'path'
                              entries locate the image in OSS
        _face_box_info:       region of the image that contains the face
        _face_landmark_info:  landmark coordinates of the face

    Returns:
        dict whose 'parsing_info' entry holds the 'bucket_name' and 'path' of the
        stored segmentation result
    """
    target_bucket = 'intermediate'
    storage = get_oss_handler()
    full_image = storage.download_image_file(_image_info['bucket_name'], _image_info['path'])
    # Crop the face region with a 1.5 scale ratio before handing it to the parser.
    face_roi = get_rotated_box_roi_from_image(full_image, _face_box_info, _scale_ratio=1.5)
    segmentation = face_parsing_handler.execute(face_roi, _face_landmark_info)['semantic_segmentation']
    # Object key layout: <date string>/<uuid name>
    object_key = os.path.join(get_date_string(), get_uuid_name())
    stored_path = storage.upload_numpy_array(target_bucket, object_key, segmentation)
    return {
        'parsing_info': {
            'bucket_name': target_bucket,
            'path': stored_path,
        },
    }
def execute(
        self,
        _video_url,
        _extract_mode,
        _interval_count,
        _oss_helper: CloudObjectStorage,
        _target_bucket=None,
):
    """
    Extract key frames from a video and store them in OSS.

    This is a generator: nothing happens until it is iterated. Frames are
    sampled every ``interval_frames`` frames and uploaded concurrently on a
    thread pool; results are yielded strictly in frame order.

    Args:
        _video_url:       address of the video
        _extract_mode:    sampling mode, 'n_seconds' (interval in seconds,
                          converted with the video FPS) or 'n_frames'
        _interval_count:  sampling interval, interpreted according to the mode
        _oss_helper:      OSS helper used for the uploads
        _target_bucket:   name of the bucket to upload into; when it or
                          _oss_helper is falsy no frame is uploaded

    Yields:
        (_target_bucket, result of the upload) for every uploaded frame.
        The frame itself is deliberately not returned: doing so lowered the
        overall parallelism and raised single-node latency.

    Raises:
        VideoExtractMethodNotSupportException: if _extract_mode is not supported
    """
    all_tasks = []
    cap = cv2.VideoCapture(_video_url)
    with ThreadPoolExecutor() as executor:
        try:
            video_fps = cap.get(cv2.CAP_PROP_FPS)
            if _extract_mode == 'n_seconds':
                interval_frames = int(video_fps * _interval_count)
            elif _extract_mode == 'n_frames':
                interval_frames = int(_interval_count)
            else:
                raise VideoExtractMethodNotSupportException(
                    f'{_extract_mode} not support now')
            # A step smaller than one frame would seek to the same position on
            # every iteration and never terminate, so advance at least 1 frame.
            interval_frames = max(1, interval_frames)
            video_frame_position = 0
            date_string = get_date_string()
            uuid_name = get_uuid_name()
            while True:
                grabbed, m_frame = cap.read()
                if not grabbed:
                    break
                if _oss_helper and _target_bucket:
                    # Object key layout: <date string>/<uuid name>/<frame position>
                    m_frame_target_path = os.path.join(
                        date_string, uuid_name, f'{video_frame_position}')
                    # NOTE(review): the trailing False is presumably the
                    # compression switch of upload_image_file -- confirm.
                    all_tasks.append(
                        executor.submit(_oss_helper.upload_image_file,
                                        _target_bucket,
                                        m_frame_target_path,
                                        m_frame,
                                        False))
                video_frame_position += interval_frames
                cap.set(cv2.CAP_PROP_POS_FRAMES, video_frame_position)
        finally:
            # Release the capture even when reading or submitting fails.
            cap.release()
        # Strictly preserve submission order. result() blocks until the upload
        # has finished, so no frame is skipped just because it is still running.
        for m_task in all_tasks:
            yield _target_bucket, m_task.result()
def execute(
        self,
        _to_download_url,
        _oss_helper: CloudObjectStorage = None,
        _image_size_threshold=10,
):
    """
    Download the image file behind a URL and decode it with Pillow.

    Args:
        _to_download_url:       URL of the image to download
        _oss_helper:            OSS helper used to store the downloaded image;
                                may be None, in which case nothing is stored
        _image_size_threshold:  size limit in KB; a decoded image smaller than
                                this is rejected. None disables the check.

    Returns:
        OrderedDict with 'bucket_name', 'saved_path' (empty string when nothing
        was stored), 'image_height', 'image_width', 'image_channel' and 'image'
        (the decoded image as a numpy array)

    Raises:
        DownloadURLNotAvailableException:   the URL cannot be reached
        ImageDownloadTimeoutException:      the download timed out
        ImageFormatNotSupportException:     Pillow cannot decode the payload
        ImageFileSizeAbnormalException:     the image is below the size threshold
        ConsumerAlgorithmUncatchException:  any other unexpected failure
    """
    to_return_result = OrderedDict()
    try:
        download_result_io = self.download_url(_to_download_url, 1024 * 2)
        try:
            request_image = Image.open(download_result_io)
        except (TimeoutError, requests.exceptions.RequestException):
            # Network-level failures keep their dedicated mapping below.
            raise
        except OSError as decode_error:
            # Image.open never returns None; an undecodable payload is reported
            # through an OSError (UnidentifiedImageError on current Pillow).
            raise ImageFormatNotSupportException(
                f'image:{_to_download_url} format not support,cannot decode by PILLOW') from decode_error
        image_c = len(request_image.getbands())
        image_h = request_image.height
        image_w = request_image.width
        request_image_np = convert_pil_to_numpy(request_image)
        # The check is made on the decoded array size, not on the downloaded byte count.
        if _image_size_threshold is not None and request_image_np.nbytes < _image_size_threshold * 1024:
            raise ImageFileSizeAbnormalException(
                f'image:{_to_download_url} is small than threshold,it may not be a normal picture')
        # Storing is optional: some callers use the decoded image directly.
        if _oss_helper:
            file_name = get_uuid_name()
            oss_path = os.path.join(get_date_string(), file_name)
            # Store the original image, uncompressed.
            saved_path = _oss_helper.upload_image_file(self.bucket_name, oss_path, request_image,
                                                       _enable_compress=False)
        else:
            saved_path = ''
        to_return_result['bucket_name'] = self.bucket_name
        to_return_result['saved_path'] = saved_path
        to_return_result['image_height'] = image_h
        to_return_result['image_width'] = image_w
        to_return_result['image_channel'] = image_c
        to_return_result['image'] = request_image_np
        return to_return_result
    except requests.exceptions.ConnectionError as connect_error:
        raise DownloadURLNotAvailableException(f'{_to_download_url} cannot reach') from connect_error
    except (TimeoutError, requests.exceptions.Timeout) as timeout_error:
        # requests' own Timeout is not a subclass of the builtin TimeoutError.
        raise ImageDownloadTimeoutException(f'{_to_download_url} download timeout') from timeout_error
    except CustomException:
        # Project exceptions already carry the right meaning; pass them through.
        raise
    except Exception as unexpected_error:
        raise ConsumerAlgorithmUncatchException(tb.format_exc()) from unexpected_error
def execute(
        self,
        _to_download_url,
        _oss_helper,
        _timeout=30,
        _image_size_threshold=10,
):
    """
    Download the image behind a URL, decode it with OpenCV and store it in OSS.

    Args:
        _to_download_url:       URL of the image to download
        _oss_helper:            OSS helper used to store the downloaded image
        _timeout:               timeout passed to requests.get
        _image_size_threshold:  size limit in KB; a decoded image smaller than
                                this is rejected. None disables the check.

    Returns:
        OrderedDict with 'bucket_name', 'saved_path', 'image_height',
        'image_width' and 'image_channel'

    Raises:
        ImageDownloadTimeoutException:      the request timed out
        ImageFormatNotSupportException:     OpenCV cannot decode the payload
                                            (including an empty response body)
        ImageFileSizeAbnormalException:     the image is below the size threshold
        ConsumerAlgorithmUncatchException:  any other unexpected failure
    """
    to_return_result = OrderedDict()
    try:
        response = requests.get(_to_download_url, timeout=_timeout)
        m_image_file_buffer = response.content
        request_image = None
        # An empty body can make imdecode raise instead of returning None,
        # so treat it directly as an undecodable image.
        if m_image_file_buffer:
            request_image = cv2.imdecode(
                np.frombuffer(m_image_file_buffer, np.uint8), -1)
        if request_image is None:
            raise ImageFormatNotSupportException(
                f'image:{_to_download_url} format not support,cannot decode by opencv'
            )
        if len(request_image.shape) == 3:
            image_h, image_w, image_c = request_image.shape
        else:
            # Two-dimensional array: a single-channel image.
            image_h, image_w = request_image.shape[:2]
            image_c = 1
        # The check is made on the decoded array size, not on the downloaded byte count.
        if _image_size_threshold is not None and request_image.nbytes < _image_size_threshold * 1024:
            raise ImageFileSizeAbnormalException(
                f'image:{_to_download_url} is small than threshold,it may not be a normal picture'
            )
        file_name = get_uuid_name()
        oss_path = os.path.join(get_date_string(), file_name)
        # Store the original image, uncompressed.
        saved_path = _oss_helper.upload_image_file('downloaded-image', oss_path, request_image,
                                                   _enable_compress=False)
        to_return_result['bucket_name'] = 'downloaded-image'
        to_return_result['saved_path'] = saved_path
        to_return_result['image_height'] = image_h
        to_return_result['image_width'] = image_w
        to_return_result['image_channel'] = image_c
        return to_return_result
    except requests.exceptions.Timeout as timeout_error:
        raise ImageDownloadTimeoutException(
            f'{_to_download_url} download timeout') from timeout_error
    except CustomException:
        # Project exceptions already carry the right meaning; pass them through.
        raise
    except Exception as unexpected_error:
        raise ConsumerAlgorithmUncatchException(tb.format_exc()) from unexpected_error
def __init__(self, _is_test):
    """
    Set up the operator's logger and remember whether it runs in test mode.

    The logger is named after ``self.name`` and writes to a file under
    ``logs/`` in the working directory. Because ``logging.getLogger`` returns
    the same logger object for the same name, the file handler is attached
    only once; creating several instances no longer duplicates every log line.

    Args:
        _is_test: when truthy the logger level is DEBUG, otherwise INFO
    """
    self.logger = logging.getLogger(f'operator [{self.name}]')
    self.logger.setLevel(logging.DEBUG if _is_test else logging.INFO)
    has_file_handler = any(
        isinstance(m_handler, handlers.TimedRotatingFileHandler)
        for m_handler in self.logger.handlers
    )
    if not has_file_handler:
        # A "logs" folder is created in the working directory to hold the log files.
        os.makedirs('logs', exist_ok=True)
        date_string = get_date_string()
        log_format = f'%(asctime)s : %(levelname)s : %(process)d: %(thread)x: {self.name}: line %(lineno)d: %(message)s'
        # Rotate once a day, keep 31 backups; the file name carries the start date.
        log_file_handler = handlers.TimedRotatingFileHandler(
            f'logs/{self.name}_{date_string}启动.log',
            encoding='utf-8',
            when='D',
            interval=1,
            backupCount=31)
        log_file_handler.setFormatter(logging.Formatter(log_format))
        self.logger.addHandler(log_file_handler)
    self.is_test = _is_test