def l_trim(cls, haystack, left=''):
    """Strip leading characters from ``haystack``.

    :param haystack: value to trim; returned untouched when ``is_empty``
        reports it as empty.
    :param left: characters passed to ``lstrip``; when ``is_empty`` reports
        it as empty, ``lstrip()`` is called without arguments instead.
    :return: the result of ``haystack.lstrip(...)``, or ``haystack`` itself
        when it is empty.
    """
    # Nothing to trim on an empty input.
    if is_empty(haystack):
        return haystack
    # No explicit character set: fall back to the argument-less lstrip().
    if is_empty(left):
        return haystack.lstrip()
    return haystack.lstrip(left)
def get_asr_progress(self):
    """Query the processing progress of an audio transcription task.

    Posts ``app_id``, ``signa``, ``ts`` and ``task_id`` (read from ``self``)
    to the URL stored in ``self.asr_progress_url``.

    :return: an ``AsrInfo``. When the HTTP request fails, it is built with
        ``ok=codes.failed`` and the HTTP status as ``err_no``. Otherwise it
        is the deserialized response; when its ``ok`` equals
        ``codes.success`` its ``data`` field is replaced by the object
        loaded through ``AsrProgress.AsrProgressSchema``.
    """
    url = getattr(self, 'asr_progress_url', None)
    data = dict(app_id=self.app_id, signa=self.signa, ts=self.ts,
                task_id=self.task_id)
    res = requests.post(url=url, data=data)

    # Validate the response BEFORE touching its attributes; assigning
    # ``res.encoding`` first would make the ``res is None`` guard unreachable.
    if res is None or res.status_code != codes.ok:
        return AsrInfo(
            ok=codes.failed,
            err_no=res.status_code if res is not None else codes.bad,
            failed='查询音频转写处理进度请求失败')
    res.encoding = encodes.Unicode.UTF_8.value

    progress_info, errors = AsrInfo.AsrInfoSchema().load(res.json())
    # NOTE(review): Assert.is_true presumably raises when errors is non-empty.
    Assert.is_true(is_empty(errors), errors)

    if progress_info.ok == codes.success:
        # ``data`` arrives as a JSON string; decode it into a progress object.
        progress_details, errors = AsrProgress.AsrProgressSchema().load(
            json.loads(progress_info.data))
        Assert.is_true(is_empty(errors), errors)
        progress_info.data = progress_details
    return progress_info
def lexer(self, text, ne_list=None, url=None):
    """Run lexical analysis on ``text`` through the remote lexer endpoint.

    :param text: text to analyse; sent as ``{"text": text}`` in a JSON body.
    :param ne_list: named-entity types to keep. When non-empty, every item
        whose ``ne`` is not in this collection is dropped from the result.
        ``None`` (the default) or an empty collection disables filtering.
    :param url: endpoint template containing an ``{access_token}``
        placeholder; defaults to ``self.lexer_url``.
    :return: the object loaded through ``LexerRes.LexerResSchema`` (expected
        to be a ``LexerRes``), with ``items`` filtered in place if requested.
    :raises MyError: when the HTTP request does not return ``codes.ok``.
    """
    # Avoid a shared mutable default argument; None means "no filter".
    if ne_list is None:
        ne_list = []

    url = url if url else getattr(self, 'lexer_url', None)
    url = url.format(access_token=self.token)
    res = requests.post(url=url, data=json.dumps(obj={'text': text}),
                        headers=ContentType.JSON_UTF8.value)

    # Validate the response BEFORE touching its attributes; assigning
    # ``res.encoding`` first would make the ``res is None`` guard unreachable.
    if res is None or res.status_code != codes.ok:
        raise MyError(code=codes.failed, msg='百度词法分析请求失败.')
    res.encoding = encodes.Unicode.UTF_8.value

    lexer_res, errors = LexerRes.LexerResSchema().load(res.json())
    # NOTE(review): Assert.is_true presumably raises when errors is non-empty.
    Assert.is_true(is_empty(errors), errors)

    if is_not_empty(ne_list):
        # Filter by entity type. Slice assignment keeps the same list object
        # while avoiding the quadratic copy-and-remove pattern.
        lexer_res.items[:] = [
            item for item in lexer_res.items if item.ne in ne_list
        ]
    return lexer_res
def probe(cls, file_path, is_ffprobe=True):
    """Parse the parameters of an audio file.

    The first entry of the probe result's ``streams`` is merged with its
    ``format`` section and a ``file_name`` field, then loaded through
    ``AudioSchema``.

    :param file_path: path of the file to inspect; must be an existing file.
    :param is_ffprobe: whether to keep the format reported by the probe.
        When false, ``format_name`` on the result is overwritten with the
        extension returned by ``FileUtil.get_path_name_ext``.
    :return: the object produced by ``AudioSchema().load``.
    :rtype: Audio
    """
    Assert.is_true(os.path.isfile(file_path),
                   '文件不存在:{0}'.format(file_path))

    _dir, base_name, extension = FileUtil.get_path_name_ext(file_path)

    probed = ffmpeg.probe(file_path)
    # Start from the first stream and layer the container-level fields on top.
    fields = probed.get('streams')[0]
    fields.update(probed.get('format'))
    fields.update(
        file_name='{name}.{ext}'.format(name=base_name, ext=extension))

    audio, errors = AudioSchema().load(fields)
    Assert.is_true(is_empty(errors), errors)

    if is_ffprobe:
        return audio
    # Caller asked to trust the file extension rather than the probed format.
    audio.format_name = extension
    return audio
def init_object_dict():
    """Initialise a dictionary from configuration files.

    Asks ``FileUtil.get_files_by_suffix`` for the ``cfg`` files of
    ``base_static`` and merges the dictionary returned by
    ``ConfigUtils(path).get_config_dict()`` for each of them. Keys from
    later files overwrite keys from earlier ones.

    :return: the merged dictionary; empty when no file is found.
    """
    merged = {}
    cfg_paths = FileUtil.get_files_by_suffix(base_static, ['cfg'])
    if not is_empty(cfg_paths):
        for cfg_path in cfg_paths:
            merged.update(ConfigUtils(cfg_path).get_config_dict())
    return merged
def _asr_prepare(self, **kwargs):
    """Send the pre-processing request for an audio transcription.

    :param kwargs: extra form fields for the request. ``app_id``, ``signa``
        and ``ts`` are always taken from ``self`` and override any
        same-named keys passed in.
    :return: an ``AsrInfo``. When the HTTP request fails, it is built with
        ``ok=codes.failed`` and the HTTP status as ``err_no``. Otherwise it
        is the response loaded through ``AsrInfo.AsrInfoSchema``.
    """
    url = getattr(self, 'asr_prepare_url', None)
    kwargs.update(dict(app_id=self.app_id, signa=self.signa, ts=self.ts))
    res = requests.post(url=url, data=kwargs)

    # Validate the response BEFORE touching its attributes; assigning
    # ``res.encoding`` first would make the ``res is None`` guard unreachable.
    if res is None or res.status_code != codes.ok:
        return AsrInfo(
            ok=codes.failed,
            err_no=res.status_code if res is not None else codes.bad,
            failed='音频转写预处理请求失败')
    res.encoding = encodes.Unicode.UTF_8.value

    prepare_info, errors = AsrInfo.AsrInfoSchema().load(res.json())
    # NOTE(review): Assert.is_true presumably raises when errors is non-empty.
    Assert.is_true(is_empty(errors), errors)
    return prepare_info
def get_asr_result(self):
    """Query the result of an audio transcription task.

    Posts ``app_id``, ``signa``, ``ts`` and ``task_id`` (read from ``self``)
    to the URL stored in ``self.asr_result_url``.

    :return: an ``AsrInfo``. When the HTTP request fails, it is built with
        ``ok=codes.failed`` and the HTTP status as ``err_no``. Otherwise it
        is the response loaded through ``AsrInfo.AsrInfoSchema``.
    """
    url = getattr(self, 'asr_result_url', None)
    data = dict(app_id=self.app_id, signa=self.signa, ts=self.ts,
                task_id=self.task_id)
    res = requests.post(url=url, data=data)

    # Validate the response BEFORE touching its attributes; assigning
    # ``res.encoding`` first would make the ``res is None`` guard unreachable.
    if res is None or res.status_code != codes.ok:
        return AsrInfo(
            ok=codes.failed,
            err_no=res.status_code if res is not None else codes.bad,
            failed='查询音频转写结果请求失败')
    res.encoding = encodes.Unicode.UTF_8.value

    result_info, errors = AsrInfo.AsrInfoSchema().load(res.json())
    # NOTE(review): Assert.is_true presumably raises when errors is non-empty.
    Assert.is_true(is_empty(errors), errors)
    return result_info
def hump_to_pep8(cls, s):
    """Convert a camel-case name to PEP 8 style.

    ``UNDER_LINE`` is inserted before an uppercase letter that either
    follows a non-uppercase character or ends a run of uppercase letters
    (i.e. is followed by a lowercase letter). The result is lower-cased.
    Assuming ``UNDER_LINE`` is ``'_'``, ``"getHTTPResponse"`` becomes
    ``"get_http_response"`` and ``"getHTTP"`` becomes ``"get_http"``.

    :param s: the camel-case string; returned untouched when ``is_empty``
        reports it as empty.
    :return: the converted, lower-cased string.
    """
    if is_empty(s):
        return s

    last = len(s) - 1
    parts = [s[0]]
    for i in range(1, len(s)):
        ch = s[i]
        if ch.isupper():
            prev_upper = s[i - 1].isupper()
            # Bounds-check the lookahead: a string ending in consecutive
            # uppercase letters used to raise IndexError on s[i + 1].
            next_lower = i < last and s[i + 1].islower()
            # Separator goes at a lower->upper boundary, or before the last
            # capital of an acronym that starts a new word.
            if not prev_upper or next_lower:
                parts.append(UNDER_LINE)
        parts.append(ch)
    return ''.join(parts).lower()
def _asr_upload(self, upload_file_path):
    """Upload an audio file for transcription in slices.

    The file is read in chunks of ``self.__FILE_PIECE_SIZE`` bytes; each
    chunk is posted to ``self.asr_upload_url`` together with the next slice
    id from a fresh ``SliceIdGenerator``.

    :param upload_file_path: path of the audio file to upload.
    :return: an ``AsrInfo`` with ``ok=codes.failed`` as soon as one HTTP
        request fails; ``None`` once every slice has been sent.
    """
    url = getattr(self, 'asr_upload_url', None)
    sig = SliceIdGenerator()
    with open(upload_file_path, 'rb') as f:
        while True:
            # Double-underscore attribute: name-mangled when this method
            # lives inside its class, so the spelling must stay as is.
            content = f.read(self.__FILE_PIECE_SIZE)
            if not content:
                # End of file reached.
                break

            data = dict(app_id=self.app_id, signa=self.signa, ts=self.ts,
                        task_id=self.task_id,
                        slice_id=sig.get_next_slice_id())
            res = requests.post(url=url, data=data,
                                files={'content': content})

            # Validate the response BEFORE touching its attributes;
            # assigning ``res.encoding`` first would make the
            # ``res is None`` guard unreachable.
            if res is None or res.status_code != codes.ok:
                return AsrInfo(
                    ok=codes.failed,
                    err_no=res.status_code if res is not None else codes.bad,
                    failed='音频转写文件分片上传请求失败')
            res.encoding = encodes.Unicode.UTF_8.value

            upload_info, errors = AsrInfo.AsrInfoSchema().load(res.json())
            # NOTE(review): Assert.is_true presumably raises on a false
            # condition -- confirm against its definition.
            Assert.is_true(is_empty(errors), errors)
            # The service reported a failure for this slice.
            Assert.is_true(upload_info.ok == codes.success,
                           upload_info.err_no,
                           '分片上传失败:{0}'.format(upload_info.failed))