Example #1
def start_extract(options, prerequisite, requirement, model_dir):
    # Run the three extraction stages, falling back to an empty dict for any
    # stage that fails.
    try:
        items1 = extract_approval_opinion(options,
                                          model_dir) if options else {}
        logger.debug('items1: {}'.format(items1))
    except Exception as e:
        logger.error('抽取审批意见报错')  # error while extracting approval opinions
        logger.error(e)
        items1 = {}

    try:
        items2 = extract_preconditions(prerequisite,
                                       model_dir) if prerequisite else {}
        logger.debug('items2: {}'.format(items2))
    except Exception as e:
        logger.error('抽取前提条件报错')  # error while extracting preconditions
        logger.error(str(e) + traceback.format_exc())
        items2 = {}

    try:
        items3 = extract_management_condition(
            requirement, model_dir) if requirement else {}
        logger.debug('items3: {}'.format(items3))
    except Exception as e:
        logger.error('抽取管理要求报错')  # error while extracting management requirements
        logger.error(e)
        items3 = {}

    # NOTE: the grouping keys below are placeholders; the original key names
    # are not shown in this snippet.
    return_items = {
        'approval_opinion': items1,
        'preconditions': items2,
        'management_condition': items3,
    }
    logger.info('start_extract::{}'.format(return_items))
    return return_items
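
As the later examples show (cut_prerequisite, extract_management_condition), prerequisite and requirement are (text, mapper) pairs in which mapper is indexed per character of text. A minimal illustration of that shape; treating mapper as a list of original character offsets is an assumption:

text = '一、借款人按期归还贷款本息;二、提供足值有效担保'
# One mapper entry per character of text; here it simply records each
# character's offset in the source document (assumed semantics).
mapper = list(range(100, 100 + len(text)))
prerequisite = (text, mapper)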
Example #2
    def __init__(self,
                 output_dir,
                 tmp_dir,
                 upload_dir,
                 port,
                 model_links_dir='model',
                 config_links_dir='config'):
        # if not os.path.exists(output_dir):
        #     raise ValueError('参数output_dir目录并不存在'.encode('utf-8'))
        # if not os.path.exists(tmp_dir):
        #     raise ValueError('参数tmp_dir目录并不存在'.encode('utf-8'))
        # if not os.path.exists(upload_dir):
        #     raise ValueError('参数upload_dir目录并不存在'.encode('utf-8'))
        # if not isinstance(port, int) or port <= 0 or port > 65535:
        #     raise ValueError('参数port必须合法(0~65535)')

        self._output_dir = output_dir
        self._tmp_dir = tmp_dir
        self._upload_dir = upload_dir
        self._port = port
        self._model_links_dir = model_links_dir
        self._config_links_dir = config_links_dir

        u_shape_framework.set_logger(logger)
        logger.info('initializing u_shape_framework engine ...')
        initialize_engine(u_shape_framework_conf)
        logger.info('engine initialize finish')
Example #3
 def predict(self, content, field_config):
     logger.info('start to run online workflow')
     workflow = get_current_engine().get_workflow('otonline')
     request_property = {'content': content, 'field_config': field_config}
     output = workflow.run(request_property)
     result = output['results']
     return result
Example #4
 def predict(self, doctype, content, rich_content, fields=()):
     workflow_name = 'online'
     workflow = get_current_engine().get_workflow(workflow_name)
     logger.info('start to run {} workflow'.format(workflow_name))
     request_property = {
         # 'models_manager': self.models_manager,
         'doctype': doctype,
         'rich_content': rich_content,
         'content': content,
     }
     if fields:
         request_property['fields'] = fields
     output = workflow.run(request_property)
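     # NOTE: output is computed above but not consumed; the snippet returns an
     # empty placeholder result below.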
     result = {
         'result': '',
     }
     return result
Example #5
def cut_prerequisite(prerequisite, type):
    CNYD_REG = re.compile(r'[^0-9::]+[::]')
    return_items = {'result': [], 'type': type}

    if not prerequisite:
        return return_items

    text, mapper = prerequisite[0], prerequisite[1]
    logger.info('前提条件文本内容:{}'.format(text))
    reg_1 = re.compile(r'[  1一、.()\s]+')
    result_reg_1 = reg_1.match(text)
    split_sign = result_reg_1.group() if result_reg_1 else 'xxx'
    split_sign = re.sub(r'[\s  ]', r'', split_sign)

    if split_sign not in SPLIT_SIGN_DICT:
        cnyd_result = CNYD_REG.match(text)
        if not cnyd_result:
            return_items['result'].append(
                cut_prerequisite_nosign(text, mapper, type))

        else:
            if re.search(r'承诺|约定|访谈|书面记录|(协议|合同).*[增添]加', cnyd_result.group()):
                return_items['type'] = r'承诺'
            return_items['result'].append({
                'result': [[cnyd_result.group(), mapper[cnyd_result.start()]]],
                'type': return_items['type']
            })
            start, end = cnyd_result.span()
            return_items['result'].append(
                cut_prerequisite([text[end:], mapper[end:]],
                                 return_items['type']))

    else:
        split_sign_reg = SPLIT_SIGN_DICT[split_sign]
        split_sign_len = len(split_sign)
        splited_parts = re.split(split_sign_reg, text)
        index = 0
        for each in splited_parts:
            if re.sub(r'\s', r'', each):
                return_items['result'].append(
                    cut_prerequisite([each, mapper[index:index + len(each)]],
                                     return_items['type']))
            index += len(each) + split_sign_len
    return return_items
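
SPLIT_SIGN_DICT is defined outside this snippet. Judging by how it is used above (looked up with the leading enumeration marker, and its value passed to re.split), it maps a list marker to a split pattern. A minimal sketch, with assumed markers and patterns:

import re

# Hypothetical sketch of SPLIT_SIGN_DICT; the concrete markers and patterns
# are assumptions, not the original definition.
SPLIT_SIGN_DICT = {
    '一、': r'[一二三四五六七八九十]+、',  # "一、... 二、..." numbered clauses
    '1.': r'\d+[..、]',                  # "1. ... 2. ..." numbered clauses
    '(': r'[((]\d+[))]',                 # "(1) ... (2) ..." numbered clauses
}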
Example #6
    def start(self):

        # models_manager = ClassifyModelsManager(self._output_dir, ClassifyPredictor, logger, self._model_links_dir,
        #                                        self._config_links_dir)
        logger.info('start server...')
        app = tornado.web.Application(handlers=[
            (
                conf.PREDICT_ROUTER,
                PredictHandler,
                #      {
                #     # 'models_manager': models_manager,
                #     'tmp_dir': self._tmp_dir,
                #     'upload_dir': self._upload_dir
                # }
            ),
            (
                conf.PREDICT_PATH_ROUTER,
                PredictPathHandler,
                {
                    # 'models_manager': models_manager,
                    'tmp_dir': self._tmp_dir,
                    'upload_dir': self._upload_dir
                }),
            (
                conf.RELOAD_ROUTER,
                ReloadHandler,
                {
                    # 'models_manager': models_manager
                }),
            (
                conf.PREDICT_BY_FIELDS,
                PredictByFieldsHandler,
                {
                    # 'models_manager': models_manager,
                    'tmp_dir': self._tmp_dir,
                    'upload_dir': self._upload_dir
                })
        ])
        app.listen(address='0.0.0.0', port=self._port)
        logger.info('server starts with address: 0.0.0.0, port: {}'.format(
            self._port))
        tornado.ioloop.IOLoop.current().start()
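
A minimal sketch of how this server might be constructed and launched, assuming the __init__ and start snippets above belong to a class named something like PredictServer; the class name, paths and port below are placeholders:

# Hypothetical wiring; PredictServer, the paths and the port are placeholders.
if __name__ == '__main__':
    server = PredictServer(
        output_dir='./output',
        tmp_dir='./tmp',
        upload_dir='./upload',
        port=8000,
    )
    server.start()  # blocks inside tornado.ioloop.IOLoop.current().start()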
Example #7
def extract_management_condition(sent_map, model_dir):
    text, all_mapper = sent_map[0], sent_map[1]
    return_items = {}
    each_index = 0
    each_sent_list_juhao = re.split(r'[;; 。]', text)
    for each_sent in each_sent_list_juhao:
        if not each_sent.replace(r' ', ''):
            each_index += len(each_sent) + 1
            continue
        field_id, prob = extract(each_sent, model_dir)

        if field_id not in return_items:
            return_items[field_id] = []
        return_items[field_id].append(
            [each_sent, prob, all_mapper[each_index]])

        each_index += len(each_sent) + 1
    logger.info('extract_management_condition::{}'.format(return_items))
    return return_items
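
The returned structure groups clauses by the classifier's field_id; each entry is a [clause_text, probability, original_offset] triple. An illustrative value (the field ids, probabilities and offsets below are made up):

# Purely illustrative output shape for extract_management_condition.
return_items = {
    'repayment': [['按期归还贷款本息', 0.93, 120]],
    'guarantee': [['提供足值有效担保', 0.88, 131]],
}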
Example #8
def extract_preconditions(sents_map, model_dir):
    cuted_sent_dict = cut_prerequisite(sents_map, r'非承诺')
    return_items = {}
    tk_list = solve(cuted_sent_dict)

    # re_compile_list_high = PRECONDITIONS_HIGH
    # re_compile_list_MID = PRECONDITIONS_MID

    for each_tk in tk_list:
        tk_content = each_tk[0]
        tk_start_idx = each_tk[1]
        tk_type = each_tk[2]
        logger.debug('tk_content: {}'.format(tk_content))
        field_id, prob = extract(tk_content, model_dir)
        if field_id not in return_items:
            return_items[field_id] = []
        return_items[field_id].append([tk_content, prob, tk_start_idx])

    logger.info('extract_preconditions::{}'.format(return_items))
    return return_items
Example #9
 def post(self):
     result = {'status': 'OK', 'msg': ''}
     init_time = time.time()
     try:
         data = json.loads(self.request.body)
         caller_request_id = data.get('caller_request_id', None)
         self_request_id = generate_request_id()
         logger.update_logger_extra({'caller_request_id': caller_request_id, 'self_request_id': self_request_id})
         doctype, content, rich_content_path = str(data['doctype']), data['content'], data['rich_content_path']
         logger.info('received data keys: {}, doctype: {}, content: {} ......'.format(
             list(data.keys()), doctype, content[:100]))
         with codecs.open('{}/{}'.format(self.upload_dir, rich_content_path)) as f:
             rich_content = json.loads(f.read())
         result.update(self.predict(doctype, content, rich_content))
     except Exception as e:
         result['status'] = 'ERROR'
         result['msg'] = '{}'.format(e)
         logger.exception(e)
     result_str = json.dumps(result, ensure_ascii=False)
     self.write(result_str)
     logger.info('results: {}, cost time: {}s'.format(result_str, time.time() - init_time))
     logger.update_logger_extra()
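
For illustration, a request to this handler could be posted as sketched below. The URL is a placeholder for whatever conf.PREDICT_PATH_ROUTER maps to, and rich_content_path must name a JSON file already present under the handler's upload_dir:

import json
import requests  # any HTTP client works; requests is assumed here

payload = {
    'caller_request_id': 'req-001',      # optional correlation id
    'doctype': 'contract',               # placeholder document type
    'content': '贷款合同正文',            # plain-text document content
    'rich_content_path': 'sample.json',  # JSON file under upload_dir
}
resp = requests.post('http://127.0.0.1:8000/predict_path',  # placeholder URL
                     data=json.dumps(payload))
print(resp.json())  # {'status': 'OK', 'msg': '', ...prediction fields}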
Example #10
 def post(self):
     result = {'status': 'OK', 'msg': ''}
     init_time = time.time()
     try:
         data = json.loads(self.request.body)
         caller_request_id = data.get('caller_request_id', None)
         self_request_id = generate_request_id()
         logger.update_logger_extra({'caller_request_id': caller_request_id, 'self_request_id': self_request_id})
         doctype, content, rich_content, fields = str(data['doctype']), data['content'], \
                                                  data['rich_content'], data['fields']
         logger.info('received data keys: {}, doctype: {}, fields: {}, content: {} ......'.format(
             list(data.keys()), doctype, fields, content[:100]))
         if not isinstance(fields, list):
             raise ValueError('args: fields must be list')
         result.update(self.predict(doctype, content, rich_content, fields))
     except Exception as e:
         result['status'] = 'ERROR'
         result['msg'] = '{}'.format(e)
         logger.exception(e)
     result_str = json.dumps(result, ensure_ascii=False)
     self.write(result_str)
     logger.info('results: {}, cost time: {}s'.format(result_str, time.time() - init_time))
     logger.update_logger_extra()
Example #11
 def post(self):
     try:
         # get argument
         model_version = self.get_argument('model_version')
         caller_request_id = self.get_argument('caller_request_id', default=None)
         self_request_id = generate_request_id()
         logger.update_logger_extra({'caller_request_id': caller_request_id, 'self_request_id': self_request_id})
         logger.info('model_version: {}'.format(model_version))
         # update the model symlinks
         logger.info('links updating ...')
         # self.models_manager.update_links(model_version)
         # reload models
         logger.info('reloading ...')
         # self.models_manager.reload_models()
         logger.info('reloaded')
     except Exception as e:
         logger.exception(e)
         raise
     finally:
         logger.update_logger_extra()
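
A reload request might be issued as below; the URL is a placeholder for conf.RELOAD_ROUTER, and model_version is read via get_argument, so it can be sent as a form field:

import requests  # assumed HTTP client

resp = requests.post('http://127.0.0.1:8000/reload',  # placeholder URL
                     data={'model_version': '20240101',
                           'caller_request_id': 'req-002'})
# The handler writes no response body; success is signalled by HTTP 200.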
Example #12
 def setUp(self):
     # set up the u_shape_framework logger; engine initialization itself is
     # not performed in this snippet
     u_shape_framework.set_logger(logger)
     logger.info('initializing u_shape_framework engine ...')
Example #13
 def _get_features(self, doctype, field, content):
     logger.info('get features for doctype: {}, field: {}'.format(
         doctype, field))
     return content