Example #1
def demo(model, config, id_to_tag, tag_to_id):
    logger = get_logger(config.log_file)
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        ckpt = tf.train.get_checkpoint_state(config.ckpt_path)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        saver = tf.train.Saver()
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            logger.info("Reading model parameters from %s" %
                        ckpt.model_checkpoint_path)
            # saver = tf.train.import_meta_graph('ckpt/ner.ckpt.meta')
            # saver.restore(session, tf.train.latest_checkpoint("ckpt/"))
            saver.restore(sess, ckpt.model_checkpoint_path)
        # The CRF transition matrix is fixed after training, so fetch it once.
        trans = model.trans.eval(sess)
        # Interactive loop: encode a sentence, run the network, then decode.
        while True:
            line = input("input sentence, please:")
            inputs = input_from_line(line, config.max_seq_len, tag_to_id)
            feed_dict = get_feed_dict(model, False, inputs, config)
            lengths, scores = sess.run([model.lengths, model.logits],
                                       feed_dict)
            batch_paths = decode(scores, lengths, trans, config)
            tags = [id_to_tag[idx] for idx in batch_paths[0]]
            # Strip the [CLS]/[SEP] tags before converting BIO tags to entities.
            result = bio_to_json(inputs[0], tags[1:-1])
            print(result['entities'])
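The `decode` helper is not shown above. In CRF-topped taggers like this one, decoding is normally a Viterbi search over the per-token logits using the learned transition matrix. Below is a minimal sketch of such a helper using TF 1.x's `tf.contrib.crf.viterbi_decode`, assuming logits of shape [batch, max_len, num_tags]; it is a hypothetical reconstruction, not necessarily the repository's implementation.

import tensorflow as tf

def decode(logits, lengths, matrix, config):
    # Hypothetical Viterbi decoder: `logits` are per-token tag scores,
    # `lengths` the true sequence lengths, `matrix` the CRF transitions.
    # `config` is accepted to match the call site but unused in this sketch.
    paths = []
    for score, length in zip(logits, lengths):
        score = score[:length]  # drop padded positions
        path, _ = tf.contrib.crf.viterbi_decode(score, matrix)
        paths.append(path)
    return paths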
Example #2
def bert_ner_infer():
    params = json.loads(request.get_data())  # json.loads dropped the encoding kwarg in Python 3.9
    text = params["text"]
    url = params["url"]  # TensorFlow Serving REST endpoint
    x, len_list = create_infer_inputs(text, max_len, tokenizer)
    print("len_list: ", len_list)
    input_ids = x[0].tolist()
    token_type_ids = x[1].tolist()
    attention_mask = x[2].tolist()
    # Build the request body for TensorFlow Serving's REST predict API.
    data = json.dumps({
        "signature_name": "serving_default",
        "inputs": {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "attention_mask": attention_mask
        }
    })
    headers = {"content-type": "application/json"}
    result = requests.post(url, data=data, headers=headers)
    result = json.loads(result.text)
    pred_logits = result["outputs"][0]
    # Highest-scoring tag id for each token position.
    pred = np.argmax(pred_logits, axis=1).tolist()
    print("pred: ", pred)
    # Map tag ids back to BIO labels, truncated to the true token count.
    predict_label = []
    for j in range(min(len_list[0], max_len)):
        predict_label.append(id2tag[pred[j]])
    return_result = bio_to_json(text, predict_label)
    return jsonify(return_result)
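This view only builds and forwards the TensorFlow Serving request; the snippet omits the route decorator. A client call might look like the sketch below, where the Flask route path and both URLs are assumptions, and the "entities" key follows the shape printed in Example #1.

import json
import requests

payload = {
    "text": "Steve Jobs founded Apple in California.",
    # Assumed TensorFlow Serving predict endpoint; adjust model name/port.
    "url": "http://localhost:8501/v1/models/bert_ner:predict",
}
# Assumed Flask route; the snippet above does not show @app.route.
resp = requests.post("http://localhost:5000/bert_ner_infer",
                     data=json.dumps(payload),
                     headers={"content-type": "application/json"})
print(resp.json()["entities"])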
Example #3
def evaluate_line(self, sess, inputs, id_to_tag):
    trans = self.trans.eval(sess)  # learned CRF transition matrix
    lengths, scores = self.run_step(sess, False, inputs)
    batch_paths = self.decode(scores, lengths, trans)
    tags = [id_to_tag[idx] for idx in batch_paths[0]]
    # Strip the [CLS]/[SEP] tags before converting to entity spans.
    return bio_to_json(inputs[0], tags[1:-1])
Example #4
def bert_ner_model_infer(sentence, tag_to_id, id_to_tag, max_length,
                         docker_url):
    final_result = []
    return_result = {"code": 200, "message": "success"}
    # Replace whitespace in the sentence with a placeholder character
    sentence_ = sentence.replace('\r\n', '✈').replace(' ', '✈')
    sentence_ = sentence_.replace('\u3000', '✈')  # TODO: newly added handling
    sentence_ = sentence_.replace('\xa0', '✈')
    # Record the positions of whitespace and Korean characters
    null_index_list, korean_index_list, null_korean_dict, m1 = count_korean_null_info(
        sentence_)
    sentence_ = ''.join(sentence_.split())
    # Remove Korean (Hangul syllable) characters from the sentence
    sentence_ = re.sub('[\uac00-\ud7ff]+', '', sentence_)
    try:
        # for sentence in text:
        # _, segment_ids, word_ids, word_mask, label_ids
        token_result = input_from_line(sentence_, max_length,
                                       tag_to_id)  # FLAGS.max_seq_len
        word_ids = token_result[2].tolist()
        word_mask = token_result[3].tolist()
        seg_ids = token_result[1].tolist()
        data = json.dumps({
            "signature_name": "serving_default",
            "inputs": {
                "input_ids": word_ids,
                "input_mask": word_mask,
                "segment_ids": seg_ids,
                "dropout": 1.0
            }
        })

        headers = {"content-type": "application/json"}
        json_response = requests.post(docker_url, data=data, headers=headers)
        if json_response.status_code == 200:
            result = json.loads(json_response.text)
            if result == '':
                temp = {'content': sentence, "Entity": []}
                logger.info("实体识别最终结果:" + str(temp))
                return temp
            else:
                pred = result["outputs"][0]
                pred = np.array(pred)
                # argmax per token; drop the [CLS]/[SEP] positions
                label_list = pred.argmax(axis=1).tolist()[1:-1]
                # Restore the whitespace and Korean positions in the labels
                if len(null_korean_dict) > 0:
                    label_list = preprocessing_korean_null(
                        korean_index_list, null_korean_dict, m1, label_list,
                        tag_to_id)
                pred_label = []
                for i in range(min(len(sentence),
                                   max_length - 2)):  # FLAGS.max_seq_len
                    pred_label.append(id_to_tag[label_list[i]])
                pred_label = processing_general_format(pred_label)
                logger.info("模型预测的结果: {}".format(pred_label))
                # pred_label = pred_label[1:-1]
                res = bio_to_json(sentence, pred_label)

                # Keep entities whose surface form is not a bare space.
                for ent in res['entities']:
                    if ent["name"] != " ":
                        final_result.append(ent)
                temp = {'content': sentence, "Entity": final_result}
                logger.info("实体识别抽取结果:" + str(temp))
                return temp

        else:
            temp = {'content': sentence, "Entity": []}
            logger.info("实体识别抽取最终结果:" + str(temp))
            return temp
    except Exception as e:
        logger.error(traceback.format_exc())
        return_result["code"] = 400
        return_result["message"] = traceback.format_exc()
        return_result['content'] = ''
        return_result["Entity"] = []
        return return_result
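Every example above ends in `bio_to_json`, the function this page documents. Below is a minimal sketch of such a converter, assuming character-level BIO tags and the entity keys ("name", "start", "end", "type") suggested by Example #4's `res['entities'][i]["name"]` access; the repository's actual implementation may differ.

def bio_to_json(string, tags):
    # Hypothetical converter: group runs of B-/I- character-level
    # BIO tags over `string` into entity spans.
    entities = []
    entity = None
    for idx, tag in enumerate(tags):
        if tag.startswith("B-"):
            if entity:
                entities.append(entity)
            entity = {"name": string[idx], "start": idx,
                      "end": idx + 1, "type": tag[2:]}
        elif tag.startswith("I-") and entity and tag[2:] == entity["type"]:
            entity["name"] += string[idx]
            entity["end"] = idx + 1
        else:  # "O" tag or a broken I- run closes the current entity
            if entity:
                entities.append(entity)
            entity = None
    if entity:
        entities.append(entity)
    return {"string": string, "entities": entities}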
Example #5
def evaluate_line(self, sess, inputs, id_to_tag):
    lengths, batch_paths = self.run_step(sess, False, inputs)
    print(batch_paths)
    # Skip the two leading special positions and truncate to the real
    # sequence length before mapping tag ids to BIO labels.
    tags = [id_to_tag[idx] for idx in batch_paths[0][2:lengths[0]]]
    return bio_to_json(inputs[0], tags)