示例#1
0
文件: app.py 项目: lijiarui/smp-nlu
def load_models(model_dir='./tmp/nlu_model'):
    """Load the pickled pipeline models stored in ``model_dir``.

    Reads ``config.json`` from ``model_dir``, iterates over its entries and
    unpickles ``<model_name>.pkl`` from the same directory for each entry.

    Args:
        model_dir: Directory holding ``config.json`` and the ``.pkl`` files.

    Returns:
        A list of ``(model_name, model)`` tuples, in the order the names
        appear in ``config.json``.

    Raises:
        SystemExit: With status 1 when ``config.json`` does not exist
            (an error is logged first).
    """
    config_path = os.path.join(model_dir, 'config.json')

    if not os.path.exists(config_path):
        LOG.error('config_path not exsits "%s"', config_path)
        # Same effect as the site-provided ``exit(1)``, but does not depend
        # on the ``site`` module having injected ``exit`` into builtins.
        raise SystemExit(1)

    # Context managers guarantee the handles are closed even if parsing fails.
    with open(config_path) as config_file:
        pipeline_config = json.load(config_file)

    models = []
    for model_name in pipeline_config:
        model_path = os.path.join(model_dir, '{}.pkl'.format(model_name))
        # NOTE(review): unpickling can execute arbitrary code; model_dir must
        # only ever point at trusted, locally produced model files.
        with open(model_path, 'rb') as model_file:
            models.append((model_name, pickle.load(model_file)))
    return models
示例#2
0
def build_model(nlu_data, model_dir, pipline_config):
    """Train each engine named in ``pipline_config`` and save the results.

    The NLU data is loaded once via ``load_nlu_data``. Engines other than
    ``regex_engine`` are fitted on IOB data produced by ``data_to_iob``,
    which is built the first time such an engine is requested and then
    reused. After training, ``config.json`` (a dump of ``pipline_config``)
    and one ``<engine_name>.pkl`` per trained engine are written into
    ``model_dir``, which is created when it does not exist.

    Args:
        nlu_data: Passed unchanged to ``load_nlu_data``.
        model_dir: Output directory for ``config.json`` and the pickles.
        pipline_config: Iterable of engine names to train, in order.

    Raises:
        Exception: When an entry of ``pipline_config`` is not a known
            engine name; nothing is written to disk in that case.
    """
    LOG.info('start build')

    intents, entities = load_nlu_data(nlu_data)
    trained = []

    # Holder for the lazily computed (sentences, slots, domains) triple.
    cached = {'iob': (None, None, None)}

    def _iob():
        """Return the IOB triple, computing it on first use."""
        if cached['iob'][0] is None:
            LOG.info('build IOB data')
            sentences, slots, domains = data_to_iob(intents, entities)
            cached['iob'] = (sentences, slots, domains)
        return cached['iob']

    def _intent_targets(slots, domains):
        """Intent classifiers are fitted against the domain labels."""
        return domains

    def _slot_targets(slots, domains):
        """Slot fillers are fitted against the slot labels."""
        return slots

    def _joint_targets(slots, domains):
        """The joint model is fitted against (slot, domain) pairs."""
        return list(zip(slots, domains))

    # (engine name, constructor, target selector). Constructors are wrapped
    # in lambdas so each class is only looked up when its engine is requested.
    iob_recipes = (
        ('ml_intent_classifier',
         lambda: MLIntentClassifier(), _intent_targets),
        ('dl_intent_classifier',
         lambda: DLIntentClassifier(), _intent_targets),
        ('crf_slot_filler',
         lambda: CRFSlotFiller(), _slot_targets),
        ('neural_slot_filler',
         lambda: NeuralSlotFiller(), _slot_targets),
        ('neural_intent_classifier_slot_filler',
         lambda: NeuralIntentClassifierSlotFiller(), _joint_targets),
    )

    for item in pipline_config:
        LOG.info('train "%s"', item)

        # The regex engine works on the raw intents/entities, not IOB data.
        if item == 'regex_engine':
            trained.append(('regex_engine', RegexEngine(intents, entities)))
            continue

        for known_name, make_engine, pick_targets in iob_recipes:
            if item == known_name:
                break
        else:
            # No recipe matched this pipeline entry.
            LOG.error('invalid engine "%s"', item)
            raise Exception('invalid engine "%s"' % item)

        # Instantiate before touching the IOB data, then fit and record.
        engine = make_engine()
        sentences, slots, domains = _iob()
        engine.fit(sentences, pick_targets(slots, domains))
        trained.append((known_name, engine))

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    with open(os.path.join(model_dir, 'config.json'), 'w') as config_file:
        json.dump(pipline_config, config_file, indent=4, ensure_ascii=False)

    for engine_name, engine in trained:
        pickle_path = os.path.join(model_dir, '{}.pkl'.format(engine_name))
        with open(pickle_path, 'wb') as model_file:
            pickle.dump(engine, model_file)

    LOG.info('train and saved')