示例#1
0
def model_train_done_rnn(model_id, lag_dates, pcas, file_path_list,
                         sub_model_names, outputs_list, events_set, model_dir):
    """
    Persist the results of a finished RNN training run in one transaction.

    Writes rows to t_event_model_file (one per model file),
    t_event_model_detail (one per sub-model) and t_event_model_tran (one per
    sub-model/event pair), then stores ``model_dir`` on the t_event_model row
    identified by ``model_id``.

    ``sub_model_names``, ``lag_dates`` and ``pcas`` are consumed with ``zip``,
    as are the generated detail ids and ``outputs_list``; entries beyond the
    shortest sequence are ignored.

    :param model_id: string. Model identifier.
    :param lag_dates: array. Lag value per sub-model (converted with int()).
    :param pcas: array. PCA value per sub-model (converted with int()).
    :param file_path_list: array. Model file locations.
    :param sub_model_names: array. Sub-model names.
    :param outputs_list: array. Per-sub-model outputs handed to
        ``pp.get_event_num`` to obtain the per-event counts.
    :param events_set: array. Event names; one tran row is written per event.
    :param model_dir: str. Value written to t_event_model.model_dir.
    :return: array. The generated detail ids, in sub-model order.
    :raises RuntimeError: if any step fails. The transaction is rolled back
        and the original exception is attached as ``__cause__``.
    """
    db = DatabaseWrapper(dbname=event_dbname)
    try:
        db.begin_transaction()

        # Model file locations.
        file_sql = "INSERT INTO t_event_model_file(file_id, file_url, model_id) VALUES (%s, %s, %s) "
        file_rows = [(UuidHelper.guid(), model_fp, model_id)
                     for model_fp in file_path_list]
        db.executemany(file_sql, file_rows)

        # Sub-model details; the generated ids are kept because the tran rows
        # below reference them and the caller receives them as the result.
        detail_sql = "INSERT INTO t_event_model_detail(detail_id, model_name, status, model_id, lag_date, pca, create_date, " \
                     "create_time) values (%s, %s, %s, %s, %s, %s, %s, %s)"
        detail_ids = []
        detail_rows = []
        for sub_model_name, lag_date, pca in zip(sub_model_names, lag_dates,
                                                 pcas):
            detail_id = UuidHelper.guid()
            detail_ids.append(detail_id)
            detail_rows.append(
                (detail_id, sub_model_name, DataStatus.SUCCESS.value, model_id,
                 int(lag_date), int(pca), sys_date(sys_date_formatter),
                 sys_time(sys_time_formatter)))
        db.executemany(detail_sql, detail_rows)

        # Per-event rows for every sub-model.
        tran_sql = "INSERT INTO t_event_model_tran(tran_id, event_name, num, detail_id, status, create_date, create_time) " \
                   "values (%s, %s, %s, %s, %s, %s, %s)"
        tran_rows = []
        for detail_id, outputs in zip(detail_ids, outputs_list):
            events_num = pp.get_event_num(outputs, events_set)
            tran_rows.extend(
                (UuidHelper.guid(), event, events_num[event], detail_id,
                 DataStatus.SUCCESS.value, sys_date(sys_date_formatter),
                 sys_time(sys_time_formatter))
                for event in events_set)
        db.executemany(tran_sql, tran_rows)

        db.execute("UPDATE t_event_model SET model_dir = %s WHERE model_id = %s",
                   (model_dir, model_id))

        db.commit()
        return detail_ids
    except Exception as err:
        db.rollback()
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise RuntimeError(err) from err
    finally:
        db.close()
 def evaluate(self):
     """
     Evaluate ``self.model`` on the evaluation date range for one event.

     Test samples are generated for ``self.eval_start_date`` ..
     ``self.eval_end_date``; the expected outputs are reduced to the column
     of ``self.eval_event`` (its position in ``self.events_set``) and given a
     trailing axis of length 1 before being scored.

     :return: the first element of the summary returned by
         ``model_evalution.evaluate_sub_model``.
     """
     test_inputs, test_outputs = preprocess.gen_samples_by_pred_date(
         self.data, self.events_p_oh, self.input_len, self.output_len,
         self.dates, self.eval_start_date, self.eval_end_date)

     # Keep only the evaluated event's column, then restore a last axis.
     column = self.events_set.index(self.eval_event)
     test_outputs = test_outputs[:, :, column]
     test_outputs = np.reshape(test_outputs, (*test_outputs.shape, 1))

     event_counts = preprocess.get_event_num(test_outputs, [self.eval_event])
     predictions = predict_cnn.predict_samples(self.model, test_inputs)

     summary, _separate, _events = model_evalution.evaluate_sub_model(
         predictions, test_outputs, [self.eval_event], event_counts)
     return summary[0]
    def evaluate(self):
        """
        Evaluate the encoder/decoder pair on the evaluation date range.

        Test samples are generated for ``self.eval_start_date`` ..
        ``self.eval_end_date`` and scored against every event in
        ``self.events_set``.

        :return: the first element of the summary returned by
            ``model_evalution.evaluate_sub_model``.
        """
        test_inputs, test_outputs = preprocess.gen_samples_by_pred_date(
            self.data, self.events_p_oh, self.input_len, self.output_len,
            self.dates, self.eval_start_date, self.eval_end_date)

        event_counts = preprocess.get_event_num(test_outputs, self.events_set)
        predictions = predict_rnn.predict_samples(
            self.encoder, self.decoder, test_inputs, self.output_len,
            len(self.events_set))

        summary, _separate, _events = model_evalution.evaluate_sub_model(
            predictions, test_outputs, self.events_set, event_counts)
        return summary[0]
示例#4
0
def model_train_done_cnn(model_id, kernel_size_array, pool_size_array,
                         lag_date_array, file_path_array, sub_model_name_array,
                         output_array, event_set_array, model_dir):
    """
    Persist the results of a finished CNN training run in one transaction.

    Writes rows to t_event_model_file (one per model file),
    t_event_model_detail (one per sub-model) and t_event_model_tran (one per
    sub-model/event pair), then stores ``model_dir`` on the t_event_model row
    identified by ``model_id``.

    The per-sub-model arrays are consumed with ``zip``, as are the generated
    detail ids and ``output_array``; entries beyond the shortest sequence are
    ignored.

    :param model_id: string. Model identifier.
    :param kernel_size_array: array. Kernel size per sub-model (int()-converted).
    :param pool_size_array: array. Pool size per sub-model (int()-converted).
    :param lag_date_array: array. Lag value per sub-model (int()-converted).
    :param file_path_array: array. Model file locations.
    :param sub_model_name_array: array. Sub-model names.
    :param output_array: array. Per-sub-model event samples handed to
        ``pp.get_event_num`` to obtain the per-event counts.
    :param event_set_array: array. Event names; one tran row per event.
    :param model_dir: str. Value written to t_event_model.model_dir.
    :return: array. The generated detail ids, in sub-model order.
    :raises RuntimeError: if any step fails. The transaction is rolled back
        and the original exception is attached as ``__cause__``.
    """
    db = DatabaseWrapper(dbname=event_dbname)
    try:
        db.begin_transaction()

        # Model file locations.
        file_sql = "INSERT INTO t_event_model_file(file_id, file_url, model_id) VALUES (%s, %s, %s) "
        file_rows = [(UuidHelper.guid(), model_fp, model_id)
                     for model_fp in file_path_array]
        db.executemany(file_sql, file_rows)

        # Sub-model details; the generated ids are kept because the tran rows
        # below reference them and the caller receives them as the result.
        detail_sql = "INSERT INTO t_event_model_detail(detail_id, model_name, status, model_id, lag_date, kernel_size, " \
                     "pool_size, create_date, create_time) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        detail_ids = []
        detail_rows = []
        for sub_model_name, lag_date, kernel_size, pool_size in zip(
                sub_model_name_array, lag_date_array, kernel_size_array,
                pool_size_array):
            detail_id = UuidHelper.guid()
            detail_ids.append(detail_id)
            detail_rows.append(
                (detail_id, sub_model_name, DataStatus.SUCCESS.value, model_id,
                 int(lag_date), int(kernel_size), int(pool_size),
                 sys_date(sys_date_formatter), sys_time(sys_time_formatter)))
        db.executemany(detail_sql, detail_rows)

        # Per-event rows for every sub-model.
        tran_sql = "INSERT INTO t_event_model_tran(tran_id, event_name, num, detail_id, status, create_date, create_time) " \
                   "values (%s, %s, %s, %s, %s, %s, %s)"
        tran_rows = []
        for detail_id, outputs in zip(detail_ids, output_array):
            events_num = pp.get_event_num(outputs, event_set_array)
            tran_rows.extend(
                (UuidHelper.guid(), event, events_num[event], detail_id,
                 DataStatus.SUCCESS.value, sys_date(sys_date_formatter),
                 sys_time(sys_time_formatter))
                for event in event_set_array)
        db.executemany(tran_sql, tran_rows)

        db.execute("UPDATE t_event_model SET model_dir = %s WHERE model_id = %s",
                   (model_dir, model_id))

        db.commit()
        return detail_ids
    except Exception as err:
        db.rollback()
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise RuntimeError(err) from err
    finally:
        db.close()
def evaluate_models(data, dates, detail_ids, sub_model_dirs, params_list,
                    events_p_oh, events_set, n_classes, start_date, end_date,
                    model_type, eval_event, model_dir):
    """
    Evaluate each sub-model and store the per-event results in the database.

    Args:
      data: table data used to generate the predictions
      dates: date list of the data table (all dates, not only the evaluated
        range)
      detail_ids: detail_id of each sub-model
      sub_model_dirs: directory of each sub-model's model files
      params_list: hyper-parameters of each sub-model; unpacked as
        (input_len, output_len) for CNN and (input_len, output_len, n_pca)
        for RNN
      events_p_oh: zero-padded one-hot event list over the full date range
      events_set: ordered, de-duplicated event collection
      n_classes: number of event classes, including the 0 event; passed to
        the RNN prediction call
      start_date: first prediction date to evaluate
      end_date: last prediction date to evaluate
      model_type: string compared against ModelType.CNN.value and
        ModelType.RNN.value
      eval_event: event to evaluate; only read in the CNN branch
      model_dir: str, model directory; only read in the RNN branch (PCA)

    Returns:
      scores: list with one entry per sub-model: the seven summary values
        followed by the detail_id
      events_num: event counts computed for the last sub-model processed
        ({} when there are no sub-models)
      (None, None) is returned as soon as a sub-model yields an empty
      summary.

    Raises:
      RuntimeError: if model_type is neither the CNN nor the RNN value.
    """
    scores = []
    events_num = {}

    for detail_id, sub_model_dir, params in zip(detail_ids, sub_model_dirs,
                                                params_list):
        logger.info(f'评估模型: {sub_model_dir}, detail_id: {detail_id}')

        if ModelType.CNN.value == model_type:
            input_len, output_len = params
            test_inputs, test_outputs = pp.gen_samples_by_pred_date(
                data, events_p_oh, input_len, output_len, dates, start_date,
                end_date)

            # Keep only the evaluated event's column, then restore a last axis.
            column = events_set.index(eval_event)
            test_outputs = test_outputs[:, :, column]
            test_outputs = np.reshape(test_outputs, (*test_outputs.shape, 1))
            active_events = [eval_event]

            cnn_model = load_cnn_model(sub_model_dir)
            predictions = predict_cnn.predict_samples(cnn_model, test_inputs)
        elif ModelType.RNN.value == model_type:
            input_len, output_len, n_pca = params
            reduced = pp.apply_pca(n_pca, model_dir, data, True)
            active_events = events_set

            test_inputs, test_outputs = pp.gen_samples_by_pred_date(
                reduced, events_p_oh, input_len, output_len, dates,
                start_date, end_date)
            encoder, decoder = load_rnn_models(sub_model_dir)
            predictions = predict_rnn.predict_samples(
                encoder, decoder, test_inputs, output_len, n_classes)
        else:
            raise RuntimeError(f"Unsupport model type {model_type}")

        events_num = pp.get_event_num(test_outputs, active_events)
        evals_summary, evals_separate, eval_events = evaluate_sub_model(
            predictions, test_outputs, active_events, events_num)

        # An empty summary means the evaluated event did not occur within
        # the evaluation date range.
        if len(evals_summary) == 0:
            return None, None

        # The event name comes from the record itself and the stored count
        # from events_num; eval_events only bounds the iteration via zip.
        for _, record in zip(eval_events, evals_separate):
            (event, _record_num, false_report, recall, false_alert,
             tier_precision, tier_recall, bleu_score) = record
            pgsql.insert_model_test(event, events_num[event], false_report,
                                    recall, false_alert, tier_precision,
                                    tier_recall, bleu_score, detail_id)

        (score_summary, bleu_summary, tier_precision_summary,
         tier_recall_summary, fr_summary, rc_summary,
         fa_summary) = evals_summary
        scores.append([
            score_summary, bleu_summary, tier_precision_summary,
            tier_recall_summary, fr_summary, rc_summary, fa_summary, detail_id
        ])

    return scores, events_num