def model_train_done_rnn(model_id, lag_dates, pcas, file_path_list, sub_model_names, outputs_list, events_set,
                         model_dir):
    """
    Record a finished RNN training run in the database within one transaction.

    Inserts rows into t_event_model_file, t_event_model_detail and
    t_event_model_tran, then updates model_dir on t_event_model.

    :param model_id: string. Model identifier.
    :param lag_dates: array. Lag date of each sub-model (stored via int()).
    :param pcas: array. PCA value of each sub-model (stored via int()).
    :param file_path_list: array. Model file paths; one t_event_model_file row each.
    :param sub_model_names: array. Sub-model names. Detail rows are built from
        zip(sub_model_names, lag_dates, pcas), so the shortest list decides
        how many rows are written.
    :param outputs_list: array. Per sub-model outputs handed to pp.get_event_num.
    :param events_set: array. Events; one t_event_model_tran row is written per
        (sub-model, event) pair.
    :param model_dir: str. Directory stored in t_event_model.model_dir.
    :return: array. The generated detail_id values, in sub-model order.
    :raises RuntimeError: wraps any exception raised while writing; the
        transaction is rolled back first and the original error is chained
        as the cause.
    """
    db = DatabaseWrapper(dbname=event_dbname)
    try:
        db.begin_transaction()

        # Model files.
        sql = "INSERT INTO t_event_model_file(file_id, file_url, model_id) VALUES (%s, %s, %s) "
        file_rows = [(UuidHelper.guid(), model_fp, model_id) for model_fp in file_path_list]
        db.executemany(sql, file_rows)

        # Sub-model details; the generated ids are kept for the per-event rows
        # below and for the return value.
        detail_ids = []
        sql = "INSERT INTO t_event_model_detail(detail_id, model_name, status, model_id, lag_date, pca, create_date, " \
              "create_time) values (%s, %s, %s, %s, %s, %s, %s, %s)"
        detail_rows = []
        for sub_model_name, lag_date, pca in zip(sub_model_names, lag_dates, pcas):
            detail_id = UuidHelper.guid()
            detail_ids.append(detail_id)
            detail_rows.append(
                (detail_id, sub_model_name, DataStatus.SUCCESS.value, model_id, int(lag_date), int(pca),
                 sys_date(sys_date_formatter), sys_time(sys_time_formatter)))
        db.executemany(sql, detail_rows)

        # Per-event information for every sub-model.
        sql = "INSERT INTO t_event_model_tran(tran_id, event_name, num, detail_id, status, create_date, create_time) " \
              "values (%s, %s, %s, %s, %s, %s, %s)"
        tran_rows = []
        for detail_id, outputs in zip(detail_ids, outputs_list):
            events_num = pp.get_event_num(outputs, events_set)
            for event in events_set:
                tran_rows.append(
                    (UuidHelper.guid(), event, events_num[event], detail_id, DataStatus.SUCCESS.value,
                     sys_date(sys_date_formatter), sys_time(sys_time_formatter)))
        db.executemany(sql, tran_rows)

        sql = "UPDATE t_event_model SET model_dir = %s WHERE model_id = %s"
        db.execute(sql, (model_dir, model_id))

        db.commit()
        return detail_ids
    except Exception as exc:
        db.rollback()
        # Chain explicitly so the traceback names the database error as the cause.
        raise RuntimeError(exc) from exc
    finally:
        db.close()
def evaluate(self):
    """
    Evaluate self.model on the configured evaluation date range for the
    single event self.eval_event.

    Test samples come from preprocess.gen_samples_by_pred_date; the outputs
    are narrowed to the column of self.eval_event (its position in
    self.events_set) before predictions are scored with
    model_evalution.evaluate_sub_model.

    :return: the first element of the evaluation summary.
    NOTE(review): evaluate_sub_model's summary looks like it can be empty when
    the event does not occur in the range; indexing [0] would then fail.
    Confirm against callers.
    """
    samples_in, samples_out = preprocess.gen_samples_by_pred_date(
        self.data, self.events_p_oh, self.input_len, self.output_len,
        self.dates, self.eval_start_date, self.eval_end_date)

    # Select the evaluated event's column, then restore a trailing axis of length 1.
    target_col = self.events_set.index(self.eval_event)
    target_out = samples_out[:, :, target_col]
    target_out = np.reshape(target_out, [*target_out.shape, 1])

    occurrence_counts = preprocess.get_event_num(target_out, [self.eval_event])
    predictions = predict_cnn.predict_samples(self.model, samples_in)

    summary, _per_event, _scored_events = model_evalution.evaluate_sub_model(
        predictions, target_out, [self.eval_event], occurrence_counts)
    return summary[0]
def evaluate(self):
    """
    Evaluate the encoder/decoder pair on the configured evaluation date range
    across every event in self.events_set.

    Test samples come from preprocess.gen_samples_by_pred_date; predictions
    are produced by predict_rnn.predict_samples and scored with
    model_evalution.evaluate_sub_model.

    :return: the first element of the evaluation summary.
    NOTE(review): evaluate_sub_model's summary looks like it can be empty when
    no event occurs in the range; indexing [0] would then fail. Confirm
    against callers.
    """
    samples_in, samples_out = preprocess.gen_samples_by_pred_date(
        self.data, self.events_p_oh, self.input_len, self.output_len,
        self.dates, self.eval_start_date, self.eval_end_date)

    occurrence_counts = preprocess.get_event_num(samples_out, self.events_set)

    # The last argument is the number of events in self.events_set.
    predictions = predict_rnn.predict_samples(
        self.encoder, self.decoder, samples_in, self.output_len, len(self.events_set))

    summary, _per_event, _scored_events = model_evalution.evaluate_sub_model(
        predictions, samples_out, self.events_set, occurrence_counts)
    return summary[0]
def model_train_done_cnn(model_id, kernel_size_array, pool_size_array, lag_date_array, file_path_array,
                         sub_model_name_array, output_array, event_set_array, model_dir):
    """
    Record a finished CNN training run in the database within one transaction.

    Inserts rows into t_event_model_file, t_event_model_detail and
    t_event_model_tran, then updates model_dir on t_event_model.

    :param model_id: string. Model identifier.
    :param kernel_size_array: array. Convolution kernel size per sub-model (stored via int()).
    :param pool_size_array: array. Pool size per sub-model (stored via int()).
    :param lag_date_array: array. Lag date per sub-model (stored via int()).
    :param file_path_array: array. Model file paths; one t_event_model_file row each.
    :param sub_model_name_array: array. Sub-model names. Detail rows are built
        by zipping this with the lag/kernel/pool arrays, so the shortest list
        decides how many rows are written.
    :param output_array: array. Per sub-model event samples handed to pp.get_event_num.
    :param event_set_array: array. Event categories; one t_event_model_tran row
        is written per (sub-model, event) pair.
    :param model_dir: str. Directory stored in t_event_model.model_dir.
    :return: array. The generated sub-model detail_id values, in sub-model order.
    :raises RuntimeError: wraps any exception raised while writing; the
        transaction is rolled back first and the original error is chained
        as the cause.
    """
    db = DatabaseWrapper(dbname=event_dbname)
    try:
        db.begin_transaction()

        # Model files.
        sql = "INSERT INTO t_event_model_file(file_id, file_url, model_id) VALUES (%s, %s, %s) "
        file_rows = [(UuidHelper.guid(), model_fp, model_id) for model_fp in file_path_array]
        db.executemany(sql, file_rows)

        # Sub-model details; the generated ids are kept for the per-event rows
        # below and for the return value.
        detail_ids = []
        sql = "INSERT INTO t_event_model_detail(detail_id, model_name, status, model_id, lag_date, kernel_size, " \
              "pool_size, create_date, create_time) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        detail_rows = []
        for sub_model_name, lag_date, kernel_size, pool_size in zip(
                sub_model_name_array, lag_date_array, kernel_size_array, pool_size_array):
            detail_id = UuidHelper.guid()
            detail_ids.append(detail_id)
            detail_rows.append(
                (detail_id, sub_model_name, DataStatus.SUCCESS.value, model_id, int(lag_date), int(kernel_size),
                 int(pool_size), sys_date(sys_date_formatter), sys_time(sys_time_formatter)))
        db.executemany(sql, detail_rows)

        # Per-event information for every sub-model.
        sql = "INSERT INTO t_event_model_tran(tran_id, event_name, num, detail_id, status, create_date, create_time) " \
              "values (%s, %s, %s, %s, %s, %s, %s)"
        tran_rows = []
        for detail_id, outputs in zip(detail_ids, output_array):
            events_num = pp.get_event_num(outputs, event_set_array)
            for event in event_set_array:
                tran_rows.append(
                    (UuidHelper.guid(), event, events_num[event], detail_id, DataStatus.SUCCESS.value,
                     sys_date(sys_date_formatter), sys_time(sys_time_formatter)))
        db.executemany(sql, tran_rows)

        sql = "UPDATE t_event_model SET model_dir = %s WHERE model_id = %s"
        db.execute(sql, (model_dir, model_id))

        db.commit()
        return detail_ids
    except Exception as exc:
        db.rollback()
        # Chain explicitly so the traceback names the database error as the cause.
        raise RuntimeError(exc) from exc
    finally:
        db.close()
def evaluate_models(data, dates, detail_ids, sub_model_dirs, params_list, events_p_oh, events_set, n_classes,
                    start_date, end_date, model_type, eval_event, model_dir):
    """
    Evaluate every sub-model and store the per-event results in the database.

    Args:
        data: data-table values used to generate the predictions
        dates: date list passed to pp.gen_samples_by_pred_date
        detail_ids: detail_id of each sub-model
        sub_model_dirs: model file location of each sub-model
        params_list: hyper-parameters of each sub-model;
            (input_len, output_len) for CNN, (input_len, output_len, n_pca) for RNN
        events_p_oh: zero-padded one-hot event list (per the original description)
        events_set: ordered, de-duplicated event collection
        n_classes: number of event classes, passed to the RNN predictor
        start_date: first prediction date to evaluate
        end_date: last prediction date to evaluate
        model_type: string, compared against ModelType.CNN.value / ModelType.RNN.value
        eval_event: event evaluated in the CNN branch; not read in the RNN branch
        model_dir: str, model directory, passed to pp.apply_pca in the RNN branch

    Returns:
        scores: one list per sub-model holding the seven summary values followed by detail_id
        events_num: pp.get_event_num result of the last evaluated sub-model
            ({} when there are no sub-models)
        (None, None) is returned as soon as one sub-model yields an empty
        summary, i.e. the evaluated event did not occur in the evaluation range.

    Raises:
        RuntimeError: model_type is neither the CNN nor the RNN value.
    """
    scores = []
    events_num = {}

    for detail_id, sub_model_dir, params in zip(detail_ids, sub_model_dirs, params_list):
        logger.info(f'评估模型: {sub_model_dir}, detail_id: {detail_id}')

        if ModelType.CNN.value == model_type:
            input_len, output_len = params
            inputs_test, all_outputs = pp.gen_samples_by_pred_date(
                data, events_p_oh, input_len, output_len, dates, start_date, end_date)
            # Keep only the evaluated event's column, with a trailing axis of length 1.
            event_col = events_set.index(eval_event)
            single_event = all_outputs[:, :, event_col]
            outputs_test = np.reshape(single_event, [*single_event.shape, 1])
            events_set_ = [eval_event]
            cnn_model = load_cnn_model(sub_model_dir)
            preds = predict_cnn.predict_samples(cnn_model, inputs_test)
        elif ModelType.RNN.value == model_type:
            input_len, output_len, n_pca = params
            values_pca = pp.apply_pca(n_pca, model_dir, data, True)
            events_set_ = events_set
            inputs_test, outputs_test = pp.gen_samples_by_pred_date(
                values_pca, events_p_oh, input_len, output_len, dates, start_date, end_date)
            encoder, decoder = load_rnn_models(sub_model_dir)
            preds = predict_rnn.predict_samples(encoder, decoder, inputs_test, output_len, n_classes)
        else:
            raise RuntimeError(f"Unsupport model type {model_type}")

        events_num = pp.get_event_num(outputs_test, events_set_)
        evals_summary, evals_separate, eval_events = evaluate_sub_model(
            preds, outputs_test, events_set_, events_num)

        if len(evals_summary) == 0:
            # An empty summary means the evaluated event never happened in the evaluation date range.
            return None, None

        # eval_events only bounds the iteration; the event name used is the one
        # carried inside each per-event record.
        for _, separate in zip(eval_events, evals_separate):
            (event, _record_num, false_report, recall, false_alert,
             tier_precision, tier_recall, bleu_score) = separate
            # The stored count comes from events_num, not from the record itself.
            pgsql.insert_model_test(event, events_num[event], false_report, recall, false_alert,
                                    tier_precision, tier_recall, bleu_score, detail_id)

        (score_summary, bleu_summary, tier_precision_summary, tier_recall_summary,
         fr_summary, rc_summary, fa_summary) = evals_summary
        scores.append([score_summary, bleu_summary, tier_precision_summary, tier_recall_summary,
                       fr_summary, rc_summary, fa_summary, detail_id])

    return scores, events_num