def test_windows_model(): from windows_model import model_t2m_file, model_rh2m_file, model_w10m_file, exract_feature test_file = "../data/ai_challenger_wf2018_testb1_20180829-20181028.nc" out_time = pprd_time = '2018-10-28 03' if (not os.path.exists(data_path_1 + "test.csv")): transfer_data_to_csv(test_file, data_path_1 + "test.csv") test_df = load_data(data_path_1 + "test.csv") # test_df = fill_missing_data(test_df) for col in [ 'psur_obs', 't2m_obs', 'q2m_obs', 'w10m_obs', 'd10m_obs', 'rh2m_obs', 'u10m_obs', 'v10m_obs', 'RAIN_obs' ]: col_filled = col.split( '_')[0] + '_M' if 'psur' not in col else 'psfc_M' test_df[col].fillna(test_df[col_filled], inplace=True) #用超算值填充 test_df_processed = exract_feature(test_df, True) # 加载模型 t2m_model = pickle.load(open(model_t2m_file, 'rb')) rh2m_model = pickle.load(open(model_rh2m_file, 'rb')) w10m_model = pickle.load(open(model_w10m_file, 'rb')) df_submit = predict(test_df_processed, [t2m_model, rh2m_model, w10m_model], prd_time) # 预测并打印输出 df_submit.to_csv(output_path + ans_name, index=False) # 计算分数 anen_file = output_path + ans_name print_score(fore_file, obs_file, anen_file)
def test_old_days_model3(): from old_days_model3 import model_t2m_file, model_rh2m_file, model_w10m_file, exract_feature df = load_data("../data/all_data.csv") df_processed = exract_feature(df) del df test_idx = df_processed[lambda x: x.station_date_time.str.split('_').apply( lambda x: x[1]) == prd_time.replace('-', '').replace(' ', '')].index df_test = df_processed.loc[test_idx] del df_processed # df_test.drop(df_test.columns[33:45], axis=1, inplace=True) df_test.drop(pd.Index([ 'psur_obs', 't2m_obs', 'q2m_obs', 'w10m_obs', 'd10m_obs', 'rh2m_obs', 'u10m_obs', 'v10m_obs', 'RAIN_obs', 't2m_obj', 'rh2m_obj', 'w10m_obj' ]), axis=1, inplace=True) df_test_X = pd.concat( [df_test[df_test.columns[1]], df_test[df_test.columns[3:]]], axis=1) # 加载模型 t2m_model = pickle.load(open(model_t2m_file, 'rb')) rh2m_model = pickle.load(open(model_rh2m_file, 'rb')) w10m_model = pickle.load(open(model_w10m_file, 'rb')) df_submit = pd.DataFrame([ [ f'{item[0]}_{int(item[2]):02d}' for item in df_test.station_date_time.str.split('_') ], (df_test.t2m_M + t2m_model.predict(df_test_X)).tolist(), (df_test.rh2m_M + rh2m_model.predict(df_test_X)).tolist(), (df_test.w10m_M + w10m_model.predict(df_test_X)).tolist(), ]).T.rename(columns={ 0: 'FORE_data', 1: 't2m', 2: 'rh2m', 3: 'w10m' }) df_submit.to_csv(output_path + ans_name, index=False) # 预测并打印输出 # 计算分数 anen_file = output_path + ans_name print_score(fore_file, obs_file, anen_file)
def test_windows_station_model(): from windows_station_model import model_t2m_file, model_rh2m_file, model_w10m_file, exract_feature test_file = "../data/ai_challenger_wf2018_testb1_20180829-20181028.nc" out_time = pprd_time = '2018-10-28 03' if (not os.path.exists(data_path_1 + "test.csv")): transfer_data_to_csv(test_file, data_path_1 + "test.csv") test_df = load_data(data_path_1 + "test.csv") # test_df = fill_missing_data(test_df) for col in [ 'psur_obs', 't2m_obs', 'q2m_obs', 'w10m_obs', 'd10m_obs', 'rh2m_obs', 'u10m_obs', 'v10m_obs', 'RAIN_obs' ]: col_filled = col.split( '_')[0] + '_M' if 'psur' not in col else 'psfc_M' test_df[col].fillna(test_df[col_filled], inplace=True) #用超算值填充 station_id = [ 90001, 90002, 90003, 90004, 90005, 90006, 90007, 90008, 90009, 90010 ] test_station_df = {} test_df_processed = exract_feature(test_df, True) for id in station_id: test_station_df[str(id)] = test_df_processed[ test_df_processed["stations"] == id] # 加载模型 t2m_model = pickle.load(open(model_t2m_file, 'rb')) rh2m_model = pickle.load(open(model_rh2m_file, 'rb')) w10m_model = pickle.load(open(model_w10m_file, 'rb')) df_submit = predict(test_station_df[str(station_id[0])], [ t2m_model[str(station_id[0])], rh2m_model[str(station_id[0])], w10m_model[str(station_id[0])] ], prd_time) for id in station_id[1:]: df_submit = df_submit.append( predict( test_station_df[str(id)], [t2m_model[str(id)], rh2m_model[str(id)], w10m_model[str(id)]], prd_time)) df_submit.to_csv(output_path + ans_name, index=False) # 预测并打印输出 # 计算分数 anen_file = output_path + ans_name print_score(fore_file, obs_file, anen_file)