import json
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from docx.shared import Cm, Pt
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
# Keras import path is an assumption; adjust to standalone `keras` if that is
# what the project actually uses.
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential, load_model

import date_tools  # local helper module, assumed to be provided elsewhere in this repo
import file_tools  # local helper module, assumed to be provided elsewhere in this repo

# Station_ID is referenced by generate_word_local() below and is assumed to be
# a module-level global set elsewhere in this project.


def create_json(data, path='config/'):
    '''
    Parameters
    ----------
    data : any
        The data to serialize as JSON.
    path : string, optional
        Directory to write into; the file is always named 'record.json'.
        The default is 'config/'.

    Returns
    -------
    None.
    '''
    file_tools.check_dir_and_mkdir(path)
    with open(path + 'record.json', "w") as f:
        json.dump(data, f)
    print("Finished writing data to file...")
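
# Hypothetical usage sketch for create_json(); the payload and path below are
# illustrative only, not taken from the project.
def _demo_create_json():
    record = {'station': '58926', 'updated': '2021-03-01 08:00'}  # made-up payload
    create_json(record, path='config/')  # writes config/record.json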

def select_ec_merge_by_month(ID, time, season, predict_day, type,
                             file_path='./data/ob_EC_merge',
                             save_path='./data/last_15_days'):
    """Filter a station's merged OB/EC records down to the months of one season.

    Rows whose predict_time falls outside the months returned by
    date_tools.month_list(season) are dropped, as are rows with missing values.
    """
    file = (file_path + '/' + str(predict_day) + '天' + '/' + time + '/' +
            type + '/')
    save = (save_path + '/' + str(predict_day) + '天' + '/' + season + '/' +
            time + '/' + type + '/')
    file_tools.check_dir_and_mkdir(file)
    file_tools.check_dir_and_mkdir(save)

    origin_data = pd.read_csv(file + ID + '.csv')
    # Keep only rows whose month (characters 5-6 of predict_time) belongs to the season.
    origin_data = origin_data.loc[origin_data['predict_time'].apply(
        lambda x: x[5:7] in date_tools.month_list(season))]
    origin_data = origin_data.dropna(axis=0)
    origin_data.to_csv(save + ID + '.csv', index=False)
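
# Hypothetical usage sketch for select_ec_merge_by_month(); the station ID and
# arguments are illustrative only.
def _demo_select_ec_merge_by_month():
    select_ec_merge_by_month(ID='58926', time='08', season='3-4',
                             predict_day=1, type='10UV')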

def generate_word_local(predict_df, save_path='./data/word'):
    """Render predict_df into a .docx report and save it under save_path.

    Relies on the module-level global Station_ID for the log message and the
    output file name.
    """
    print("Saving prediction data to local file " + Station_ID + ".docx")
    document = Document()
    # Use SimSun (宋体) for both Latin and East Asian text.
    document.styles['Normal'].font.name = u'宋体'
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Document title ("Zhangzhou wind speed forecast"), centered.
    title = document.add_paragraph()
    title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    title_cont = title.add_run('漳州市风速预测')
    title_cont.font.size = Pt(16)
    title_cont.bold = True

    # Centered picture.
    pic = document.add_paragraph()
    pic.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    pic_cont = pic.add_run("")
    pic_cont.add_picture(r'./test.png')  # optionally pass width=Inches(2)

    # Table: one header row plus one row per prediction, 4 columns, using the
    # built-in 'Medium Grid 1 Accent 1' style.
    table = document.add_table(rows=len(predict_df) + 1, cols=4,
                               style='Medium Grid 1 Accent 1')
    table.autofit = False
    # Column widths.
    table.columns[0].width = Cm(20)
    table.columns[1].width = Cm(20)
    table.columns[2].width = Cm(10)
    table.columns[3].width = Cm(10)

    ID_cols = table.columns[0].cells
    date_cols = table.columns[1].cells
    _10UV_cols = table.columns[2].cells
    _10FG6_cols = table.columns[3].cells
    # Header row: station / date / mean wind / gust.
    ID_cols[0].add_paragraph('站点').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    date_cols[0].add_paragraph('日期').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    _10UV_cols[0].add_paragraph('平均分').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    _10FG6_cols[0].add_paragraph('阵风').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    ID_group = predict_df.groupby("id").groups
    ID_list = list(ID_group.keys())

    # Columns: ID, date, 10UV, 10FG6. For each station, merge its rows in the
    # ID column into a single cell, then fill in the per-day values.
    merge_begin = 1
    merge_index = 1
    for ID in ID_list:
        ob_part = predict_df.loc[predict_df['id'] == ID].reset_index(drop=True)
        table.cell(len(ob_part) + merge_begin - 1, 0).merge(table.cell(merge_begin, 0))
        ID_cols[merge_begin].add_paragraph(ID).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        merge_begin = len(ob_part) + merge_begin
        for index in range(len(ob_part)):
            date_cols[merge_index].add_paragraph(
                ob_part.loc[index, 'date']).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            _10UV_cols[merge_index].add_paragraph(
                ob_part.loc[index, '10UV']).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            _10FG6_cols[merge_index].add_paragraph(
                ob_part.loc[index, '10FG6']).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            merge_index += 1

    file_tools.check_dir_and_mkdir(save_path)
    word_path = os.path.join(save_path, Station_ID + '.docx')
    document.save(word_path)
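
# Hypothetical usage sketch for generate_word_local(); the frame below mimics
# the columns the function reads (id, date, 10UV, 10FG6). Values are made up,
# and Station_ID must already be set at module level.
def _demo_generate_word_local():
    predict_df = pd.DataFrame({
        'id': ['58926', '58926'],
        'date': ['2021-03-01', '2021-03-02'],
        '10UV': ['5.2', '6.1'],    # kept as strings: they are passed to add_paragraph()
        '10FG6': ['8.9', '10.3'],
    })
    generate_word_local(predict_df, save_path='./data/word')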

def add_obp(ID, season, predict_day, time, type, data_path='data/last_15_days/',
            obp_path='data/obp/', models_save_path='models/lstm/'):
    """
    Run the station's LSTM model and append its prediction as the obp column.
    ----------
    ID : string
        Station to model.
    season : string
        Season to model (e.g. '3-4').
    predict_day : int
        Number of days ahead to predict.
    time : string
        Forecast issue hour (e.g. '08').
    type : string
        Variable name (e.g. '10UV').
    data_path : string
        Path to the last-15-days OB/EC data.
    obp_path : string
        Directory where the obp output is saved.
    models_save_path : string
        Directory holding the trained LSTM models.
    ----------
    """
    print('*' * 10)
    print(ID, season, predict_day, 'start')
    FILES_PATH = (data_path + str(predict_day) + '天/' + season + '/' + time +
                  '/' + type + '/' + ID + '.csv')
    SAVE_PATH = (obp_path + str(predict_day) + '天/' + season + '/' + time +
                 '/' + type + '/')
    MODEL_SAVE_PATH = models_save_path + time + '/' + ID + '_1.h5'

    origin_data = pd.read_csv(FILES_PATH)
    origin_data['ob_p'] = ''

    # Assemble the model input: the last 15 observations, then the EC forecasts
    # covering the remaining lead days.
    cols = []
    for i in range(-15, -(predict_day - 1), 1):
        cols.append('ob_' + str(i))
    for i in range(-(predict_day - 1), 0, 1):
        cols.append(type + '_' + str(i))
    data = np.array(origin_data[cols])

    # Normalize to [0, 1], matching the scaling used when the model was trained.
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = scaler.fit_transform(data)
    X = data.reshape(data.shape[0], data.shape[1], 1)

    # Load the model and predict.
    model = load_model(MODEL_SAVE_PATH)
    Predicts = model.predict(X)

    # Save the obp result. ravel() flattens the (n, 1) prediction array so it
    # assigns cleanly as a column.
    origin_data['ob_p'] = Predicts.ravel()
    cols = ['predict_time', 'MSL', 'ob', type, 'ob_p']
    file_tools.check_dir_and_mkdir(SAVE_PATH)
    origin_data[cols].to_csv(SAVE_PATH + ID + '_p.csv', index=False)
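
# Hypothetical usage sketch for add_obp(); the arguments are illustrative and
# the corresponding CSV and .h5 model must already exist on disk.
def _demo_add_obp():
    add_obp(ID='58926', season='3-4', predict_day=1, time='08', type='10UV')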

def build_svr(ID, season, predict_day, time, type, data_path='data/obp/',
              models_save_path='models/svr/', images_save_path='images/svr/'):
    '''
    Parameters
    ----------
    ID : string
        Station to model.
    season : string
        Season to model (e.g. '3-4').
    predict_day : int
        Number of days ahead to predict.
    time : string
        Forecast hour (e.g. '08').
    type : string
        Variable name (e.g. '10UV').
    data_path : string
        Path to the SVR training files, containing features such as 10UV, MSL and obp.
    models_save_path : string
        Directory where the SVR model is saved.
    images_save_path : string
        Directory where the plots are saved.

    Returns
    -------
    None.
    '''
    FILE_PATH = (data_path + str(predict_day) + '天/' + season + '/' + time +
                 '/' + type + '/' + ID + '_p.csv')
    origin_data = pd.read_csv(FILE_PATH)

    # Split into train and test (chronological 90/10 split).
    index = int(len(origin_data) * 0.9)
    columns_list = ['MSL', type, 'ob_p']
    x_train = origin_data[columns_list][:index]
    x_test = origin_data[columns_list][index:]
    y_train = origin_data['ob'][:index]
    y_test = origin_data['ob'][index:]

    # Normalize: fit the scaler on the training set only and reuse it on the
    # test set, to avoid information leakage.
    min_max_scaler = MinMaxScaler()
    x_train_scaler = min_max_scaler.fit_transform(x_train)
    x_test_scaler = min_max_scaler.transform(x_test)

    # Train the model and save it.
    model = SVR(kernel='rbf')
    model.fit(x_train_scaler, y_train)
    model_save_path = (models_save_path + season + '/' + str(predict_day) +
                       '天/' + time + '/' + type + '/')
    file_tools.check_dir_and_mkdir(model_save_path)
    joblib.dump(model, model_save_path + ID + '.pkl')

    predictions = model.predict(x_test_scaler)

    # Make sure the image directory exists.
    dir_path = images_save_path + ID + '/'
    file_tools.check_dir_and_mkdir(dir_path)

    # Plot predictions against observations and the raw EC forecast.
    X_label = list(range(predictions.shape[0]))
    plt.figure(figsize=(10, 3))
    plt.plot(X_label, predictions, 'r', label='prediction')
    plt.plot(X_label, y_test, 'black', label='observed')
    plt.plot(X_label, x_test[type], 'g--', label='ec')
    plt.title(ID + ' ' + season + ' ' + str(predict_day))
    plt.legend()
    plt.savefig(dir_path + ID + '_' + season + '_' + str(predict_day) + '.png')
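
# create_dataset() is called by build_lstm() below but is not defined in this
# section. A minimal sliding-window sketch, under the ASSUMPTION that it returns
# X with shape (samples, look_back, 1) and Y with shape (samples, look_after, 1)
# when given a (n, 1) array; the project's real helper may differ.
def create_dataset(dataset, look_back, look_after):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - look_after + 1):
        dataX.append(dataset[i:i + look_back])                           # past window
        dataY.append(dataset[i + look_back:i + look_back + look_after])  # future window
    return np.array(dataX), np.array(dataY)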

def build_lstm(ID, data_path='data/lstm/', look_back=15, look_after=1,
               models_save_path='models/lstm/', images_save_path='images/lstm/'):
    """
    Build an LSTM model (evaluation variant with a train/test split).
    ----------
    ID : string
        Station to model.
    data_path : string
        Path to the per-station OB data, sorted by date.
    look_back : int
        Number of past values used as input.
    look_after : int
        Number of future values to predict.
    models_save_path : string
        Directory where the LSTM model is saved.
    images_save_path : string
        Directory where the training plots are saved.
    ----------
    return:
    """
    FILE_PATH = data_path + ID + '.csv'
    MODEL_SAVE_PATH = models_save_path + ID + '_' + str(look_after) + '.h5'

    dataframe = pd.read_csv(FILE_PATH)
    dataframe.dropna(axis=0, inplace=True)
    dataset = dataframe['ob'].values
    # Cast to float.
    dataset = dataset.astype('float64').reshape(-1, 1)

    # Normalize to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    train_size = int(len(dataset) * 0.8)
    trainlist = dataset[:train_size]
    testlist = dataset[train_size:]

    # Build the windowed datasets.
    trainX, trainY = create_dataset(trainlist, look_back, look_after)
    testX, testY = create_dataset(testlist, look_back, look_after)

    # Undo the scaling on the targets so the model learns in original units,
    # then drop the trailing axis so the targets match the Dense(look_after)
    # output shape of (samples, look_after).
    trainY[:, :, 0] = scaler.inverse_transform(trainY[:, :, 0])
    testY[:, :, 0] = scaler.inverse_transform(testY[:, :, 0])
    trainY = trainY[:, :, 0]
    testY = testY[:, :, 0]

    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

    # Create and fit the LSTM network.
    model = Sequential()
    model.add(LSTM(4, input_shape=(None, 1)))
    model.add(Dense(look_after))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=100, batch_size=16)
    file_tools.check_dir_and_mkdir(models_save_path)
    model.save(MODEL_SAVE_PATH)

    # Make predictions.
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # Save the plots.
    file_tools.check_dir_and_mkdir(images_save_path)
    plt.figure(figsize=(10, 3))
    plt.plot(trainY[:, look_after - 1], 'r')
    plt.plot(trainPredict[:, 0], 'g')
    plt.title(ID + '_train')
    plt.savefig(images_save_path + ID + '_train' + '.png')
    plt.show()

    plt.figure(figsize=(10, 3))
    plt.plot(testY[:, look_after - 1], 'r')
    plt.plot(testPredict[:, 0], 'g')
    plt.title(ID + '_test')
    plt.savefig(images_save_path + ID + '_test' + '.png')
    plt.show()

    # Evaluate.
    my_mae = mean_absolute_error(testPredict[:, 0], testY[:, look_after - 1])
    print(ID + ' ' + 'my_mae:' + str(my_mae))
    return
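
# Hypothetical usage sketch for the train/test variant of build_lstm(); the
# station ID is illustrative and data/lstm/58926.csv must exist.
def _demo_build_lstm_eval():
    build_lstm('58926', look_back=15, look_after=1)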

def build_lstm(ID, time, data=None, data_path='data/lstm/', look_back=15,
               look_after=1, models_save_path='models/lstm/',
               images_save_path='images/lstm/'):
    """
    Build an LSTM model (production variant: trains on all data, no test split).
    Note: this definition shadows the evaluation variant above if both live in
    the same module.
    ----------
    ID : string
        Station to model.
    time : string
        Forecast issue hour.
    data : DataFrame, optional
        Pre-loaded OB data; if None, it is read from data_path.
    data_path : string
        Path to the per-station OB data, sorted by date.
    look_back : int
        Number of past values used as input.
    look_after : int
        Number of future values to predict.
    models_save_path : string
        Directory where the LSTM model is saved.
    images_save_path : string
        Directory where the training plots are saved.
    ----------
    return:
    """
    if data is not None:
        dataframe = data
    else:
        FILE_PATH = data_path + time + '/' + ID + '.csv'
        dataframe = pd.read_csv(FILE_PATH)
    models_save_path = models_save_path + time + '/'
    MODEL_SAVE_PATH = models_save_path + ID + '_' + str(look_after) + '.h5'

    dataframe.dropna(axis=0, inplace=True)
    dataset = dataframe['ob'].values
    # Cast to float.
    dataset = dataset.astype('float64').reshape(-1, 1)

    # Normalize to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # Use everything for training; no held-out split for now.
    trainlist = dataset

    # Build the windowed dataset.
    trainX, trainY = create_dataset(trainlist, look_back, look_after)
    # Undo the scaling on the targets so the model learns in original units,
    # then drop the trailing axis to match the Dense(look_after) output.
    trainY[:, :, 0] = scaler.inverse_transform(trainY[:, :, 0])
    trainY = trainY[:, :, 0]
    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))

    # Create and fit the LSTM network.
    model = Sequential()
    model.add(LSTM(4, input_shape=(None, 1)))
    model.add(Dense(look_after))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=3000, batch_size=256)
    file_tools.check_dir_and_mkdir(models_save_path)
    model.save(MODEL_SAVE_PATH)

    # Plot fitted values against the training targets and save the figure.
    trainPredict = model.predict(trainX)
    file_tools.check_dir_and_mkdir(images_save_path)
    plt.figure(figsize=(10, 3))
    plt.plot(trainY[:, look_after - 1], 'r')
    plt.plot(trainPredict[:, 0], 'g')
    plt.title(ID + '_' + time + '_train')
    plt.savefig(images_save_path + ID + '_' + time + '_train' + '.png')
    return
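
# rmse() is called by build_svr() below but is not defined in this section.
# A minimal sketch under the ASSUMPTION that it is the usual root-mean-square
# error; the project's own helper may differ.
def rmse(predictions, targets):
    return np.sqrt(np.mean((np.asarray(predictions) - np.asarray(targets)) ** 2))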

def build_svr(ID, season, predict_day, time, data_path='data/obp/',
              models_save_path='models/svr/', images_save_path='images/svr/'):
    '''
    Build an SVR model (10UV-specific variant; shadows the parameterized
    definition above if both live in the same module).

    Parameters
    ----------
    ID : string
        Station to model.
    season : string
        Season to model (e.g. '3-4').
    predict_day : int
        Number of days ahead to predict.
    time : string
        Forecast hour (e.g. '08').
    data_path : string
        Path to the SVR training files, containing features such as 10UV, MSL and obp.
    models_save_path : string
        Directory where the SVR model is saved.
    images_save_path : string
        Directory where the plots are saved.

    Returns
    -------
    None.
    '''
    FILE_PATH = (data_path + str(predict_day) + '天/' + season + '/' + time +
                 '/' + ID + '_p.csv')
    origin_data = pd.read_csv(FILE_PATH)

    # Correlation matrix (kept for reference):
    # corr_matrix = origin_data.corr()
    # print(corr_matrix["ob"].sort_values(ascending=False))

    # Split into train and test (chronological 80/20 split).
    index = int(len(origin_data) * 0.8)
    columns_list = ['MSL', '10UV', 'ob_p']
    x_train = origin_data[columns_list][:index]
    x_test = origin_data[columns_list][index:]
    y_train = origin_data['ob'][:index]
    y_test = origin_data['ob'][index:]

    # Normalize: fit the scaler on the training set only and reuse it on the
    # test set, to avoid information leakage.
    min_max_scaler = MinMaxScaler()
    x_train_scaler = min_max_scaler.fit_transform(x_train)
    x_test_scaler = min_max_scaler.transform(x_test)

    # Train the model and save it.
    model = SVR(kernel='rbf')
    model.fit(x_train_scaler, y_train)
    model_save_path = (models_save_path + season + '/' + str(predict_day) +
                       '天/' + time + '/')
    file_tools.check_dir_and_mkdir(model_save_path)
    joblib.dump(model, model_save_path + ID + '.pkl')

    predictions = model.predict(x_test_scaler)

    # Evaluate against the observations and against the raw EC forecast.
    my_mae = mean_absolute_error(predictions, y_test)
    my_rmse = rmse(predictions, y_test)
    print('---------------' + str(predict_day) + '---------------')
    print('my_mae:' + str(round(my_mae, 2)))
    print('my_rmse:' + str(round(my_rmse, 2)))
    ec_mae = mean_absolute_error(x_test['10UV'], y_test)
    ec_rmse = rmse(x_test['10UV'], y_test)
    print('ec_mae:' + str(round(ec_mae, 2)))
    print('ec_rmse:' + str(round(ec_rmse, 2)))
    print('sample count:' + str(len(origin_data)))
    print('improvement over EC:' +
          str(round((ec_rmse - my_rmse) / ec_rmse * 100, 4)) + '%')

    # Make sure the image directory exists.
    dir_path = images_save_path + ID + '/'
    file_tools.check_dir_and_mkdir(dir_path)

    # Plot predictions against observations and the raw EC forecast.
    X_label = list(range(predictions.shape[0]))
    plt.figure(figsize=(10, 3))
    plt.plot(X_label, predictions, 'r', label='prediction')
    plt.plot(X_label, y_test, 'black', label='observed')
    plt.plot(X_label, x_test['10UV'], 'g--', label='ec')
    plt.title(ID + ' ' + season + ' ' + str(predict_day))
    plt.legend()
    plt.savefig(dir_path + ID + '_' + season + '_' + str(predict_day) + '.png')
    plt.show()
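
# Hypothetical end-to-end sketch chaining the steps above: slice the merged
# data by season, train the LSTM, append its obp feature, then fit the SVR.
# All arguments are illustrative, not taken from the project's driver scripts.
if __name__ == '__main__':
    _ID, _season, _time, _day = '58926', '3-4', '08', 1
    select_ec_merge_by_month(_ID, _time, _season, _day, '10UV')
    build_lstm(_ID, _time)                    # production variant defined above
    add_obp(_ID, _season, _day, _time, '10UV')
    build_svr(_ID, _season, _day, _time)      # 10UV-specific variant defined above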