def fill_missing_data():
    """Append synthesized rows to the history and write the result to CSV.

    The first ``known_rows`` rows of the timestamp, temperature and load
    series are kept unchanged. ``fill_rows`` further hourly rows are then
    appended one at a time:

    * timestamp   -- the row one year and 24 hours earlier, with its first
                     column incremented by one;
    * load        -- the load value from one week (7 * 24 rows) earlier;
    * temperature -- the mean of the temperature(s) found ``years_back``
                     years (plus 24 hours) earlier.

    The three completed series are written, without header or index, to
    ``time_full.csv``, ``temperature_full.csv`` and ``load_full.csv`` under
    ``./LSTM/dataset/``.

    Returns:
        None.
    """
    # Rows of the recorded history that are kept as-is.
    known_rows = 39414
    # Number of hourly rows to synthesize after the known history.
    fill_rows = 168 + 18
    hours_per_year = 365 * 24
    hours_per_week = 7 * 24
    # Year offsets whose temperatures are averaged for each new row.
    years_back = range(1, 2)

    # Load the historical electricity-load data.
    _, _, load = load_data.read_load_history(is_full=True)
    # Load the historical temperature data.
    _, time_temperature, temperature = load_data.read_temperature_history()

    print('Filling missing data...')
    time_temperature = time_temperature.reshape((len(time_temperature), 4))
    temperature = temperature.reshape((len(temperature), 1))
    load = load.reshape((len(load), 1))

    # Timestamps, temperature and load after the missing values are filled.
    time_temperature_full = time_temperature[0:known_rows]
    print('00:', len(time_temperature_full), time_temperature_full[-1])
    temperature_full = temperature[0:known_rows]
    load_full = load[0:known_rows]

    for _ in range(fill_rows):
        # Index of the row about to be appended.
        index = len(temperature_full)

        # NOTE(review): the extra 24-hour shift is taken from the original
        # code; presumably it compensates for a leap day -- confirm.
        reference = index - hours_per_year - 24
        print('xx:', reference)
        stamp = np.add(np.zeros((1, 4)), time_temperature_full[reference])
        stamp[0][0] += 1
        time_temperature_full = np.append(time_temperature_full, stamp, axis=0)

        # May read a row appended earlier in this loop once more than a
        # week of rows has been synthesized.
        load_tmp = np.add(np.zeros((1, 1)), load_full[index - hours_per_week])
        load_full = np.append(load_full, load_tmp, axis=0)

        temperature_sum = np.zeros((1, 1))
        for years in years_back:
            prev_index = index - years * hours_per_year - 24
            temperature_sum = np.add(temperature_sum,
                                     temperature_full[prev_index])
        # Divide by the number of samples actually summed; the previous
        # hard-coded divisor of 4 did not match the single-sample sum.
        temperature_tmp = temperature_sum / len(years_back)
        temperature_full = np.append(temperature_full, temperature_tmp, axis=0)

    # Write the completed series to CSV files.
    pd.DataFrame(time_temperature_full).to_csv(
        r'./LSTM/dataset/time_full.csv', header=False, index=False)
    pd.DataFrame(temperature_full).to_csv(
        r'./LSTM/dataset/temperature_full.csv', header=False, index=False)
    pd.DataFrame(load_full).to_csv(
        r'./LSTM/dataset/load_full.csv', header=False, index=False)
    print('Done')
def set_training_data():
    """Assemble normalized model inputs and targets.

    Temperature and load histories are read with ``is_full=True``
    (presumably the gap-filled series -- confirm against ``load_data``),
    reshaped to 4 timestamp columns, 11 temperature columns and 20 load
    columns, and each passed through ``normalization`` (the load with
    ``if_log=True``).

    Each input row concatenates, in this order, the normalized temperature,
    the normalized timestamp and the normalized load from one week
    (24 * 7 rows) earlier, giving 35 features. Rows in the first week have
    no earlier load available and use their own load instead.

    Returns:
        tuple: ``(mean_load, std_load, inputs_full, inputs, outputs_full,
        outputs)`` where ``mean_load`` and ``std_load`` are the first two
        values returned by ``normalization`` for the load,
        ``inputs_full`` has shape ``(rows, 1, 35)``, ``outputs_full`` has
        shape ``(rows, 20)``, and ``inputs`` / ``outputs`` are the same
        arrays with the final week of rows sliced off.
    """
    lag = 24 * 7  # one week of hourly rows

    # Read the data sets.
    _, time_temperature, temperature = load_data.read_temperature_history(
        is_full=True)
    _, _, load = load_data.read_load_history(is_full=True)

    # Normalize each series; only the load statistics are needed later.
    time_temperature = time_temperature.reshape(len(time_temperature), 4)
    _, _, time_temperature = normalization(time_temperature)
    temperature = temperature.reshape(len(temperature), 11)
    _, _, temperature = normalization(temperature)
    load = load.reshape(len(load), 20)
    mean_load, std_load, load = normalization(load, if_log=True)

    time_temperature = time_temperature.reshape((len(time_temperature), 1, 4))
    temperature = temperature.reshape((len(temperature), 1, 11))
    load = load.reshape((len(load), 1, 20))

    # Use the load from one week earlier as a prediction input. Rows before
    # the first full week keep their own value.
    load_for_inputs = np.copy(load)
    if len(load) > lag:
        load_for_inputs[lag:] = load[:-lag]

    inputs_full = np.concatenate(
        (temperature, time_temperature, load_for_inputs), axis=2)
    inputs_full = inputs_full.reshape((len(inputs_full), 1, 35))
    load = load.reshape(len(load), 20)

    inputs = inputs_full[0:len(load) - lag]
    outputs_full = load
    outputs = load[0:len(load) - lag]

    # Single-argument print calls emit the same text on Python 2 and 3.
    print("Full inputs shape: %s" % (inputs_full.shape,))
    print("Full outputs shape: %s" % (outputs_full.shape,))
    print("Input shape: %s" % (inputs.shape,))
    print("Output shape: %s" % (outputs.shape,))
    return mean_load, std_load, inputs_full, inputs, outputs_full, outputs
y_1, = plt.plot(expected_output, color='blue') y_2, = plt.plot(predicted_output, color='green') legend_var.append(y_1) legend_var.append(y_2) plt.title(plot_name + str(zone_id), fontproperties=font_title) plt.legend(legend_var, legend_name, prop=font) plt.ylabel(u"用电量/kWh", fontproperties=font) plt.xlabel(u"时间/h", fontproperties=font) plt.show() def plot_feature(data_frame): plt.figure() data_frame.plot(kind='barh', x='feature', y='fscore') font = FontProperties(fname='C:\Windows\Fonts\simsun.ttc', size=12) font_title = FontProperties(fname='C:\Windows\Fonts\simsun.ttc', size=14) plt.title(u"XGBoost模型:特征重要性排名", fontproperties=font_title) plt.xlabel(u"相对重要性", fontproperties=font) plt.show() if __name__ == "__main__": # 绘制原始数据曲线图 length_load, time_load, load_load = load_data.read_load_history() plot_origin(length_load, time_load, load_load, 20, u"用电负荷历史数据:zone-", u"用电量/kWh") length_temp, time_temp, temp_temp = load_data.read_temperature_history() plot_origin(length_temp, time_temp, temp_temp, 11, u"历史温度数据:station-", u"温度/℃")