示例#1
0
def fill_missing_data():
    """Append 186 synthetic hourly rows to the history and save it as CSV.

    Reads the load history and the temperature history through
    ``load_data``, keeps the first 39414 rows of each series, then extends
    every series by ``168 + 18`` rows:

    * timestamp: the 4-field row found ``365 * 24 + 24`` rows earlier, with
      its first field incremented by one (presumably the year -- confirm
      against the timestamp layout produced by ``load_data``);
    * load: the value found ``7 * 24`` rows earlier; once the offset reaches
      rows filled earlier in this same loop, those filled values are reused;
    * temperature: the mean of the values found ``k * 365 * 24 + 24`` rows
      earlier for every ``k`` in ``lookback_years``.

    The three extended arrays are written, without header or index column,
    to ``time_full.csv``, ``temperature_full.csv`` and ``load_full.csv``
    under ``./LSTM/dataset/``.
    """
    # Only the value arrays (and the temperature timestamps) are needed; the
    # other fields returned by the readers are not used here.
    _, _, load = load_data.read_load_history(is_full=True)
    _, time_temperature, temperature = load_data.read_temperature_history()

    print('Filling missing data...')
    time_temperature = time_temperature.reshape((len(time_temperature), 4))
    temperature = temperature.reshape((len(temperature), 1))
    load = load.reshape((len(load), 1))

    # Start from the rows that are kept as-is; the filled rows are appended.
    known_rows = 39414
    time_temperature_full = time_temperature[0:known_rows]
    print('00:', len(time_temperature_full), time_temperature_full[-1])
    temperature_full = temperature[0:known_rows]
    load_full = load[0:known_rows]

    step_size = 365 * 24  # rows per look-back step
    week = 7 * 24  # rows per load look-back
    lookback_years = range(1, 2)  # which multiples of step_size are averaged

    for _ in range(168 + 18):
        # All three arrays grow by one row per iteration, so this is the
        # position of the row being created in each of them.
        index = len(temperature_full)

        # Timestamp: copy the reference row and bump its first field.
        reference = index - step_size - 24
        print('xx:', reference)
        stamp = np.add(np.zeros((1, 4)), time_temperature_full[reference])
        stamp[0][0] += 1
        time_temperature_full = np.append(time_temperature_full, stamp, axis=0)

        # Load: repeat the value from one week of rows earlier.
        load_tmp = np.add(np.zeros((1, 1)), load_full[index - week])
        load_full = np.append(load_full, load_tmp, axis=0)

        # Temperature: average the reference rows. Divide by the number of
        # samples actually summed so the result is a true mean rather than a
        # value scaled by an unrelated fixed divisor.
        temperature_sum = np.zeros((1, 1))
        for years_ago in lookback_years:
            prev_index = index - years_ago * step_size - 24
            temperature_sum = np.add(temperature_sum,
                                     temperature_full[prev_index])
        temperature_tmp = temperature_sum / len(lookback_years)
        temperature_full = np.append(temperature_full, temperature_tmp, axis=0)

    # Write the extended series to CSV files.
    df_time = pd.DataFrame(time_temperature_full)
    df_time.to_csv(r'./LSTM/dataset/time_full.csv', header=False, index=False)
    df_temperature = pd.DataFrame(temperature_full)
    df_temperature.to_csv(r'./LSTM/dataset/temperature_full.csv',
                          header=False,
                          index=False)
    df_load = pd.DataFrame(load_full)
    df_load.to_csv(r'./LSTM/dataset/load_full.csv', header=False, index=False)
    print('Done')
示例#2
0
def set_training_data():
    """Build the normalised model inputs and targets from the full history.

    Reads the temperature and load histories through ``load_data``,
    normalises each with ``normalization`` (the load with ``if_log=True``),
    and assembles one 35-feature input row per time step:
    11 temperature columns, 4 timestamp columns and 20 load columns taken
    from ``24 * 7`` rows earlier. The first ``24 * 7`` rows have no earlier
    row to take from and keep their own load values.

    Returns:
        tuple: ``(mean_load, std_load, inputs_full, inputs, outputs_full,
        outputs)`` where

        * ``mean_load`` / ``std_load`` are the first two values returned by
          ``normalization`` for the load,
        * ``inputs_full`` has shape ``(rows, 1, 35)``,
        * ``outputs_full`` is the normalised load with shape ``(rows, 20)``,
        * ``inputs`` / ``outputs`` are the same arrays without their last
          ``24 * 7`` rows.
    """
    week = 24 * 7

    # Load the data sets; only the arrays used below are kept.
    _, time_temperature, temperature = load_data.read_temperature_history(
        is_full=True)
    _, _, load = load_data.read_load_history(is_full=True)

    # Normalise each series. Only the load statistics are returned to the
    # caller, so the other means / standard deviations are discarded.
    time_temperature = time_temperature.reshape(len(time_temperature), 4)
    _, _, time_temperature = normalization(time_temperature)
    temperature = temperature.reshape(len(temperature), 11)
    _, _, temperature = normalization(temperature)
    load = load.reshape(len(load), 20)
    mean_load, std_load, load = normalization(load, if_log=True)

    # Add a singleton middle axis so the blocks can be joined on axis 2.
    time_temperature = time_temperature.reshape((len(time_temperature), 1, 4))
    temperature = temperature.reshape((len(temperature), 1, 11))
    load = load.reshape((len(load), 1, 20))

    # Use the load from one week of rows earlier as a model input. Row i
    # (i >= week) receives row i - week; earlier rows stay unchanged. The
    # negative stop keeps both sides empty when there are too few rows.
    load_for_inputs = np.copy(load)
    load_for_inputs[week:] = load[:-week]

    inputs_full = np.concatenate(
        (temperature, time_temperature, load_for_inputs), axis=2)
    inputs_full = inputs_full.reshape((len(inputs_full), 1, 35))

    load = load.reshape(len(load), 20)
    inputs = inputs_full[0:len(load) - week]
    outputs_full = load
    outputs = load[0:len(load) - week]

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("Full inputs shape: %s" % (inputs_full.shape,))
    print("Full outputs shape: %s" % (outputs_full.shape,))
    print("Input shape: %s" % (inputs.shape,))
    print("Output shape: %s" % (outputs.shape,))
    return mean_load, std_load, inputs_full, inputs, outputs_full, outputs
    y_1, = plt.plot(expected_output, color='blue')
    y_2, = plt.plot(predicted_output, color='green')
    legend_var.append(y_1)
    legend_var.append(y_2)

    plt.title(plot_name + str(zone_id), fontproperties=font_title)
    plt.legend(legend_var, legend_name, prop=font)
    plt.ylabel(u"用电量/kWh", fontproperties=font)
    plt.xlabel(u"时间/h", fontproperties=font)
    plt.show()


def plot_feature(data_frame):
    """Show a horizontal bar chart of ``fscore`` per ``feature``.

    Args:
        data_frame: Object whose ``plot`` method accepts ``kind``, ``x``,
            ``y`` and ``ax`` keywords and that provides ``'feature'`` and
            ``'fscore'`` columns (presumably a pandas DataFrame -- confirm
            against the caller).

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    # The resulting value is identical to the previous non-raw literal.
    # NOTE(review): this font location only exists on Windows.
    font_path = r'C:\Windows\Fonts\simsun.ttc'
    font = FontProperties(fname=font_path, size=12)
    font_title = FontProperties(fname=font_path, size=14)

    # Draw on the axes of the figure created here. Without ``ax`` the plot
    # call opens a figure of its own and this one is left empty.
    axes = plt.figure().add_subplot(111)
    data_frame.plot(kind='barh', x='feature', y='fscore', ax=axes)

    plt.title(u"XGBoost模型:特征重要性排名", fontproperties=font_title)
    plt.xlabel(u"相对重要性", fontproperties=font)
    plt.show()


if __name__ == "__main__":
    # Script entry point: plot the raw history curves.
    # Load history first. 20 is passed as the fourth argument (presumably the
    # number of zones, matching the "zone-" title prefix -- confirm against
    # plot_origin).
    length_load, time_load, load_load = load_data.read_load_history()
    plot_origin(length_load, time_load, load_load, 20, u"用电负荷历史数据:zone-",
                u"用电量/kWh")
    # Temperature history next. 11 is passed as the fourth argument
    # (presumably the number of stations, matching the "station-" title
    # prefix -- confirm against plot_origin).
    length_temp, time_temp, temp_temp = load_data.read_temperature_history()
    plot_origin(length_temp, time_temp, temp_temp, 11, u"历史温度数据:station-",
                u"温度/℃")