Example #1
import json

import file_tools  # project-local helper module (see the sketch after this example)


def create_json(data, path='config/'):
    '''
    Parameters
    ----------
    data : any
        The data to serialize to JSON.
    path : string, optional
        Directory to create the file in. The default is 'config/', so the
        file is written to 'config/record.json'.
    Returns
    -------
    None.
    '''
    file_tools.check_dir_and_mkdir(path)
    with open(path + 'record.json', "w") as f:
        json.dump(data, f)
        print("Finished writing to file...")
Example #2
import pandas as pd

import date_tools  # project-local helper: month_list maps a season string to month numbers
import file_tools


def select_ec_merge_by_month(ID,
                             time,
                             season,
                             predict_day,
                             type,
                             file_path='./data/ob_EC_merge',
                             save_path='./data/last_15_days'):
    # The '天' path component means 'day' (the forecast lead time in days).
    file = file_path + '/' + str(
        predict_day) + '天' + '/' + time + '/' + type + '/'
    save = save_path + '/' + str(
        predict_day) + '天' + '/' + season + '/' + time + '/' + type + '/'

    file_tools.check_dir_and_mkdir(file)
    file_tools.check_dir_and_mkdir(save)

    # Keep only rows whose predict_time month falls inside the given season.
    origin_data = pd.read_csv(file + ID + '.csv')
    origin_data = origin_data.loc[origin_data['predict_time'].apply(
        lambda x: x[5:7] in date_tools.month_list(season))]
    origin_data = origin_data.dropna(axis=0)
    origin_data.to_csv(save + ID + '.csv', index=False)
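
A hypothetical call matching the signature above (the station ID '59126' and the other argument values are illustrative only):

# Filter the 3-day-lead, 08h '10UV' merge file for one station down to the
# March-April season.
select_ec_merge_by_month('59126', '08', '3-4', 3, '10UV')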
Example #3
import os

from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

import file_tools


def generate_word_local(predict_df, save_path='./data/word'):
    # Station_ID is a module-level global naming the station being reported.
    print("Saving prediction data locally to " + Station_ID + ".docx")

    document = Document()
    # Use the SimSun (宋体) font so Chinese text renders correctly.
    document.styles['Normal'].font.name = u'宋体'
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    # Document title: 'Zhangzhou Wind Speed Forecast'
    title = document.add_paragraph()
    title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    title_cont = title.add_run('漳州市风速预测')
    title_cont.font.size = Pt(16)
    title_cont.bold = True

    # Centered picture
    pic = document.add_paragraph()
    pic.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    pic_cont = pic.add_run("")
    pic_cont.add_picture(r'./test.png')  # optionally: width=Inches(2)

    # Insert a (len(predict_df) + 1) x 4 table
    table = document.add_table(rows=len(predict_df) + 1,
                               cols=4,
                               style='Medium Grid 1 Accent 1')
    table.autofit = False

    # Column widths
    table.columns[0].width = Cm(20)
    table.columns[1].width = Cm(20)
    table.columns[2].width = Cm(10)
    table.columns[3].width = Cm(10)

    # Header row: station (站点) / date (日期) / average (平均分) / gust (阵风)
    ID_cols = table.columns[0].cells
    date_cols = table.columns[1].cells
    _10UV_cols = table.columns[2].cells
    _10FG6_cols = table.columns[3].cells
    ID_cols[0].add_paragraph('站点').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    date_cols[0].add_paragraph('日期').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    _10UV_cols[0].add_paragraph(
        '平均分').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    _10FG6_cols[0].add_paragraph(
        '阵风').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    ID_group = predict_df.groupby("id").groups
    ID_list = list(ID_group.keys())
    # Columns: ID, date, 10UV, 10FG6. Merge the ID cell vertically across
    # each station's rows, then fill in the per-row values.
    merge_begin = 1
    merge_index = 1
    for ID in ID_list:
        ob_part = predict_df.loc[predict_df['id'] == ID].reset_index(drop=True)
        table.cell(len(ob_part) + merge_begin - 1,
                   0).merge(table.cell(merge_begin, 0))
        ID_cols[merge_begin].add_paragraph(
            ID).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        merge_begin = len(ob_part) + merge_begin
        for index in range(len(ob_part)):
            date_cols[merge_index].add_paragraph(
                ob_part.loc[index,
                            'date']).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            _10UV_cols[merge_index].add_paragraph(
                ob_part.loc[index,
                            '10UV']).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            _10FG6_cols[merge_index].add_paragraph(
                ob_part.loc[index,
                            '10FG6']).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
            merge_index += 1
    file_tools.check_dir_and_mkdir(save_path)
    word_path = os.path.join(save_path, Station_ID + '.docx')
    document.save(word_path)
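
Note that Station_ID is read from module scope rather than passed as a parameter. A hypothetical call (the station ID and frame contents are illustrative; the values are strings because add_paragraph expects text):

import pandas as pd

Station_ID = '59126'  # hypothetical; normally set at module level
demo_df = pd.DataFrame({
    'id': ['59126', '59126'],
    'date': ['2021-03-01', '2021-03-02'],
    '10UV': ['3.2', '4.1'],
    '10FG6': ['6.5', '7.8'],
})
generate_word_local(demo_df, save_path='./data/word')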
Example #4
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

import file_tools


def add_obp(ID, season, predict_day, time, type,
            data_path='data/last_15_days/',
            obp_path='data/obp/',
            models_save_path='models/lstm/'):
    """
    Add the LSTM prediction column 'ob_p' using a trained LSTM model.
    ----------
    ID : string
        Station to model
    season : string
        Season to model (e.g. '3-4')
    predict_day : int
        Forecast lead time in days
    time : string
        Forecast initialization hour (e.g. '08')
    type : string
        EC variable used as a feature (e.g. '10UV')
    data_path : string
        Path to the merged ob/EC data for the last 15 days
    obp_path : string
        Save path for the obp output
    models_save_path : string
        Path where the LSTM models are stored
    ----------
    """
    print('*' * 10)
    print(ID, season, predict_day, 'start')

    FILES_PATH = data_path + str(predict_day) + '天/' + season + '/' + time + '/' + type + '/' + ID + '.csv'
    SAVE_PATH = obp_path + str(predict_day) + '天/' + season + '/' + time + '/' + type + '/'
    MODEL_SAVE_PATH = models_save_path + time + '/' + ID + '_1.h5'

    origin_data = pd.read_csv(FILES_PATH)

    # Build the feature window: past observations up to 15 steps back,
    # then EC forecasts for the remaining steps before the target time.
    cols = []
    for i in range(-15, -(predict_day - 1), 1):
        cols.append('ob_' + str(i))
    for i in range(-(predict_day - 1), 0, 1):
        cols.append(type + '_' + str(i))

    data = np.array(origin_data[cols])

    # Normalize to [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = scaler.fit_transform(data)

    X = data.reshape(data.shape[0], data.shape[1], 1)

    # Load the model and predict
    model = load_model(MODEL_SAVE_PATH)
    Predicts = model.predict(X)

    # Save the obp results (flatten the (n, 1) prediction array into a column)
    origin_data['ob_p'] = Predicts.ravel()
    cols = ['predict_time', 'MSL', 'ob', type, 'ob_p']
    file_tools.check_dir_and_mkdir(SAVE_PATH)
    origin_data[cols].to_csv(SAVE_PATH + ID + '_p.csv', index=False)
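
The feature-window construction above is easy to misread; here is a quick walk-through of the resulting column order for predict_day=3 and type='10UV' (values chosen only for illustration):

predict_day, var = 3, '10UV'
cols = ['ob_' + str(i) for i in range(-15, -(predict_day - 1))]
cols += [var + '_' + str(i) for i in range(-(predict_day - 1), 0)]
print(cols)
# ['ob_-15', 'ob_-14', ..., 'ob_-3', '10UV_-2', '10UV_-1'] -> 15 features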
Example #5
import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

import file_tools


def build_svr(ID, season, predict_day, time, type,
              data_path='data/obp/',
              models_save_path='models/svr/',
              images_save_path='images/svr/'):
    '''
    Parameters
    ----------
    ID : string
        Station to model
    season : string
        Season to model (e.g. '3-4')
    predict_day : int
        Forecast lead time in days
    time : string
        Forecast hour (e.g. '08')
    data_path : string
        Path to the SVR training files, which contain the 10UV, MSL,
        obp, etc. features
    models_save_path : string
        Save path for the SVR models
    images_save_path : string
        Save path for the plots
    Returns
    -------
    None.
    '''
    FILE_PATH = data_path + str(predict_day) + '天/' + season + '/' + time + '/' + type + '/' + ID + '_p.csv'

    origin_data = pd.read_csv(FILE_PATH)

    # Split into train and test (chronological 90/10 split)
    index = int(len(origin_data) * 0.9)
    columns_list = ['MSL', type, 'ob_p']
    x_train = origin_data[columns_list][:index]
    x_test = origin_data[columns_list][index:]
    y_train = origin_data['ob'][:index]
    y_test = origin_data['ob'][index:]

    # Normalize: fit the scaler on the training set only, then apply it to
    # the test set (calling fit_transform on both would leak test statistics)
    min_max_scaler = MinMaxScaler()
    x_train_scaler = min_max_scaler.fit_transform(x_train)
    x_test_scaler = min_max_scaler.transform(x_test)

    # Train the model and save it
    model = SVR(kernel='rbf')
    model.fit(x_train_scaler, y_train)
    model_save_path = models_save_path + season + '/' + str(predict_day) + '天/' + time + '/' + type + '/'
    file_tools.check_dir_and_mkdir(model_save_path)
    joblib.dump(model, model_save_path + ID + '.pkl')

    predictions = model.predict(x_test_scaler)

    # Make sure the image directory exists
    dir_path = images_save_path + ID + '/'
    file_tools.check_dir_and_mkdir(dir_path)

    # Plot predictions against observations and the raw EC forecast
    X_label = list(range(predictions.shape[0]))
    plt.figure(figsize=(10, 3))
    plt.plot(X_label, predictions, 'r', label='prediction')
    plt.plot(X_label, y_test, 'black', label='observed')
    plt.plot(X_label, x_test[type], 'g--', label='ec')
    plt.title(ID + ' ' + season + ' ' + str(predict_day))
    plt.legend()
    plt.savefig(dir_path + ID + '_' + season + '_' + str(predict_day) + '.png')
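
A hypothetical training call matching the directory layout written by add_obp (station ID illustrative):

# Train an SVR for one station: spring season, 3-day lead, 08h, '10UV'.
build_svr('59126', '3-4', 3, '08', '10UV')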
Example #6
import numpy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

import file_tools
# create_dataset is a project-local helper; a sketch follows after this example.


def build_lstm(ID,
               data_path='data/lstm/',
               look_back=15,
               look_after=1,
               models_save_path='models/lstm/',
               images_save_path='images/lstm/'):
    """
    Build an LSTM model.
    ----------
    ID : string
        Station to model
    data_path : string
        Path to the per-station ob data, sorted by date
    look_back : int
        Number of past values used as input
    look_after : int
        Number of future values to predict
    models_save_path : string
        Path where the LSTM models are stored
    images_save_path : string
        Path where the training/test plots are stored
    ----------
    Returns
    -------
    None.
    """
    FILE_PATH = data_path + ID + '.csv'
    MODEL_SAVE_PATH = models_save_path + ID + '_' + str(look_after) + '.h5'

    dataframe = pd.read_csv(FILE_PATH)
    dataframe.dropna(axis=0, inplace=True)
    dataset = dataframe['ob'].values
    # Cast to float
    dataset = dataset.astype('float64').reshape(-1, 1)

    # Normalize to [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    train_size = int(len(dataset) * 0.8)
    trainlist = dataset[:train_size]
    testlist = dataset[train_size:]

    # Build the windowed datasets
    trainX, trainY = create_dataset(trainlist, look_back, look_after)
    testX, testY = create_dataset(testlist, look_back, look_after)

    # Invert the scaling on the targets so the model fits in original units
    trainY[:, :, 0] = scaler.inverse_transform(trainY[:, :, 0])
    testY[:, :, 0] = scaler.inverse_transform(testY[:, :, 0])

    trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = numpy.reshape(testX, (testX.shape[0], testX.shape[1], 1))

    # Create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(None, 1)))
    model.add(Dense(look_after))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=100, batch_size=16)
    file_tools.check_dir_and_mkdir(models_save_path)
    model.save(MODEL_SAVE_PATH)

    # Make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # Save the plots
    file_tools.check_dir_and_mkdir(images_save_path)
    plt.figure(figsize=(10, 3))
    plt.plot(trainY[:, look_after - 1, 0], 'r')
    plt.plot(trainPredict[:, 0], 'g')
    plt.title(ID + '_train')
    plt.savefig(images_save_path + ID + '_train' + '.png')
    plt.show()
    plt.figure(figsize=(10, 3))
    plt.plot(testY[:, look_after - 1, 0], 'r')
    plt.plot(testPredict[:, 0], 'g')
    plt.title(ID + '_test')
    plt.savefig(images_save_path + ID + '_test' + '.png')
    plt.show()

    # Evaluate
    my_mae = mean_absolute_error(testPredict[:, 0], testY[:, look_after - 1, 0])
    print(ID + ' ' + 'my_mae:' + str(my_mae))

    return
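
create_dataset is not defined in these examples. Below is a minimal sketch consistent with how its output is indexed above (trainY must be 3-D with shape (samples, look_after, 1)); the project's own version may differ:

import numpy


def create_dataset(dataset, look_back, look_after):
    # dataset has shape (N, 1). Slide a window over it: each sample uses
    # `look_back` past values as input and the next `look_after` values
    # as the target.
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - look_after + 1):
        dataX.append(dataset[i:i + look_back, 0])
        dataY.append(dataset[i + look_back:i + look_back + look_after, :])
    # Shapes: X -> (samples, look_back), Y -> (samples, look_after, 1)
    return numpy.array(dataX), numpy.array(dataY)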
Example #7
import numpy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

import file_tools
# create_dataset: see the sketch after Example #6.


def build_lstm(ID,
               time,
               data=None,
               data_path='data/lstm/',
               look_back=15,
               look_after=1,
               models_save_path='models/lstm/',
               images_save_path='images/lstm/'):
    """
    Build an LSTM model.
    ----------
    ID : string
        Station to model
    time : string
        Forecast initialization time
    data : DataFrame, optional
        Preloaded ob data; if None, the data is read from data_path
    data_path : string
        Path to the per-station ob data, sorted by date
    look_back : int
        Number of past values used as input
    look_after : int
        Number of future values to predict
    models_save_path : string
        Path where the LSTM models are stored
    images_save_path : string
        Path where the training plots are stored
    ----------
    Returns
    -------
    None.
    """
    if data is not None:
        dataframe = data
    else:
        FILE_PATH = data_path + time + '/' + ID + '.csv'
        dataframe = pd.read_csv(FILE_PATH)
    models_save_path = models_save_path + time + '/'
    MODEL_SAVE_PATH = models_save_path + ID + '_' + str(look_after) + '.h5'

    dataframe.dropna(axis=0, inplace=True)
    dataset = dataframe['ob'].values
    # Cast to float
    dataset = dataset.astype('float64').reshape(-1, 1)

    # Normalize to [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # Use everything for training; no test split for now
    train_size = int(len(dataset))
    trainlist = dataset[:train_size]

    # Build the windowed dataset
    trainX, trainY = create_dataset(trainlist, look_back, look_after)

    # Invert the scaling on the targets
    trainY[:, :, 0] = scaler.inverse_transform(trainY[:, :, 0])

    trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))

    # Create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(None, 1)))
    model.add(Dense(look_after))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=3000, batch_size=256)
    file_tools.check_dir_and_mkdir(models_save_path)
    model.save(MODEL_SAVE_PATH)

    # Make predictions
    trainPredict = model.predict(trainX)

    # Save the training plot
    file_tools.check_dir_and_mkdir(images_save_path)
    plt.figure(figsize=(10, 3))
    plt.plot(trainY[:, look_after - 1, 0], 'r')
    plt.plot(trainPredict[:, 0], 'g')
    plt.title(ID + '_' + time + '_train')
    plt.savefig(images_save_path + ID + '_' + time + '_train' + '.png')

    return
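
This variant accepts either a preloaded DataFrame or a file path; a hypothetical pair of calls (station ID illustrative):

import pandas as pd

# Read from data/lstm/08/59126.csv
build_lstm('59126', '08')

# Or pass a preloaded frame that has an 'ob' column
frame = pd.read_csv('data/lstm/08/59126.csv')
build_lstm('59126', '08', data=frame)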
Example #8
import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

import file_tools
# rmse is a project-local helper; a sketch follows after this example.


def build_svr(ID,
              season,
              predict_day,
              time,
              data_path='data/obp/',
              models_save_path='models/svr/',
              images_save_path='images/svr/'):
    '''
    Parameters
    ----------
    ID : string
        Station to model
    season : string
        Season to model (e.g. '3-4')
    predict_day : int
        Forecast lead time in days
    time : string
        Forecast hour (e.g. '08')
    data_path : string
        Path to the SVR training files, which contain the 10UV, MSL,
        obp, etc. features
    models_save_path : string
        Save path for the SVR models
    images_save_path : string
        Save path for the plots
    Returns
    -------
    None.
    '''
    FILE_PATH = data_path + str(
        predict_day) + '天/' + season + '/' + time + '/' + ID + '_p.csv'

    origin_data = pd.read_csv(FILE_PATH)

    # Split into train and test (chronological 80/20 split)
    index = int(len(origin_data) * 0.8)
    columns_list = ['MSL', '10UV', 'ob_p']
    x_train = origin_data[columns_list][:index]
    x_test = origin_data[columns_list][index:]
    y_train = origin_data['ob'][:index]
    y_test = origin_data['ob'][index:]

    # Normalize: fit the scaler on the training set only, then apply it to
    # the test set (calling fit_transform on both would leak test statistics)
    min_max_scaler = MinMaxScaler()
    x_train_scaler = min_max_scaler.fit_transform(x_train)
    x_test_scaler = min_max_scaler.transform(x_test)

    # Train the model and save it
    model = SVR(kernel='rbf')
    model.fit(x_train_scaler, y_train)
    model_save_path = models_save_path + season + '/' + str(
        predict_day) + '天/' + time + '/'
    file_tools.check_dir_and_mkdir(model_save_path)
    joblib.dump(model, model_save_path + ID + '.pkl')

    predictions = model.predict(x_test_scaler)

    # Evaluate against the observations and against the raw EC forecast
    my_mae = mean_absolute_error(predictions, y_test)
    my_rmse = rmse(predictions, y_test)
    print('---------------' + str(predict_day) + '---------------')
    print('my_mae:' + str(round(my_mae, 2)))
    print('my_rmse:' + str(round(my_rmse, 2)))
    ec_mae = mean_absolute_error(x_test['10UV'], y_test)
    ec_rmse = rmse(x_test['10UV'], y_test)
    print('ec_mae:' + str(round(ec_mae, 2)))
    print('ec_rmse:' + str(round(ec_rmse, 2)))
    print('sample count: ' + str(len(origin_data)))
    print('model improvement: ' + str(round((ec_rmse - my_rmse) / ec_rmse * 100, 4)) + '%')

    # Make sure the image directory exists
    dir_path = images_save_path + ID + '/'
    file_tools.check_dir_and_mkdir(dir_path)

    # Plot predictions against observations and the raw EC forecast
    X_label = list(range(predictions.shape[0]))
    plt.figure(figsize=(10, 3))
    plt.plot(X_label, predictions, 'r', label='prediction')
    plt.plot(X_label, y_test, 'black', label='observed')
    plt.plot(X_label, x_test['10UV'], 'g--', label='ec')
    plt.title(ID + ' ' + season + ' ' + str(predict_day))
    plt.legend()
    plt.savefig(dir_path + ID + '_' + season + '_' + str(predict_day) + '.png')
    plt.show()
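
rmse is not defined in these examples. A minimal sketch of the assumed helper (the project's own version may differ):

import numpy as np


def rmse(predictions, targets):
    # Root-mean-square error between two equal-length sequences.
    return np.sqrt(np.mean((np.asarray(predictions) - np.asarray(targets)) ** 2))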