示例#1
0
def call_data():
    """Build the hourly feature frame with the target as the first column.

    Reads the module-level names ``start_p``, ``stop_p`` and ``target``.

    Steps:
      1. Load the hourly table through ``instant_data().hourly_instant()``.
      2. Keep only the ``start_p:stop_p`` slice.
      3. Interpolate missing values in both directions and cast to float32.
      4. Add a ``'Day'`` column taken from the index's ``dayofyear``.
      5. Pass the frame through ``call_mar`` (the MARS step, cutoff 0.3).
      6. Reposition ``target`` via ``move_column_inplace(..., 0)``.

    Returns:
        Whatever ``move_column_inplace`` returns for the processed frame.
    """
    resolution = 'hour'
    hourly = instant_data().hourly_instant()
    window = hourly[start_p:stop_p]

    # Only interpolation is applied here; no mean-fill fallback is performed.
    # NOTE(review): the very large ``limit`` looks intended to leave the
    # number of consecutive fills effectively uncapped -- confirm.
    filled = window.interpolate(limit_direction='both', limit=300000000)
    features = filled.astype('float32')
    features['Day'] = features.index.dayofyear

    # MARS step on the prepared frame.
    selected = call_mar(features, target, resolution, cutoff=0.3)
    return move_column_inplace(selected, target, 0)
示例#2
0
def datapreprocess():
    """Prepare the hourly feature frame and a scaler fitted on the target.

    Reads the module-level names ``start_p``, ``stop_p`` and ``target``.

    The hourly table from ``instant_data().hourly_instant()`` is sliced to
    ``start_p:stop_p``, interpolated in both directions and cast to float32.
    The target column is plotted as a side effect, the frame goes through
    ``call_mar`` (the MARS step, cutoff 0.3), and ``target`` is repositioned
    via ``move_column_inplace(..., 0)``.

    Returns:
        tuple: ``(data_mar, scaler_tar)`` where ``data_mar`` is the processed
        frame (returned unscaled) and ``scaler_tar`` is a ``MinMaxScaler``
        fitted on the interpolated target column only.
    """
    resolution = 'hour'
    hourly = instant_data().hourly_instant()
    window = hourly[start_p:stop_p]

    # Only interpolation is applied here; no mean-fill fallback is performed.
    filled = window.interpolate(limit_direction='both', limit=300000000)
    features = filled.astype('float32')

    # Side effect: draw the interpolated target series.
    features[target].plot()

    # MARS step, then put the target column at position 0.
    selected = call_mar(features, target, resolution, cutoff=0.3)
    reordered = move_column_inplace(selected, target, 0)

    # The scaler sees the interpolated target from before the MARS step,
    # reshaped to a single-column 2-D array.
    scaler_tar = MinMaxScaler()
    target_column = features[target].to_numpy().reshape(-1, 1)
    scaler_tar.fit(target_column)
    return reordered, scaler_tar
示例#3
0
    print('incorrect input')

# Script section: select a station, build the feature frame for a fixed date
# window, run the MARS step, convert to a supervised layout, and produce a
# 70/30 positional train/test split with standardized inputs.
# `mode` and `df` are defined earlier in the file, outside this section.
st = 'CPY012'
target, start_p, stop_p, host_path = station_sel(st, mode)

# The window returned by station_sel is overridden by these hard-coded dates.
start_p = '2016-01-01'
stop_p = '2017-01-01'
#-----------------------------
data = df[start_p:stop_p]
# Despite its name, split_date is an integer row position (70% of the rows),
# used below with .iloc.
split_date = int(len(data) * .7)
# NOTE(review): this limit (3e9) differs from the 3e8 used in the functions
# above; both look intended as "effectively unlimited" -- confirm.
data = data.interpolate(limit=3000000000,
                        limit_direction='both').astype('float32')
data['Day'] = data.index.dayofyear  # day-of-year taken from the index
#-----------------------------
# MARS step, then move the target column to position 0.
cutoff = .3
data_mar = call_mar(data, target, mode, cutoff=cutoff)
data_mar = move_column_inplace(data_mar, target, 0)
n_features = len(data_mar.columns)
#----------------------------
out_t_step = 1

# to_supervise returns three values; the third is discarded here.
X, Y, _ = to_supervise(data_mar, target, out_t_step)
# Earlier variant using plain slicing instead of .iloc, kept for reference:
#trainX, testX = X[:split_date].dropna(),X[split_date:].dropna()
#trainY, testY = Y[:split_date].dropna(),Y[split_date:].dropna()
# NOTE(review): dropna() is applied to X and Y independently; if their NaN
# rows differ, the X and Y partitions may end up with different rows --
# verify against to_supervise's output.
trainX, testX = X.iloc[:split_date].dropna(), X.iloc[split_date:].dropna()
trainY, testY = Y.iloc[:split_date].dropna(), Y.iloc[split_date:].dropna()
#--------------------------------------------#

# Fit the scaler on the training inputs only and reuse it for the test inputs.
scaler = StandardScaler()
trainX = scaler.fit_transform(trainX)
testX = scaler.transform(testX)