Exemplo n.º 1
0
def create_supervised_ds(window_size):
    """Load the 'Avg' series and turn it into a differenced supervised matrix.

    Reads the CSV located at ``config.selected_path + config.input_file_ds``,
    reverses its row order, and feeds the 'Avg' column through
    ``data_misc.difference`` and ``data_misc.timeseries_to_supervised``.

    Side effects:
        Rebinds the module-level ``avg_values`` to the raw (undifferenced)
        'Avg' values, and prints the window size and lag.

    Args:
        window_size: Window size forwarded to
            ``data_misc.timeseries_to_supervised``; also the number of
            leading rows removed from the result.

    Returns:
        The ``.values`` of the supervised frame without its first
        ``window_size`` rows.
    """
    global avg_values

    # Difference only works for one step. Please implement for other steps.
    lag = 1
    csv_location = config.selected_path + config.input_file_ds

    # To pair with the other models, only the first 1438 rows are read.
    frame = read_csv(csv_location, header=0, sep=',', nrows=1438)
    # Flip the row order before extracting the series.
    avg_values = frame.iloc[::-1]['Avg'].values

    # Stationary data, then conversion into a supervised layout.
    differenced = data_misc.difference(avg_values, lag)
    windowed = data_misc.timeseries_to_supervised(differenced, window_size)
    # The first window_size rows contain zeros (per the original author's
    # note) and are cut off.
    supervised = windowed.values[window_size:, :]

    for label, value in (('Window Size:         %s', window_size),
                         ('Lag:                 %s', lag)):
        print(label % str(value))

    return supervised
Exemplo n.º 2
0
def supervised_diff_dt(df_data, window_size):
    """Build a side-by-side supervised matrix from every column of ``df_data``.

    Each column is passed through ``data_misc.difference`` (with argument 1)
    and ``data_misc.timeseries_to_supervised`` (with ``window_size``). The
    last column of every resulting frame is dropped, and the remaining
    blocks are joined column-wise in the iteration order of ``df_data``.

    Args:
        df_data: Column-keyed container: iterating it yields column names and
            ``df_data[name]`` yields that column's values (presumably a
            pandas DataFrame; confirm against callers).
        window_size: Window size forwarded to
            ``data_misc.timeseries_to_supervised``.

    Returns:
        A 2-D array with the per-column blocks concatenated along axis 1, or
        an empty list when ``df_data`` has no columns.
    """
    processed_columns = []
    for column_name in df_data:
        diff_col = data_misc.difference(df_data[column_name], 1)
        supervised_col = data_misc.timeseries_to_supervised(diff_col, window_size)
        # We drop the last column of the supervised frame because it is not
        # the target we want.
        processed_columns.append(supervised_col.values[:, :-1])

    if not processed_columns:
        # Keep the historical return value for an input without columns.
        return []

    # One concatenate call instead of re-copying the accumulated result for
    # every additional column.
    return np.concatenate(processed_columns, axis=1)
def compare(y_test, y_predicted):
    """Return the square root of the mean squared error of the predictions.

    Args:
        y_test: Observed target values.
        y_predicted: Predicted values, passed to ``mean_squared_error``
            together with ``y_test``.

    Returns:
        ``sqrt(mean_squared_error(y_test, y_predicted))`` as a float.
    """
    mse = mean_squared_error(y_test, y_predicted)
    return sqrt(mse)


# Try each candidate window size; range(12, 13) currently yields only 12.
for x in range(12, 13):
    window_size = x  # 15
    print('Window Size: %i' % (x))
    # The file is read with a tab separator and its first row as the header.
    series = read_csv('../data/airline-passengers.csv', header=0, sep='\t')
    # Keep the 'Date' column aside and remove it from the feature frame.
    date = series['Date']
    series = series.drop(['Date'], axis=1)
    # Skip the first window_size dates, mirroring the row cut applied to
    # raw_values below.
    date = date.iloc[window_size:]
    date = date.values

    raw_values = series.values
    # Project helper; its result exposes .values (presumably a DataFrame of
    # lagged columns -- confirm against data_misc).
    raw_values = data_misc.timeseries_to_supervised(raw_values, window_size)
    # print(raw_values)
    # Discard the first window_size rows of the supervised matrix.
    raw_values = raw_values.values[window_size:, :]

    size_raw_values = len(raw_values)
    # Row index of the 80% mark, truncated to an int.
    split = int(size_raw_values * 0.80)

    # The first 80% of the rows become train, the remainder test (no shuffle).
    train, test = raw_values[0:split], raw_values[split:]
    # Fit the scaler on the training rows only, then apply that same fitted
    # transform to both splits.
    transformer = StandardScaler()
    transformer.fit(train)
    train = transformer.transform(train)
    test = transformer.transform(test)
    print(train)
    print(test)
# Build one wide supervised matrix from 'Avg' plus every column in `columns`.
# NOTE(review): path, input_file, columns and window_size are not defined in
# this part of the file; they must already exist when this code runs.
series = read_csv(path + input_file, header=0, sep=',', nrows=1438)
# Reverse the row order of the loaded frame.
series = series.iloc[::-1]

# Copy only the requested columns into a fresh frame.
dfx = DataFrame()
for column in columns:
    dfx[column] = series[column]

date = series['Date']
avg = series['Avg']
# Skip the first window_size dates, mirroring the row cut applied to the
# supervised matrices below.
date = date.iloc[window_size:]
date = date.values


avg_values = avg.values
# Project helper; its result exposes .values (presumably a DataFrame of
# lagged columns -- confirm against data_misc).
avg_values = data_misc.timeseries_to_supervised(avg_values, window_size)
# Discard the first window_size rows of the supervised matrix.
avg_values = avg_values.values[window_size:, :]
print('-----')
#raw_values = range(len(avg_values))
#raw_values = [i*0 for i in raw_values]

# Start the combined matrix with the 'Avg' block.
raw_values = avg_values

for column_name in columns:
    col = dfx[column_name]
    col_values = data_misc.timeseries_to_supervised(col, window_size)
    col_values = col_values.values[window_size:, :]

    # Concatenate with numpy. Each new block is placed to the LEFT of what
    # has been accumulated, so the 'Avg' block stays right-most.
    raw_values = np.concatenate((col_values,raw_values), axis=1)
Exemplo n.º 5
0
        cut_beginning = [window_size_avg, window_size_btc]
    if not use_bitcoin_columns and use_trend_columns:
        cut_beginning = [window_size_avg, window_size_trend]

    total_window_size = max(cut_beginning)

    print('----------')
    print(combination)
    print('Model %s / %s' % (str(model_count), str(total_models)))

    # Stationary Data
    # Difference only works for one step. Please implement for other steps
    avg_diff_values = data_misc.difference(avg_values, lag)

    # Avg values converted into supervised model
    avg_supervised = data_misc.timeseries_to_supervised(
        avg_diff_values, window_size_avg)
    # avg_supervised = data_misc.timeseries_to_supervised(avg_values, window_size_avg)

    supervised = avg_supervised.values

    # we cut according to the biggest window size
    supervised = supervised[total_window_size:, :]

    # For the previous year we make it supervised and diff.
    avg_previous = supervised_diff_dt(df, window_size_avg)

    # we cut according to the biggest window size
    avg_previous = avg_previous[total_window_size:, :]
    # Concatenate with numpy
    supervised = np.concatenate((avg_previous, supervised), axis=1)
Exemplo n.º 6
0
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html#sklearn.linear_model.ElasticNet
from sklearn.linear_model import ElasticNet
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from math import sqrt
from Util import misc
from Util import data_misc

# Prepare train/test matrices from the 'Avg' column of the processed CSV.
# The file is read with a tab separator and its first row as the header.
series = read_csv('../Thesis/Bitcoin_historical_data_processed_supervised2.csv', header=0, sep='\t')

# transform data to be stationary
raw_values = series['Avg'].values
diff_values = data_misc.difference(raw_values, 1)

# transform data to be supervised learning
supervised = data_misc.timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# split data into train and test-sets: the last 10 rows are held out as test
train, test = supervised_values[0:-10], supervised_values[-10:]

# transform the scale of the data (project helper; returns the scaler and
# the two scaled arrays)
scaler, train_scaled, test_scaled = data_misc.scale(train, test)

# The last column is taken as the target; every other column is a feature.
X_train, y_train = train_scaled[:, 0:-1], train_scaled[:, -1]
X_test, y_test = test_scaled[:, 0:-1], test_scaled[:, -1]

print(X_train.shape)
# Reshape to (n_rows, 1); this only succeeds when X_train holds exactly one
# feature per row. X_test is left as sliced.
X_train = X_train.reshape((X_train.shape[0], 1))
print(X_train.shape)
print(X_test.shape)
Exemplo n.º 7
0
# Pair the 'Avg' series with a copy of itself shifted by `lag` rows.
# NOTE(review): series, window_size and dfx are not defined in this part of
# the file; they must already exist when this code runs.
date = series['Date']
avg = series['Avg']

# presumably one year of daily rows -- confirm against the data set
lag = 365
# Project helper returning two sequences; judging by the names, the current
# values and the values `lag` rows earlier -- confirm against data_misc.
avg, avg_previous = data_misc.slide_data(avg.values, lag)
# `date` is rebound here; the Series assigned above is not used.
date, date_previous = data_misc.slide_data(series['Date'].values, lag)

# Side-by-side frame, built only for the visual check printed below.
df = DataFrame({'date_previous': date_previous,
                'avg_previous': avg_previous,
                'date': date,
                'avg': avg})

print(df.head(10))

# The data is made supervised
avg = data_misc.timeseries_to_supervised(avg, window_size)
avg_previous = data_misc.timeseries_to_supervised(avg_previous, window_size)

# The first window_size rows contain zeros (per the original author's note)
# and need to be cut.
avg = avg.values[window_size:, :]
avg_previous = avg_previous.values[window_size:, :]

# From here on dfx is used instead of series, because series contains
# columns that we don't need. The first `lag` rows are skipped.
raw_values = dfx.values[lag:]
# print(series)

# We cut the values which are zero: here the LAST window_size rows are
# dropped, from both raw_values and date_previous.
raw_values = raw_values[:-window_size, :]
date_previous = DataFrame(date_previous)
date_previous = date_previous.values[:-window_size]
Exemplo n.º 8
0
from Util import misc
from Util import data_misc
import numpy as np
import itertools
from model import values

# Toy example: pair a supervised 'avg' series with supervised 'btc' features
# that use a different window size.
avg_window_size = 4
weight_window_size = 2
avg = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
btc = [110, 120, 130, 140, 150, 160, 170, 180, 190, 200]
# trend is defined but not referenced again in this part of the script.
trend = [210, 220, 230, 240, 250, 260, 270, 280, 290, 300]

#btc=[[110,210], [120,220], [130,230], [140,240], [150,250], [160,260], [170,270], [180,280], [190,290], [200,300]]
print(btc)

# Project helper; its result exposes .values (presumably a DataFrame of
# lagged columns -- confirm against data_misc).
avg_supervised = data_misc.timeseries_to_supervised(avg, avg_window_size)
btc_supervised = data_misc.timeseries_to_supervised(btc, weight_window_size)

# We drop the last column from btc_supervised because it is not the target we want
btc_supervised = btc_supervised.values[:, :-1]

# We pair the avg_supervised column with the btc_supervised one. Both are cut
# by the larger of the two window sizes so they keep the same number of rows.
cut_beginning = [avg_window_size, weight_window_size]
cut_beginning = max(cut_beginning)

avg_supervised = avg_supervised.values[cut_beginning:, :]
btc_supervised = btc_supervised[cut_beginning:, :]

# Concatenate with numpy: btc feature columns on the left, avg columns on
# the right.
supervised = np.concatenate((btc_supervised, avg_supervised), axis=1)
Exemplo n.º 9
0
    return result


# Toy example: pair a differenced, supervised 'avg' series with the feature
# matrix that supervised_diff_dt builds from the 'btc' and 'trend' columns.
avg_window_size = 3
btc_window_size = 4
avg = [10, 22, 30, 42, 50, 62, 70, 82, 90, 102]
btc = [110, 123, 130, 143, 150, 163, 170, 183, 190, 203]
trend = [212, 224, 232, 244, 252, 264, 272, 284, 292, 304]

df = DataFrame({'btc': btc,
                'trend': trend})
print(btc)

# Difference the target series, then convert it into a supervised layout.
avg_diff = data_misc.difference(avg, 1)
avg_supervised = data_misc.timeseries_to_supervised(avg_diff, avg_window_size)
print(avg_supervised)

# Both df columns are processed with the same window size (btc_window_size).
btc_supervised = supervised_diff_dt(df, btc_window_size)

# We pair the avg_supervised column with the btc_supervised one. Both are cut
# by the larger of the two window sizes so they keep the same number of rows.
cut_beginning = [avg_window_size, btc_window_size]
cut_beginning = max(cut_beginning)

avg_supervised = avg_supervised.values[cut_beginning:, :]
btc_supervised = btc_supervised[cut_beginning:, :]

# Concatenate with numpy: feature columns on the left, avg columns on the
# right.
supervised = np.concatenate((btc_supervised, avg_supervised), axis=1)

print(supervised)