def main():
    """Load the series, optionally difference it, and run the NN experiments.

    Reads behaviour flags from ``cfg`` (only ``cfg['differencing']`` is used
    here) and hands the prepared frame to ``run_multiple_neural_networks``.
    """
    df, cfg = load_data()
    if cfg['differencing']:
        # Lag-1 differencing removes the trend; lag-12 differencing removes
        # the yearly seasonality (the data is presumably monthly — confirm).
        df = df.diff(periods=1).dropna()
        df = df.diff(periods=12).dropna()
    run_multiple_neural_networks(df, cfg)
    # Echo the configuration so each run's settings are recorded in the log.
    print(cfg)
def main():
    """Scale the passenger counts into [0, 1] and evaluate the model on them."""
    df, cfg = load_data()
    scaler = MinMaxScaler((0, 1))
    # Reshape to a single-feature column vector, as sklearn scalers require.
    passenger_column = df['#Passengers'].values.reshape(-1, 1)
    df['#Passengers'] = scaler.fit_transform(passenger_column)
    evaluate_model(df['#Passengers'].values.reshape(-1, 1), cfg)
def main():
    """Train/evaluate the networks on the gunpoint set and plot predictions
    with 80% and 95% prediction intervals (z = 1.28 and 1.96)."""
    df, cfg = load_data(data_set='gunpoint')

    # Hold out the last cfg['test_size'] fraction of rows as the test split.
    split = int(len(df) * cfg['test_size'])
    df_train = df.iloc[:-split]
    df_test = df.iloc[-split:]

    predictions, mc_mean, total_uncertainty, average_mse = run_multiple_neural_networks(
        np.expand_dims(df_train.values, axis=-1),
        np.expand_dims(df_test.values, axis=-1),
        cfg)

    interval_80 = np.array([mc_mean - 1.28 * total_uncertainty,
                            mc_mean + 1.28 * total_uncertainty])
    interval_95 = np.array([mc_mean - 1.96 * total_uncertainty,
                            mc_mean + 1.96 * total_uncertainty])
    plot_predictions(df, mc_mean, average_mse, interval_80, interval_95, cfg)
    print(cfg)
def main():
    """Sanity-check the Airpassengers wrapper: split sizes and a quick plot."""
    df, cfg = load_data()
    print(cfg)
    passengers = Airpassengers(cfg)
    # Report the sizes of the full series and of each split.
    for subset in (passengers.data, passengers.train, passengers.test):
        print(len(subset))
    passengers.plot_series()
def main():
    """Run the full pipeline on the avocado data set."""
    df, cfg = load_data()
    avocado_data = Avocado(cfg)
    pipeline(avocado_data, cfg)
    # Log the configuration used for this run.
    print(cfg)
def main():
    """Run the full pipeline on the air-passengers data set."""
    df, cfg = load_data()
    passenger_data = Airpassengers(cfg)
    pipeline(passenger_data, cfg)
    # Log the configuration used for this run.
    print(cfg)
def main():
    """Exercise the Avocado data-class accessors and print the splits."""
    df, cfg = load_data()
    avocado = Avocado(cfg)
    print(len(avocado.data))
    avocado.plot_series('Albany', 'organic')
    # NOTE(review): the unpack order here is (x, y, f), while the sibling
    # script uses (x, f, y) for the same accessors — one of the two orders
    # is likely wrong; confirm against the Avocado class definition.
    x, y, f = avocado.get_train_sequence()
    # print(avocado.get_test_sequence())
    x, y, f = avocado.get_holdout_sequence(['organic'])
    print(avocado.train['AveragePrice'].shape)
    print(avocado.train)
    print(avocado.test)
def main():
    """Cross-check the Avocado accessors: sequence getters vs. raw getters."""
    df, cfg = load_data()
    avocado = Avocado(cfg)
    print(len(avocado.data))
    avocado.plot_series('Albany', 'organic')

    train_x, _ = avocado.get_x()
    train_y, _ = avocado.get_y()
    train_f, _ = avocado.get_features()

    x, f, y = avocado.get_train_sequence()
    print(train_x.shape, x.shape)
    # print(avocado.get_test_sequence())

    # The holdout returned via the sequence API must match get_holdout().
    x, f, y = avocado.get_holdout_sequence(['organic'])
    x2, f2, y2 = avocado.get_holdout()
    for a, b in ((x, x2), (f, f2), (y, y2)):
        print(np.array_equal(a, b))
    print(avocado.train['AveragePrice'].shape)
def main():
    """Reshape the avocado data into a (region, type) price panel and run
    the NN experiments.

    Each column of the resulting frame is the average price for one
    (region, type) pair, indexed by date.
    """
    df, cfg = load_data(data_set='avocado')
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    df = df.loc[:, ('AveragePrice', 'region', 'type')]
    # One column per (region, type) pair; duplicate dates are mean-aggregated.
    df = df.pivot_table(index='Date', columns=['region', 'type'], aggfunc='mean')
    # bfill() replaces fillna(method='backfill'), which is deprecated and
    # removed in modern pandas; behaviour is identical.
    df = df.bfill().dropna()
    df.sort_index(inplace=True)
    run_multiple_neural_networks(df, cfg)
    print(cfg)
def main():
    """Walk-forward-validate every series in the frame and report the mean
    80%/95% prediction-interval coverage per forecasting horizon."""
    # BUG FIX: the original referenced `scaler` although the pre-training
    # step producing it was commented out, so the loop raised NameError.
    from sklearn.preprocessing import MinMaxScaler

    # Load data
    df, cfg = load_data()
    print(df.shape)

    horizon = cfg['forecasting_horizon']
    coverage_80pi = np.zeros([len(df.columns), horizon])
    coverage_95pi = np.zeros([len(df.columns), horizon])
    print(df)

    # Pre train autoencoder
    # encoder, scaler = pre_training(df, cfg)
    scaler = MinMaxScaler()

    # Train over all time series in df.
    # items() replaces iteritems(), which was removed in pandas 2.0.
    for i, (column_name, _) in enumerate(df.items()):
        print('Column Name : ', column_name)
        df_i = scaler.fit_transform(df[[column_name]].values)
        (prediction_sequence, mc_mean, mc_median, total_uncertainty,
         quantile_80, quantile_95, test) = walk_forward_validation(df_i, cfg)
        for j in range(horizon):
            # z = 1.28 and 1.96 give the central 80% and 95% intervals.
            coverage_80pi[i, j] = compute_coverage(
                upper_limits=mc_mean[:, j] + 1.28 * total_uncertainty[:, j],
                lower_limits=mc_mean[:, j] - 1.28 * total_uncertainty[:, j],
                actual_values=test)
            coverage_95pi[i, j] = compute_coverage(
                upper_limits=mc_mean[:, j] + 1.96 * total_uncertainty[:, j],
                lower_limits=mc_mean[:, j] - 1.96 * total_uncertainty[:, j],
                actual_values=test)

    # Print coverage for each forecasting horizon
    for j in range(horizon):
        print('Mean intervals over', len(df.columns.values), 'data sets')
        print('80%-prediction interval coverage: ', j, np.mean(coverage_80pi[:, j]))
        print('95%-prediction interval coverage: ', j, np.mean(coverage_95pi[:, j]))
def main():
    """Run the avocado pipeline, optionally with a pre-trained autoencoder.

    When ``cfg['autoencoder']`` is set, an Autoencoder is trained and tested
    first, then passed into the pipeline as a feature extractor.
    """
    df, cfg = load_data()
    print(df.shape)
    cfg['target_feature'] = 'AveragePrice'
    # Removed: unused local `cols` listing the frame's column names, and
    # dead commented-out differencing/Airpassengers experiments.
    data = Avocado(cfg)
    if cfg['autoencoder']:
        autoencoder = Autoencoder(data, cfg)
        autoencoder.train()
        autoencoder.test()
        pipeline(data, cfg, autoencoder)
    else:
        pipeline(data, cfg)
    print(cfg)
def main():
    """Scale (and optionally difference) the series, run the networks, and
    plot the forecasts with 80%/95% prediction intervals."""
    df, cfg = load_data()
    print(df)

    # Min-max scale the target into the unit interval.
    scaler = MinMaxScaler()
    df['y'] = scaler.fit_transform(df['y'].values.reshape(-1, 1))

    if cfg['differencing']:
        # Lag-1 then lag-12 differencing (trend, then yearly seasonality).
        df = df.diff(periods=1).dropna()
        df = df.diff(periods=12).dropna()

    predictions, mc_mean, total_uncertainty, average_mse = run_multiple_neural_networks(
        df, cfg)

    interval_80 = np.array([mc_mean - 1.28 * total_uncertainty,
                            mc_mean + 1.28 * total_uncertainty])
    interval_95 = np.array([mc_mean - 1.96 * total_uncertainty,
                            mc_mean + 1.96 * total_uncertainty])
    plot_predictions(df, mc_mean, average_mse, interval_80, interval_95, cfg)
    print(cfg)
"""Exploratory analysis of the AirPassengers series: ACF/PACF plots,
multiplicative seasonal decomposition, and the raw series plot."""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.seasonal import seasonal_decompose

from src.preparation.load_data import load_data

df, cfg = load_data(data_set='AirPassengers')
print(len(df))

plot_acf(df.values, lags=30)
plot_pacf(df.values, lags=30)
plt.show()

# FIX: `freq` was deprecated in statsmodels 0.11 and later removed;
# `period` is the supported keyword (12 = yearly cycle of monthly data).
result = seasonal_decompose(df.values, model='multiplicative', period=12)
result.plot()
plt.show()

fig = plt.figure()
ax = fig.gca()
ax.plot(df)
plt.xlabel('Year')
plt.ylabel('Number of air passengers (in 1000s)')
plt.title('Air Passengers Data (1949-1960)')
# One tick per year (monthly data).
ax.set_xticks(df.index[::12])
ax.set_xticklabels(df.index[::12])
plt.show()

scaler = MinMaxScaler()
"""Exploratory analysis of the Oslo temperature series: ACF/PACF plots,
additive seasonal decomposition, and the raw series plot."""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

from src.preparation.load_data import load_data

df, cfg = load_data(data_set='oslo_temperature')

plot_acf(df.values, lags=30)
plot_pacf(df.values, lags=30)
plt.show()

# FIX: `freq` was deprecated in statsmodels 0.11 and later removed;
# `period` is the supported keyword (12 = yearly cycle of monthly data).
result = seasonal_decompose(df.values, model='add', period=12)
result.plot()
plt.show()

fig = plt.figure()
ax = fig.gca()
ax.plot(df)
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.title('Temperature of Oslo')
# One tick per decade (monthly data: 10 * 12 samples).
ax.set_xticks(df.index[::10 * 12].date)
ax.set_xticklabels(df.index[::10 * 12].date)
plt.show()

scaler = MinMaxScaler()
# Exploratory analysis scratchpad for the avocado data set. The correlation
# heat-map code below is currently disabled (wrapped in a no-op string);
# only load_data() executes when this script runs.
import numpy as np
import seaborn as sb
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
from src.preparation.load_data import load_data
from src.dataclasses.Avocado import Avocado
import matplotlib.dates as mdates

df, cfg = load_data(data_set='avocado')

# Disabled: label-encodes the 'type' column and draws a correlation heat-map
# of price/volume features. Kept for reference; remove once no longer needed.
""" data = df label = LabelEncoder() dicts = {} label.fit(data.type.drop_duplicates()) dicts['type'] = list(label.classes_) data.type = label.transform(data.type) cols = ['AveragePrice', 'type', 'year', 'Total Volume', 'Total Bags'] cm = np.corrcoef(data[cols].values.T) sb.set(font_scale=1.7) hm = sb.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 15}, yticklabels=cols, xticklabels=cols) #plt.show() """
#print(df)
#print(df['region'].unique())