def test_next_batch_production(self):
    """Check the dimensions of a 4 x 10 batch and that X lines up with y."""
    series = TimeSeries(self.input_data)
    features, targets = series.next_batch(batch_size=4, n_steps=10)

    # Outer axis: one entry per requested batch element.
    self.assertEqual(len(features), 4)

    # Inner axes: n_steps entries, each carrying two values.
    first_sequence = features[0]
    self.assertEqual(len(first_sequence), 10)
    self.assertEqual(len(first_sequence[0]), 2)

    # The leading value of the last sequence's first step must equal the
    # corresponding entry of the target.
    last_feature_value = features[3][0][0]
    last_target_value = targets[3][0][0]
    self.assertEqual(last_feature_value, last_target_value)
def test_next_batch_covariates(self):
    """
    When target_only is False the feature space comes from the input
    itself, so the y dataset does not have to be lagged.
    """
    series = TimeSeries(self.input_data)
    features, _targets = series.next_batch(batch_size=1, n_steps=10)

    # A single sequence is requested ...
    self.assertEqual(len(features), 1)

    # ... and every step of it holds two values.
    first_step = features[0][0]
    self.assertEqual(len(first_step), 2)
def test_sample_ts(self):
    """
    A pandas df longer than the required length must be sampled down,
    and the returned sample must have exactly the required length.
    """
    series = TimeSeries(pandas_df=self.data_to_pad)
    sampled = series._sample_ts(
        pandas_df=self.data_to_pad,
        desired_len=3,
    )

    row_count = sampled.shape[0]
    self.assertEqual(row_count, 3)
def test_padding_with_one_hot(self):
    """Pad a frame with a one-hot root to 10 rows and check its first row's flags."""
    series = TimeSeries(
        pandas_df=self.data_to_pad_with_categorical,
        one_hot_root_list=["one_hot"],
    )
    padded = series._pad_ts(
        pandas_df=self.data_to_pad_with_categorical,
        desired_len=10,
    )

    # The padded frame reaches the requested number of rows.
    self.assertEqual(padded.shape[0], 10)

    # First entry of each one-hot column: "yes" is set, "no" is not.
    first_yes = padded.one_hot_yes.values[0]
    self.assertEqual(first_yes, 1)
    first_no = padded.one_hot_no.values[0]
    self.assertEqual(first_no, 0)
def test_next_batch_covariates_3(self):
    """
    When target_only is False the feature space comes from the input
    itself, so the y dataset does not have to be lagged.
    """
    series = TimeSeries(self.input_data)
    features, targets = series.next_batch(batch_size=2, n_steps=20)

    # Dump both arrays (shape first, then contents) for manual inspection.
    print('X_feature_space:', features.shape, features)
    print('y_target:', targets.shape, targets)

    # Two sequences are requested ...
    self.assertEqual(len(features), 2)

    # ... and every step holds two values.
    first_step = features[0][0]
    self.assertEqual(len(first_step), 2)
def _create_ts_object(df, dataset): if dataset == '56_sunspots': ds = TimeSeries(df, target_idx=4, timestamp_idx=1, index_col=0) elif dataset == 'LL1_736_population_spawn': ds = TimeSeries(df, target_idx=2, timestamp_idx=1, index_col=0, grouping_idx=3, count_data=True) return ds
def test_zero_len_padding(self):
    """
    Padding to a desired_len equal to the length of the original time
    series must return a frame with that same number of rows.
    """
    series = TimeSeries(pandas_df=self.data_to_pad)
    padded = series._pad_ts(
        pandas_df=self.data_to_pad,
        desired_len=6,
    )

    row_count = padded.shape[0]
    self.assertEqual(row_count, 6)
def test_len_padding(self):
    """Padding to desired_len=10 must return a frame with 10 rows."""
    series = TimeSeries(pandas_df=self.data_to_pad)
    padded = series._pad_ts(
        pandas_df=self.data_to_pad,
        desired_len=10,
    )

    row_count = padded.shape[0]
    self.assertEqual(row_count, 10)
from deepar.model.loss import gaussian_likelihood, gaussian_likelihood_2
import os

# Read one order-history CSV from ./data. The file has no header row; the
# four columns are named here. os.path.join replaces the original
# '\data\%s' string, which relied on invalid escape sequences and only
# produced a usable path on Windows.
data = pd.read_csv(
    os.path.join(
        os.getcwd(),
        'data',
        'B007SIR08C-A23TNQB4GVF91M-ATVPDKIKX0DER-1.csv',
    ),
    header=None,
    names=['date', 'order', 'seller', 'marketplace'],
)

# Encode each date string as first*10000 + second*100 + third of its
# '-'-separated parts (assumes 'YYYY-MM-DD' strings — TODO confirm).
data['count'] = data['date'].apply(
    lambda x: int(x.split('-')[0]) * 10000
    + int(x.split('-')[1]) * 100
    + int(x.split('-')[2])
)

# Flag rows whose raw order value exceeds 150.
data['promotion'] = 0
data.loc[data['order'] > 150, 'promotion'] = 1

# Raw extremes captured before scaling (presumably needed later to undo the
# normalisation — confirm against the rest of the file).
order_max = data['order'].max()
order_min = data['order'].min()

# Min-max scale 'order' and 'count' column by column.
data[['order', 'count']] = data[['order', 'count']].apply(
    lambda x: (x - np.min(x)) / (np.max(x) - np.min(x))
)

# Index by parsed date and drop the columns that are not used afterwards.
data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)
data.drop(columns=['seller', 'marketplace'], inplace=True)

# Wrap the first 500 rows and restore previously saved weights.
ts = TimeSeries(data.head(500))
# ts = MockTs()
dp_model = DeepAR(ts, epochs=100)
dp_model.init()
dp_model.model.load_weights('1.h5', by_name=True)
# dp_model.more_fit()


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) evaluated with numpy."""
    return 1 / (1 + np.exp(-1 * x))


def tanh(x):
    """Return 2 * sigmoid(2 * x) - 1, i.e. the hyperbolic tangent of x."""
    # The original computed this value into a local and never returned it,
    # so every call yielded None; return it as sigmoid does.
    return 2 * sigmoid(2 * x) - 1
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os


def get_sample_prediction(sample, fn):
    """Draw one random value from the distribution `fn` predicts for `sample`.

    Args:
        sample: Values reshaped to (1, 30, 1) before prediction, so it must
            contain exactly 30 elements.
        fn: Callable invoked as ``fn([sample])``. Its first two outputs are
            each reshaped to a single element and used as ``mu`` and
            ``sigma`` respectively.

    Returns:
        A numpy array holding one draw from ``normal(mu, sqrt(sigma))``.
    """
    # NOTE(review): `normal` is not imported in this excerpt; presumably
    # numpy.random.normal brought in elsewhere in the file — confirm.
    sample = np.array(sample).reshape(1, 30, 1)
    output = fn([sample])
    # return output[0].reshape(1)
    return np.array([
        normal(loc=mu, scale=np.sqrt(sigma), size=1)[0]
        for mu, sigma in zip(output[0].reshape(1), output[1].reshape(1))
    ])


# NOTE(review): the original indentation was lost; the statements below are
# assumed to form the per-file loop body — confirm the intended loop extent.
file_list = os.listdir('data')
for file_path in file_list:
    # os.path.join replaces the original '\data\%s' string, which relied on
    # invalid escape sequences and only produced a usable path on Windows.
    data = pd.read_csv(
        os.path.join(os.getcwd(), 'data', file_path),
        header=None,
        names=['date', 'order', 'seller', 'marketplace'],
    )

    # Index by parsed date and drop the columns that are not used afterwards.
    data['date'] = pd.to_datetime(data['date'])
    data.set_index('date', inplace=True)
    data.drop(columns=['seller', 'marketplace'], inplace=True)

    # Fit a fresh model on the first 265 rows of this file.
    ts = TimeSeries(data.head(265))
    # ts = MockTs()
    dp_model = DeepAR(ts, epochs=150)
    dp_model.instantiate_and_fit()

    # predict_data = ts.next_batch(1, 50)[0]
# Notebook-style script: builds a shifted feature/target frame from `air`,
# fits a DeepAR model on it, then starts preparing for sampling predictions.
# NOTE(review): `air` and `pd` are used before any definition/import visible
# in this excerpt; presumably provided by earlier notebook cells — confirm.

# Pair every element of `air` except the last (feature) with the element
# that follows it (target).
source_df = pd.DataFrame({'feature_1': air[:-1], 'target': air[1:]})
# Same string category '1' on every row.
source_df['category'] = ['1' for i in range(source_df.shape[0])]
# `hrv` and `dataset_df` are not referenced again within this excerpt.
hrv = pd.read_csv("RR_train.csv")
dataset_df = pd.DataFrame()
from deepar.dataset.time_series import TimeSeries
from deepar.model.lstm import DeepAR
from sklearn.preprocessing import MinMaxScaler
# The MinMaxScaler class itself (not an instance) is passed as `scaler`.
ts = TimeSeries(source_df, scaler=MinMaxScaler)
dp_model = DeepAR(ts, epochs=100)
dp_model.instantiate_and_fit()
# IPython/Jupyter magic: not valid plain-Python syntax, so this script only
# runs under IPython.
%matplotlib inline
from numpy.random import normal
import tqdm
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
# Request one batch: first argument 1, second argument 20.
batch = ts.next_batch(1, 20)
# NOTE(review): the body of this function lies beyond the visible excerpt.
def get_sample_prediction(sample, prediction_fn):