Exemplo n.º 1
0
 def test_next_batch_production(self):
     """Check the nesting sizes of a batch drawn with batch_size=4, n_steps=10.

     The batch must hold 4 samples; the first sample must hold 10 steps and
     its first step 2 values. The leading value of the last sample's first
     step must also equal the matching entry of the target batch.
     """
     series = TimeSeries(self.input_data)
     features, targets = series.next_batch(batch_size=4, n_steps=10)
     # Sizes are checked from the outermost level inwards.
     self.assertEqual(len(features), 4)
     self.assertEqual(len(features[0]), 10)
     self.assertEqual(len(features[0][0]), 2)
     # Compare the same position in the feature and target batches.
     feature_value = features[3][0][0]
     target_value = targets[3][0][0]
     self.assertEqual(feature_value, target_value)
Exemplo n.º 2
0
 def test_next_batch_covariates(self):
     """
     A single-sample batch (batch_size=1, n_steps=10) carries 2 values per step.

     Original note: the feature space is supplied in the input when
     target_only is False, so the y dataset does not need to be lagged.
     """
     series = TimeSeries(self.input_data)
     features, targets = series.next_batch(batch_size=1, n_steps=10)
     self.assertEqual(len(features), 1)
     first_step = features[0][0]
     self.assertEqual(len(first_step), 2)
Exemplo n.º 3
0
 def test_sample_ts(self):
     """
     A frame longer than the requested length must be sampled down:
     asking ``_sample_ts`` for desired_len=3 has to return exactly 3 rows.
     """
     sampler = TimeSeries(pandas_df=self.data_to_pad)
     sampled = sampler._sample_ts(
         pandas_df=self.data_to_pad,
         desired_len=3,
     )
     row_count = sampled.shape[0]
     self.assertEqual(row_count, 3)
Exemplo n.º 4
0
    def test_padding_with_one_hot(self):
        """Pad a frame with a one-hot column group to 10 rows and check row 0.

        After padding, the first row must have ``one_hot_yes == 1`` and
        ``one_hot_no == 0``.
        """
        padder = TimeSeries(
            pandas_df=self.data_to_pad_with_categorical,
            one_hot_root_list=["one_hot"],
        )
        padded = padder._pad_ts(
            pandas_df=self.data_to_pad_with_categorical,
            desired_len=10,
        )

        row_count = padded.shape[0]
        self.assertEqual(row_count, 10)
        # Inspect the first row of each one-hot column.
        first_yes = padded.one_hot_yes.values[0]
        self.assertEqual(first_yes, 1)
        first_no = padded.one_hot_no.values[0]
        self.assertEqual(first_no, 0)
Exemplo n.º 5
0
 def test_next_batch_covariates_3(self):
     """
     A two-sample batch (batch_size=2, n_steps=20) carries 2 values per step.

     Both returned arrays are printed with their shapes for inspection.
     Original note: the feature space is supplied in the input when
     target_only is False, so the y dataset does not need to be lagged.
     """
     series = TimeSeries(self.input_data)
     features, targets = series.next_batch(batch_size=2, n_steps=20)
     # Diagnostic output; both objects are expected to expose ``.shape``.
     print('X_feature_space:', features.shape, features)
     print('y_target:', targets.shape, targets)
     self.assertEqual(len(features), 2)
     first_step = features[0][0]
     self.assertEqual(len(first_step), 2)
Exemplo n.º 6
0
def _create_ts_object(df, dataset):
    if dataset == '56_sunspots':
        ds = TimeSeries(df, target_idx=4, timestamp_idx=1, index_col=0)
    elif dataset == 'LL1_736_population_spawn':
        ds = TimeSeries(df,
                        target_idx=2,
                        timestamp_idx=1,
                        index_col=0,
                        grouping_idx=3,
                        count_data=True)
    return ds
Exemplo n.º 7
0
 def test_zero_len_padding(self):
     """Requesting desired_len=6 from ``_pad_ts`` must return 6 rows.

     Per the original comment, 6 is the length of the original time series.
     """
     padder = TimeSeries(pandas_df=self.data_to_pad)
     padded = padder._pad_ts(pandas_df=self.data_to_pad, desired_len=6)
     row_count = padded.shape[0]
     self.assertEqual(row_count, 6)
Exemplo n.º 8
0
 def test_len_padding(self):
     """Requesting desired_len=10 from ``_pad_ts`` must return 10 rows."""
     padder = TimeSeries(pandas_df=self.data_to_pad)
     padded = padder._pad_ts(
         pandas_df=self.data_to_pad,
         desired_len=10,
     )
     row_count = padded.shape[0]
     self.assertEqual(row_count, 10)
Exemplo n.º 9
0
from deepar.model.loss import gaussian_likelihood,gaussian_likelihood_2
import os


# Load the order history from <cwd>/data; the CSV has no header row.
# os.path.join replaces the hard-coded '\data\%s' string, which contained an
# invalid '\d' escape sequence and only resolved correctly on Windows.
data = pd.read_csv(
    os.path.join(os.getcwd(), 'data',
                 'B007SIR08C-A23TNQB4GVF91M-ATVPDKIKX0DER-1.csv'),
    header=None,
    names=['date', 'order', 'seller', 'marketplace'])
# Turn each dash-separated date string 'A-B-C' into the integer
# A*10000 + B*100 + C (i.e. YYYYMMDD, assuming year-month-day order).
data['count'] = data['date'].apply(lambda x: int(x.split('-')[0])*10000+int(x.split('-')[1])*100+int(x.split('-')[2]))
# Flag rows whose raw order value exceeds 150 as promotions.
data['promotion'] = 0
data.loc[data['order'] > 150, 'promotion'] = 1
# Keep the raw order range before it is overwritten by the scaling below
# (presumably to map predictions back to the original scale -- TODO confirm).
order_max = data['order'].max()
order_min = data['order'].min()
# Min-max scale 'order' and 'count' column by column.
data[['order','count']] = data[['order','count']].apply(lambda x : (x-np.min(x))/(np.max(x)-np.min(x)))
data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)
data.drop(columns=['seller','marketplace'], inplace=True)

# Only the first 500 rows are handed to the model.
ts = TimeSeries(data.head(500))

# ts = MockTs()
dp_model = DeepAR(ts, epochs=100)
dp_model.init()
# Load weights from '1.h5' instead of training here.
dp_model.model.load_weights('1.h5', by_name=True)
# dp_model.more_fit()


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of ``x``.

    ``x`` is passed through ``np.exp``, so NumPy arrays are handled
    element-wise.
    """
    denominator = 1 + np.exp(-1 * x)
    return 1 / denominator


def tanh(x):
    """Return the hyperbolic tangent of ``x``.

    Uses the identity tanh(x) = 2 * sigmoid(2x) - 1, relying on the
    module-level ``sigmoid`` helper.
    """
    y = 2*sigmoid(2*x)-1
    # The value was computed but never returned, so callers always got None.
    return y
Exemplo n.º 10
0
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os

# Every entry of the ./data directory is treated as one CSV to fit.
file_list = os.listdir('data')

for file_path in file_list:
    # os.path.join replaces the hard-coded '\data\%s' string, which contained
    # an invalid '\d' escape sequence and only resolved correctly on Windows.
    data = pd.read_csv(os.path.join(os.getcwd(), 'data', file_path),
                       header=None,
                       names=['date', 'order', 'seller', 'marketplace'])
    data['date'] = pd.to_datetime(data['date'])
    data.set_index('date', inplace=True)
    data.drop(columns=['seller', 'marketplace'], inplace=True)

    # Only the first 265 rows are handed to the model.
    ts = TimeSeries(data.head(265))

    # ts = MockTs()
    dp_model = DeepAR(ts, epochs=150)
    dp_model.instantiate_and_fit()

    def get_sample_prediction(sample, fn):
        """Draw one value from the normal distribution predicted by ``fn``.

        ``sample`` is reshaped to (1, 30, 1) and passed to ``fn`` inside a
        list; ``fn`` must return at least two outputs, each reshapeable to a
        single element. They are used as the mean and as the value whose
        square root is the scale (presumably a variance -- TODO confirm).
        Returns a one-element NumPy array.
        """
        sample = np.array(sample).reshape(1, 30, 1)
        output = fn([sample])
        samples = []
        # return output[0].reshape(1)
        for mu, sigma in zip(output[0].reshape(1), output[1].reshape(1)):
            samples.append(normal(loc=mu, scale=np.sqrt(sigma), size=1)[0])
        return np.array(samples)

    # predict_data = ts.next_batch(1, 50)[0]
Exemplo n.º 11
0
# Pair every element of ``air`` except the last ('feature_1') with its
# successor ('target'), giving a one-step-ahead supervised frame.
source_df = pd.DataFrame({'feature_1': air[:-1], 'target': air[1:]})
# Every row gets the same category label, the string '1'.
source_df['category'] = ['1'] * source_df.shape[0]

# Load RR_train.csv from the working directory.
hrv = pd.read_csv("RR_train.csv")

# Start from an empty frame.
dataset_df = pd.DataFrame()




from deepar.dataset.time_series import TimeSeries
from deepar.model.lstm import DeepAR
from sklearn.preprocessing import MinMaxScaler
# Wrap the frame for the model. The scaler is passed as the MinMaxScaler
# class itself, not an instance -- presumably TimeSeries instantiates it
# internally (TODO confirm).
ts = TimeSeries(source_df, scaler=MinMaxScaler)
# Configure DeepAR for 100 epochs; instantiate_and_fit() presumably builds
# and trains the network in one call (verify against the DeepAR class).
dp_model = DeepAR(ts, epochs=100)
dp_model.instantiate_and_fit()





%matplotlib inline
from numpy.random import normal
import tqdm
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
# Draw one batch from the series with positional arguments (1, 20) --
# presumably batch_size=1 and n_steps=20 (TODO confirm against
# TimeSeries.next_batch).
batch = ts.next_batch(1, 20)
def get_sample_prediction(sample, prediction_fn):