Example #1
0
    def fit(self, nsamples=5000):
        '''Fit a local-trend DLM to the log-series and simulate forecasts.

        Fits ``self.yln`` (log-scale observations) with a degree-1 trend
        component, extends the one-step-ahead predictions by
        ``self.forecast_length - 1`` steps, then draws ``nsamples``
        Gaussian samples per time point from the predictive marginals to
        obtain quantile bands on the exponentiated scale.

        :param nsamples: number of Monte Carlo samples per time point.
        :return: None; sets ``self.myDLM``, ``self.y_proj`` (25/50/75th
            percentiles, shape (n, 3)), ``self.cov`` and ``self.coef``.
        '''
        myDLM = dlm(self.yln)
        myDLM = myDLM + trend(degree=1, discount=0.9, name='trend1')
        myDLM.fit()

        # One-step-ahead filtered predictions and their variances.
        results = np.array(myDLM.result.predictedObs)[:, 0, 0]
        results_var = np.array(myDLM.result.predictedObsVar)[:, 0, 0]
        predicted, predicted_var = myDLM.predictN(self.forecast_length - 1,
                                                  myDLM.n - 1)

        ###INCOMPLETE HOW TO DEAL WITH UNCERTAINTIES!!!!
        coef = np.array(myDLM.getLatentState())
        cov = myDLM.result.smoothedCov
        self.myDLM = myDLM

        # Concatenate in-sample predictions with the forecast horizon.
        yln_all = np.append(results, predicted)
        yln_all_var = np.append(results_var, predicted_var)
        nall = len(yln_all)
        # BUG FIX: the previous np.tile(...).reshape(nall, nsamples) filled
        # row-major and therefore mixed means/variances across time points.
        # Broadcasting keeps row i at mean yln_all[i], sd sqrt(yln_all_var[i]).
        yln_models = (np.random.randn(nall, nsamples)
                      * np.sqrt(yln_all_var)[:, None]
                      + yln_all[:, None])
        # Back-transform to the original scale; assumes yln = log(y) -- TODO confirm.
        y_models = np.exp(yln_models)
        self.y_proj = np.percentile(y_models, [25, 50, 75], axis=1).T
        self.cov = cov
        self.coef = coef
Example #2
0
    def __init__(self, data=None, dlms=None):
        """Create a multivariate dlm container.

        Either ``data`` (a list of lists, one inner list per time step) is
        supplied -- producing a *homogeneous* mvdlm where every underlying
        dlm shares the same structure -- or ``data`` is None and pre-built
        dlms may be passed via ``dlms`` (a mapping of names to dlm
        instances), producing a *heterogeneous* mvdlm.
        """
        if data is None:
            self.dlmType = 'heterogeneity'
        elif len(data) == 0:
            raise NameError('data can not be empty. It can be None type ' +
                            '(indicating this is a heterogeneous mvdlm) ' +
                            'or it must be a list of list containing ' +
                            'multivariate data')
        else:
            self.dlmType = 'homogeneity'
            self._checkMultivariate(data)

        self.dlms = {}              # name/index -> underlying univariate dlm
        self.n = None               # data length
        self.d = None               # dimension (number of underlying dlms)
        self.order = []             # order of the dlms by their names
        self.iteration = 5          # iteration number
        self._initialized = False   # initialization status

        if self.dlmType == 'homogeneity':
            self.n = len(data)
            self.d = len(data[0])
            # One univariate dlm per dimension (column) of the data.
            for dim in range(self.d):
                self.dlms[dim] = dlm([row[dim] for row in data])
                self.dlms[dim].printSystemInfo(False)
                self.order.append(dim)

        elif self.dlmType == 'heterogeneity' and dlms is not None:
            # Copy the supplied dlms, checking they share one data length.
            for name in dlms:
                if self.n is None:
                    self.n = dlms[name].n
                elif self.n != dlms[name].n:
                    raise NameError('The data length for some dlms' +
                                    ' are different.')
                self.dlms[name] = deepcopy(dlms[name])
                self.dlms[name].printSystemInfo(False)
                self.order.append(name)

            self.d = len(self.dlms)
Example #3
0
    def forecazt(datos, predice, zeazon):
        """Clean a series in R, fit a trend+seasonal DLM and forecast bands.

        :param datos: sequence of observations (numeric time series).
        :param predice: number of future points to forecast.
        :param zeazon: seasonal period (observations per cycle).
        :return: pd.DataFrame with columns 'optimista', 'conservador' and
            'pesimista': the cleaned history followed by the upper / mean /
            lower 95% forecast bands.
        """
        # Two-sided 95% normal quantile (~1.96).
        qq = scipy.stats.norm.ppf(0.5 * (1 + 0.95))

        # Build an R vector literal "c(x1, x2, ...)" from the data
        # (join instead of the previous quadratic += loop).
        lysta = "c(" + ", ".join(str(d) for d in datos) + ")"

        ro.r('datin <- ' + lysta)
        ro.r("tdatin <- ts(datin, start = c(2012,1), frequency = " + str(zeazon) + ")")
        # Clean outliers / missing values with R's forecast::tsclean.
        datos = ro.r("tdatin <- tsclean(tdatin)")

        # Trend + seasonality model on the cleaned series.
        n1 = datos
        m1 = dlm(n1) + trend(1, discount = 1, name = 'a') + seasonality(zeazon, discount = 1, name = 'b')
        m1.fit()

        cons = list(n1)
        opti = list(n1)
        pesi = list(n1)

        for i in range(predice):
            if i == 0:
                (p1Mean, p1Var) = m1.predict(date = m1.n-1)
            else:
                (p1Mean, p1Var) = m1.continuePredict()

            # NOTE(review): extracts the number from the matrix repr string;
            # fragile but kept to preserve the original behaviour.
            mean1 = str(p1Mean[[0]])[3:]
            # BUG FIX: np.float was removed in NumPy >= 1.24; use builtin float.
            mean2 = float(mean1[:-2])

            cons.append(mean2)

            vari1 = str(np.sqrt(p1Var[[0]]))[3:]
            vari2 = float(vari1[:-2])

            opti.append(mean2 + qq * vari2)
            pesi.append(mean2 - qq * vari2)

        df = pd.DataFrame()
        df['optimista'] = opti
        df['conservador'] = cons
        df['pesimista'] = pesi

        return df
Example #4
0
 def fit(self,
         y,
         period,
         x=None,
         metric="smape",
         val_size=None,
         verbose=False):
     """
     Build the model using best-tuned hyperparameter values.

     :param y: pd.Series or 1-D np.array, time series to predict.
     :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
     for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
     data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
     "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
     :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
     :param metric: Str, the metric used for model selection during tuning, e.g. "smape" (symmetric mean absolute
     percentage error, the default), "mse" (mean squared error) or "mae" (mean absolute error).
     :param val_size: Int, the number of most recent observations to use as validation set for tuning.
     :param verbose: Boolean, True for printing additional info while tuning.
     :return: None
     """
     self.y = y
     self.name = "Bayesian Dynamic Linear Model"
     self.key = "bdlm"
     # Tune hyperparameters (stored in self.params) before building the model.
     self._tune(y=y,
                period=period,
                x=x,
                metric=metric,
                val_size=val_size,
                verbose=verbose)
     self.model = pydlm.dlm(y)
     self.model = self.model + pydlm.trend(degree=self.params["trend"],
                                           discount=0.5)
     self.model = self.model + pydlm.seasonality(period=self.period,
                                                 discount=0.99)
     # Optional auto-regressive component, only when tuning selected one.
     if self.params["ar"] is not None:
         self.model = self.model + pydlm.autoReg(degree=self.params["ar"],
                                                 discount=0.99)
     if x is not None:
         # One dynamic regression component per exogenous column, named by
         # its column index.
         for variable_id, x_variable in enumerate(x.T):
             self.model = self.model + pydlm.dynamic(
                 features=[[v] for v in x_variable],
                 discount=0.99,
                 name=str(variable_id))
     # pydlm prints while tuning/fitting; silence it.
     with SuppressStdoutStderr():
         self.model.tune()
         self.model.fit()
Example #5
0
def dlm_univariate_r3(y, s: dict, k: int, a=None, t=None, e=None, r=None):
    """ Univariate filter

            - Uses the discounting method of H/W so, doesn't need to be fit as often
            - Discount factors are periodically tuned
            - The hyper-parameter controls 'auto_degree', 'trend_degree',  'period'

        State dict ``s`` is created on the first call and threaded through
        subsequent calls. Passing ``y=None`` with ``e > 60`` triggers the
        tuning branch instead of an update.

        :returns: x, x_std, s
    """
    assert r is not None, 'Requires hyper-parameter (interpreted in dimension 3) '
    if not s:
        # First call: initialise state and build the model
        # (trend + seasonality + fixed-coefficient AR component).
        s = dict()
        s = dlm_set_univariate_params(s=s, r=r)
        s['dim'] = dimension(y)
        s['n_obs'] = 0
        s['model'] = dlm([], printInfo=False) + trend(
            s['trend_degree'], s['discount']) + seasonality(
                s['period'], s['discount'])
        s['model'] = s['model'] + fixedAutoReg(
            degree=s['auto_degree'], name='ar', w=1.0)

    if y is not None:
        s['n_obs'] += 1
        assert isinstance(y, float) or len(
            y) == s['dim'], ' Cannot change dimension of input in flight '
        y0, exog = split_exogenous(y=y)
        y0_passed_in = None if np.isnan(
            y0) else y0  # pydlm uses None for missing values
        s['model'].append([y0_passed_in])
        num_obs = len(s['model'].data) if s.get('model') else 0
        # Every n_fit observations, re-tune the discount factors via a
        # recursive call (y=None, e=1000 selects the tuning branch below).
        if num_obs % s['n_fit'] == s['n_fit'] - 1:
            # Perform periodic tuning of discount factors
            _, _, s = dlm_univariate_r3(y=None,
                                        s=s,
                                        k=k,
                                        a=a,
                                        t=t,
                                        e=1000,
                                        r=r)
        s['model'].fitForwardFilter()
        return _dlm_prediction_helper(s=s, k=k, y=y)

    # Maintenance call: only tune/fit when given enough time budget e.
    if y is None and e > 60:
        s['model'].tune()  # Tunes discount factors
        s['model'].fit()
        return None, None, s
Example #6
0
def SerBayes(sDay, nAhead, x0, hWeek):
    """Fit a Bayesian DLM (trend + time regressor + AR(3)) to sDay['y'] and plot.

    :param sDay: DataFrame-like object with a 'y' series indexed by
        date-like values parseable as 'YYYY-MM-DD'.
    :param nAhead: unused here; kept for interface compatibility.
    :param x0: unused here; kept for interface compatibility.
    :param hWeek: unused here; kept for interface compatibility.
    :return: None; shows fit and coefficient plots.
    """
    # BUG FIX: pd.datetime was removed in pandas >= 2.0.
    from datetime import datetime

    dta = sDay['y']
    dta.index = [datetime.strptime(str(x)[0:10], '%Y-%m-%d') for x in dta.index]
    # Scaled seconds-since-epoch; also used as the dynamic regressor below.
    t_line = [float(calendar.timegm(x.utctimetuple())) / 1000000 for x in dta.index]
    dta.index = t_line
    model = pydlm.dlm(dta)
    model = model + pydlm.trend(degree=1, discount=0.98, name='a', w=10.0)
    model = model + pydlm.dynamic(features=[[v] for v in t_line], discount=1, name='b', w=10.0)
    model = model + pydlm.autoReg(degree=3, data=dta.values, name='ar3', w=1.0)
    model.evolveMode('independent')
    model.noisePrior(2.0)
    model.fit()
    # BUG FIX: latent states are only available after filtering, so query
    # them after fit() rather than before (result is currently unused).
    allStates = model.getLatentState(filterType='forwardFilter')
    model.plot()
    model.turnOff('predict')
    model.plotCoef(name='a')
    model.plotCoef(name='b')
    model.plotCoef(name='ar3')
Example #7
0
# Dynamic Linear Models (DLM) with pydlm
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

import numpy as np
import matplotlib.pyplot as plt
import pydlm

# Simple example: noisy intercept plus a linear effect of a control variable.
n = 100
a = 1.0 + np.random.normal(0, 5, n)  # the intercept
x = np.random.normal(0, 2, n)  # the control variable
b = 3.0  # the coefficient
y = a + b * x

# NOTE: this local name shadows the pydlm.dlm class.
dlm = pydlm.dlm(y)
dlm = dlm + pydlm.trend(degree=0, discount=0.98, name='a', w=10.0)
dlm = dlm + pydlm.dynamic(
    features=[[v] for v in x], discount=1, name='b', w=10.0)

# randomly generate data: a level shift from 0 to 3 halfway through
data = [0] * 100 + [3] * 100

# create model (NOTE: rebinding `dlm` discards the model built above)
dlm = pydlm.dlm(data)

# add components: linear trend, weekly seasonality and a 3-step AR term
dlm = dlm + pydlm.trend(1, name='lineTrend', w=1.0)  # covariance=1
dlm = dlm + pydlm.seasonality(7, name='7day', w=1.0)
dlm = dlm + pydlm.autoReg(degree=3, data=data, name='ar3', w=1.0)
dlm.ls()
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from os import environ, path

from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import unittest


# A linear trend
linear_trend = trend(degree=1, discount=0.95, name='linear_trend', w=10)
# A seasonality with a 96-observation period.
# BUG FIX: the component was named 'seasonal52' while the plot calls below
# refer to 'seasonal96'; the names must match for plot() to find it.
seasonal96 = seasonality(period=96, discount=0.99, name='seasonal96', w=10)
# Build a simple dlm
simple_dlm = dlm(Disk_Avg) + linear_trend + seasonal96
# Fit the model
simple_dlm.fit()
# Plot the fitted results
simple_dlm.turnOff('data points')
simple_dlm.plot()

# Plot each component (attribute the time series to each component)
simple_dlm.turnOff('predict plot')
simple_dlm.turnOff('filtered plot')
simple_dlm.plot('linear_trend')
simple_dlm.plot('seasonal96')

# Plot the prediction given the first 35805 observations and forecast the next 96.
simple_dlm.plotPredictN(date=35805, N=96)
    def ts_fit(self, suppress=False):
        """Fit DLM to the time series data.

        Builds a pydlm model from the configured components (trend,
        seasonality, dynamic regressors, auto-regression, long seasonality),
        fits it on the training split, then computes residuals, confidence
        intervals, MSE and fitted values.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages

         Returns:
         -------
         self on success, -1 if fitting raised an exception.
         """
        self._prepare_fit()
        self._model = None
        self.ts_split()

        ts_df = self._train_dt.copy()

        # Fit
        self._dlm_logger.info("Trying to fit the DLM model....")
        try:
            if not suppress:
                self._dlm_logger.info("...via using parameters\n")
                print_attributes(self)

            # Normalise columns to the expected names before modelling.
            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols

            self._model = dlm(ts_df['y'])

            # trend
            if self._dlm_trend is not None:
                self._model = self._model + trend(
                    degree=self._dlm_trend['degree'],
                    discount=self._dlm_trend['discount'],
                    name=self._dlm_trend['name'],
                    w=self._dlm_trend['w'])
            # seasonality
            if self._dlm_seasonality is not None:
                self._model = self._model + seasonality(
                    period=self._dlm_seasonality['period'],
                    discount=self._dlm_seasonality['discount'],
                    name=self._dlm_seasonality['name'],
                    w=self._dlm_seasonality['w'])
            # dynamic: one component per configured feature set
            if self._train_dlm_dynamic is not None:
                for i in range(len(self._train_dlm_dynamic['features'])):
                    self._model = self._model + dynamic(
                        features=self._train_dlm_dynamic['features'][i]
                        ['features'],
                        discount=self._train_dlm_dynamic['features'][i]
                        ['discount'],
                        name=self._train_dlm_dynamic['features'][i]['name'],
                        w=self._train_dlm_dynamic['features'][i]['w'])
            # auto_reg
            if self._dlm_auto_reg is not None:
                self._model = self._model + autoReg(
                    degree=self._dlm_auto_reg['degree'],
                    discount=self._dlm_auto_reg['discount'],
                    name=self._dlm_auto_reg['name'],
                    w=self._dlm_auto_reg['w'])
            # long_season
            if self._dlm_long_season is not None:
                ls = longSeason(period=self._dlm_long_season['period'],
                                stay=self._dlm_long_season['stay'],
                                data=ts_df,
                                name=self._dlm_long_season['name'],
                                w=self._dlm_long_season['w'])
                self._model = self._model + ls

            if not suppress:
                self._dlm_logger.info("The constructed DLM model components:")
                print(self._model.ls())

            # tic
            start = time()
            if self._use_rolling_window:
                # Rolling-window filtering plus backward smoothing.
                self._model.fitForwardFilter(useRollingWindow=True,
                                             windowLength=self._window_size)
                self._model.fitBackwardSmoother()
            else:
                self._model.fit()
            self.model_fit = self._model
            # toc
            if not suppress:
                self._dlm_logger.info("Time elapsed: {} sec.".format(time() -
                                                                     start))
        except (Exception, ValueError) as e:
            self._dlm_logger.exception("DLM error...{}".format(e))
            return -1
        else:
            self._dlm_logger.info("Model successfully fitted to the data!")
            self._dlm_logger.info("Computing fitted values and residuals...")

            # Residuals
            self.residuals = pd.Series(self.model_fit.getResidual(),
                                       index=self._train_dt.index)
            # NOTE(review): index order suggests pydlm's getInterval returns
            # (upper, lower) -- confirm against the pydlm API docs.
            try:
                self.lower_conf_int = pd.Series(
                    self.model_fit.getInterval()[1],
                    index=self._train_dt.index)
                self.upper_conf_int = pd.Series(
                    self.model_fit.getInterval()[0],
                    index=self._train_dt.index)
            except ValueError as e:
                self._dlm_logger.exception(
                    "Something went wrong in getInterval...{}".format(e))

            self.mse = self.model_fit.getMSE()

            # Fitted values
            # this is not elegant, but found no other way
            self.fittedvalues = self._train_dt['y'] + self.residuals

            return self
Example #10
0
def monthly_pydlm_model(prod,
                        cus_no,
                        mat_no,
                        min_train_days=731,
                        test_points=1,
                        **kwargs):
    """Rolling-origin cross validation of a pydlm model on monthly aggregates.

    :param prod: data, pd.DataFrame with 'dt_week' and 'quantity' columns
    :param cus_no: customer number
    :param mat_no: product number
    :param min_train_days: Min training data from where cross validation starts
    :param test_points: number of points ahead prediction (for the time max is 1)
    :param kwargs: provide dir_name to save images and error excel
    :return: returns a data frame containing cross validation result
    """

    import pandas as pd
    import numpy as np
    from pydlm import dlm, trend, seasonality, autoReg
    from dateutil import parser

    # data transform
    prod = prod.rename(columns={'dt_week': 'ds', 'quantity': 'y'})
    prod = prod[['ds', 'y']]
    prod.ds = prod.ds.apply(str).apply(parser.parse)
    prod.y = prod.y.apply(float)
    prod = prod.sort_values('ds')
    prod = prod.reset_index(drop=True)
    # drop the first and the last (potentially partial) observations
    prod = prod.drop(prod.index[[0, len(prod.y) - 1]]).reset_index(drop=True)

    prod = get_monthly_aggregate_per_product(prod)
    # save plot of the raw aggregated series
    if ('dir_name' in kwargs.keys()):
        dir_name = kwargs.get('dir_name')
        one_dim_save_plot(x=prod.ds,
                          y=prod.y,
                          xlable="Date",
                          ylable="quantity",
                          title="raw_weekly_aggregated_quantity",
                          dir_name=dir_name,
                          cus_no=cus_no,
                          mat_no=mat_no)

    # Remove outlier
    if ('dir_name' in kwargs.keys()):
        dir_name = kwargs.get('dir_name')
        prod = ma_replace_outlier(data=prod,
                                  n_pass=3,
                                  aggressive=True,
                                  window_size=6,
                                  sigma=2.5,
                                  dir_name=dir_name,
                                  mat_no=mat_no,
                                  cus_no=cus_no)
    else:
        prod = ma_replace_outlier(data=prod,
                                  n_pass=3,
                                  aggressive=True,
                                  window_size=6,
                                  sigma=2.5)

    # save plot of the cleaned series
    if ('dir_name' in kwargs.keys()):
        dir_name = kwargs.get('dir_name')
        one_dim_save_plot(x=prod.ds,
                          y=prod.y,
                          xlable="Date",
                          ylable="quantity",
                          title="weekly_aggregated_quantity_outlier_replaced",
                          dir_name=dir_name,
                          cus_no=cus_no,
                          mat_no=mat_no)

    # test and train data creation
    train = prod[prod.ds <= (np.amax(prod.ds) - pd.DateOffset(
        days=(np.amax(prod.ds) - np.amin(prod.ds)).days - min_train_days))]
    test = prod[(np.amax(np.array(train.index)) +
                 1):(np.amax(np.array(train.index)) + 1 + test_points)]
    rem_data = prod[(np.amax(np.array(train.index)) + test_points):]
    output_result = pd.DataFrame()
    output_error = pd.DataFrame(columns=[
        'cus_no', 'mat_no', 'rmse', 'mape', '3mre_med', '3mre_max', '4mre_med',
        '4mre_max', 'cum_error', 'cum_quantity', 'period_days'
    ])
    try:
        # Walk forward: predict the next test_points, then grow the window.
        while (len(rem_data.ds) >= test_points):

            train_pydlm = train.set_index('ds', drop=True)

            # Modeling
            myDLM = dlm(train_pydlm.y)
            # add a degree-3 polynomial trend with prior covariance 1.0
            # (NOTE: the component is nevertheless named 'quadratic')
            myDLM = myDLM + trend(degree=3, name='quadratic', w=1.0)
            # add a 12 month seasonality with prior covariance 0.0
            myDLM = myDLM + seasonality(12, name='12month', w=0.0)
            # add a 3 step auto regression
            myDLM = myDLM + autoReg(
                degree=3, data=train_pydlm.y, name='ar2', w=1.0)

            myDLM.fit()

            # one-step-ahead prediction from the last training date
            (predictMean, predictVar) = myDLM.predict(date=myDLM.n - 1)

            # BUG FIX: copy() so the shared `test` slice of `prod` is not
            # mutated (avoids pandas SettingWithCopy side effects).
            result_test = test.copy()
            result_test['y_pydlm'] = np.array([predictMean.item((0, 0))])
            # clip negative forecasts to zero
            result_test.loc[(result_test['y_pydlm'] < 0), 'y_pydlm'] = 0

            print('Next Test Starts...')
            train = prod[:(np.amax(np.array(train.index)) + 1 + test_points)]
            test = prod[(np.amax(np.array(train.index)) +
                         1):(np.amax(np.array(train.index)) + 1 + test_points)]
            rem_data = prod[(np.amax(np.array(train.index)) + test_points):]

            output_result = pd.concat([output_result, result_test], axis=0)

        output_result = monthly_pydlm_model_error_calculator(output_result)

        output_error = pd.DataFrame(data=[[
            cus_no, mat_no,
            rmse_calculator(output_result.y_pydlm, output_result.y),
            mape_calculator(output_result.y_pydlm, output_result.y),
            np.nanmedian(output_result.rolling_3month_percent_error),
            np.nanmax(
                np.absolute(
                    np.array(output_result.rolling_3month_percent_error))),
            np.nanmedian(output_result.rolling_4month_percent_error),
            np.nanmax(
                np.absolute(
                    np.array(output_result.rolling_4month_percent_error))),
            output_result['Error_Cumsum'].iloc[-1],
            output_result['cumsum_quantity'].iloc[-1],
            ((np.amax(output_result.ds) - np.amin(output_result.ds)).days + 30)
        ]],
                                    columns=[
                                        'cus_no', 'mat_no', 'rmse', 'mape',
                                        '3mre_med', '3mre_max', '4mre_med',
                                        '4mre_max', 'cum_error',
                                        'cum_quantity', 'period_days'
                                    ])

        if ('dir_name' in kwargs.keys()):
            dir_name = kwargs.get('dir_name')
            try:
                # plot predictions vs observations
                two_dim_save_plot(x1=output_result.ds,
                                  y1=output_result.y_pydlm,
                                  y1_label='pydlm_pred',
                                  x2=output_result.ds,
                                  y2=output_result.y,
                                  y2_label='observed',
                                  xlable="Date",
                                  ylable="quantity",
                                  title="pydlm_prediction",
                                  dir_name=dir_name,
                                  cus_no=cus_no,
                                  mat_no=mat_no)
                # plot cumulative error
                one_dim_save_plot(x=output_result.ds,
                                  y=output_result.Error_Cumsum,
                                  xlable="Date",
                                  ylable="% Cumulative Error",
                                  title="cumulative_error",
                                  dir_name=dir_name,
                                  cus_no=cus_no,
                                  mat_no=mat_no)
                # plot 3-month rolling error
                one_dim_save_plot(x=output_result.ds,
                                  y=output_result.rolling_3month_percent_error,
                                  xlable="Date",
                                  ylable="% 3 Month Rolling Error",
                                  title="3month_rolling_error",
                                  dir_name=dir_name,
                                  cus_no=cus_no,
                                  mat_no=mat_no)
            except ValueError:
                print("No points to plot")
    # BUG FIX: np.linalg.linalg was a private path removed in NumPy 2.0.
    except np.linalg.LinAlgError:
        print("could not fit")
    return (output_error)
def run_pydlm_monthly(cus_no, mat_no, prod, param, **kwargs):
    """Walk-forward evaluation plus final forecast of a pydlm monthly model.

    Walks forward over the held-out period in blocks of ``test_points``,
    refitting on the growing training window; then refits on the full
    history and prints a ``test_points``-step forecast.

    :param cus_no: customer number
    :param mat_no: product number
    :param prod: pd.DataFrame with 'dt_week' and 'quantity' columns
    :param param: dict with keys 'trend_degree', 'trend_w', 'seasonality_w',
        'ar_degree' and 'ar_w'
    :param kwargs: optional 'min_train_days', 'test_points', 'pred_points'
        (fall back to the p_model defaults)
    :return: None; intermediate and final results are printed
    """
    import pandas as pd
    import numpy as np
    from dateutil import parser
    # unused `fbprophet.Prophet` import removed; only pydlm is used here
    from pydlm import dlm, trend, seasonality, autoReg

    if ('min_train_days' in kwargs.keys()):
        min_train_days = kwargs.get('min_train_days')
    else:
        min_train_days = p_model.min_train_days

    if ('test_points' in kwargs.keys()):
        test_points = kwargs.get('test_points')
    else:
        test_points = p_model.test_points_monthly

    if ('pred_points' in kwargs.keys()):
        pred_points = kwargs.get('pred_points')
    else:
        pred_points = p_model.pred_points_monthly
    # NOTE(review): pred_points is currently unused below.

    # model parameters
    trend_degree = param.get('trend_degree')
    trend_w = param.get('trend_w')
    seasonality_w = param.get('seasonality_w')
    ar_degree = param.get('ar_degree')
    ar_w = param.get('ar_w')

    # data transform
    prod = prod.rename(columns={'dt_week': 'ds', 'quantity': 'y'})
    prod = prod[['ds', 'y']]
    prod.ds = prod.ds.apply(str).apply(parser.parse)
    prod.y = prod.y.apply(float)
    prod = prod.sort_values('ds')
    prod = prod.reset_index(drop=True)

    # Aggregated monthly data
    prod = get_monthly_aggregate_per_product(prod)

    # Remove outlier
    prod = ma_replace_outlier(data=prod, n_pass=3, aggressive=True, window_size=6, sigma=2.5)

    # test and train data creation
    train = prod[
        prod.ds <= (
            np.amax(prod.ds) - pd.DateOffset(days=(np.amax(prod.ds) - np.amin(prod.ds)).days - min_train_days))]
    test = prod[(np.amax(np.array(train.index)) + 1):(np.amax(np.array(train.index)) + 1 + test_points)]
    print(len(test))

    output_result = pd.DataFrame()

    # Walk-forward loop over the held-out period.
    while (len(test) > 0):
        train_pydlm = train.set_index('ds', drop=True)
        test_pydlm = test.set_index('ds', drop=True)

        # Modeling: trend + yearly seasonality + auto-regression
        myDLM = dlm(train_pydlm.y)
        myDLM = myDLM + trend(degree=trend_degree, name='trend', w=trend_w)
        myDLM = myDLM + seasonality(12, name='12month', w=seasonality_w)
        myDLM = myDLM + autoReg(degree=ar_degree, data=train_pydlm.y, name='ar', w=ar_w)

        myDLM.fit()

        # One-step prediction from the last training point, then continue
        # predicting for the rest of the test window.
        (predictMean, predictVar) = myDLM.predict(date=myDLM.n - 1)
        pred_test = np.array([round(predictMean.item((0, 0)), 2)])
        for i in range(len(test_pydlm) - 1):
            (predictMean_cont, predictVar_cont) = myDLM.continuePredict()
            pred_test = np.append(pred_test, round(predictMean_cont.item((0, 0)), 2))

        print(pred_test)

        # BUG FIX: copy() so the shared `test` slice of `prod` is not
        # mutated (avoids pandas SettingWithCopy side effects).
        result_test = test.copy()
        print((result_test))
        result_test['y_pydlm'] = pred_test
        # clip negative forecasts to zero
        result_test.loc[(result_test['y_pydlm'] < 0), 'y_pydlm'] = 0

        train = prod[:(np.amax(np.array(train.index)) + 1 + test_points)]
        test = prod[(np.amax(np.array(train.index)) + 1):(np.amax(np.array(train.index)) + 1 + test_points)]

        output_result = pd.concat([output_result, result_test], axis=0)

    print(output_result.head())

    # Final fit on the full history and a test_points-step forecast.
    train_pydlm = prod.set_index('ds', drop=True)

    myDLM = dlm(train_pydlm.y)
    myDLM = myDLM + trend(degree=trend_degree, name='trend', w=trend_w)
    myDLM = myDLM + seasonality(12, name='12month', w=seasonality_w)
    myDLM = myDLM + autoReg(degree=ar_degree, data=train_pydlm.y, name='ar', w=ar_w)

    myDLM.fit()
    print(trend_degree, trend_w, seasonality_w, ar_degree, ar_w)
    (predictMean, predictVar) = myDLM.predict(date=myDLM.n - 1)
    pred_test = np.array([round(predictMean.item((0, 0)), 2)])
    for i in range(test_points - 1):
        (predictMean, predictVar) = myDLM.continuePredict()
        pred_test = np.append(pred_test, round(predictMean.item((0, 0)), 2))

    print(pred_test)
Example #12
0
    print(''.join(['  '] * level) + str(obj))
    for el in obj.refs:
        print_size(el, level=level + 1)


def compare_size(obj1, obj2, level=0):
    """Recursively print two object trees side by side, indented by depth."""
    indent = '  ' * level
    print(indent + str(obj1))
    print(indent + str(obj2))
    for child_a, child_b in zip(obj1.refs, obj2.refs):
        compare_size(child_a, child_b, level=level + 1)


# Build two structurally identical models -- odlm (presumably an alternate
# dlm implementation; confirm) vs dlm -- to compare their memory growth.
model1 = odlm([]) + trend(degree=2, discount=0.95, name='trend1')
model1.stableMode(False)

model2 = dlm([]) + trend(degree=2, discount=0.95, name='trend1')
model2.stableMode(False)

d1 = {}
d2 = {}
# Feed the series one point at a time, filter online, and measure the size
# of each model after every step (asizeof is presumably pympler.asizeof).
for idx, el in enumerate(ts):
    model1.append([el], component='main')
    model1.fitForwardFilter()

    model2.append([el], component='main')
    model2.fitForwardFilter()

    a1 = asizeof.asized(model1, detail=4)
    a2 = asizeof.asized(model2, detail=4)

    mean1, var1 = model1.predictN(N=1, date=model1.n - 1)
Example #13
0
# Plot the raw series before modelling.
dlmPlot.plotData(range(len(time_series)),
                 time_series,
                 showDataPoint=False,
                 label='raw_data')
plt.legend(loc='best', shadow=True)
plt.show()

# Build a simple model
from pydlm import dlm, trend, seasonality

# A linear trend
linear_trend = trend(degree=1, discount=0.95, name='linear_trend', w=10)
# A yearly (52-week) seasonality
seasonal52 = seasonality(period=52, discount=0.99, name='seasonal52', w=10)

simple_dlm = dlm(time_series) + linear_trend + seasonal52
simple_dlm.fit()

# Plot the fitted results
simple_dlm.turnOff('data points')
simple_dlm.plot()
# Plot each component (attribution)
simple_dlm.turnOff('predict plot')
simple_dlm.turnOff('filtered plot')
simple_dlm.plot('linear_trend')
simple_dlm.plot('seasonal52')
# Plot the prediction given the first 350 weeks and forecast the next 200 weeks.
simple_dlm.plotPredictN(N=200, date=350)
# Plot the prediction given the first 250 weeks and forecast the next 200 weeks.
simple_dlm.plotPredictN(N=200, date=250)
Example #14
0
# Plot the raw series before modelling.
dlmPlot.plotData(range(len(time_series)),
                 time_series,
                 showDataPoint=False,
                 label='raw_data')
plt.legend(loc='best', shadow=True)
plt.show()

# Build a simple model
from pydlm import dlm, trend, seasonality

# A linear trend
linear_trend = trend(degree=1, discount=0.95, name='linear_trend', w=10)
# A yearly (52-week) seasonality
seasonal52 = seasonality(period=52, discount=0.99, name='seasonal52', w=10)

simple_dlm = dlm(time_series) + linear_trend + seasonal52
simple_dlm.fit()

# Plot the fitted results
simple_dlm.turnOff('data points')
simple_dlm.plot()
# Plot each component (attribution)
simple_dlm.turnOff('predict plot')
simple_dlm.turnOff('filtered plot')
simple_dlm.plot('linear_trend')
simple_dlm.plot('seasonal52')
# Plot the prediction given the first 350 weeks and forecast the next 200 weeks.
simple_dlm.plotPredictN(N=200, date=350)
# Plot the prediction given the first 250 weeks and forecast the next 200 weeks.
simple_dlm.plotPredictN(N=200, date=250)
Пример #15
0
def estimate_and_predict_dlm_PR(calendar,
                                df_propor_PR_ts,
                                punched_df,
                                end_train_date,
                                start_test_date,
                                start_of_this_year,
                                enable_sales,
                                pred_weeks=8,
                                locality=10,
                                r=0.05,
                                missing_val=201735):
    '''
    Forecast per-club punched-wage cost for the PR region with dynamic linear
    models, blending a full-history model with a local (recent-weeks) model.

    Accepts the forecast sales-proportion data as a regressor. For each club
    two DLMs are fit: one on the whole history (trend + 26-period seasonality
    + AR + mean-wage regressor) and one on only the last `locality` weeks;
    their predictions are blended with weights p1/p2 derived from the decay
    rate `r` and the distance of `end_train_date` from `start_of_this_year`.

    :param calendar: DataFrame mapping calendar_date to wm_yr_wk_nbr.
    :param df_propor_PR_ts: forecast sales-proportion data per club
        (contains club, per_nbr_fc, total_sales_across columns).
    :param punched_df: raw punched-wage records with club_nbr, posting_date
        and cost columns.
    :param end_train_date: last wm_yr_wk_nbr included in training.
    :param start_test_date: first wm_yr_wk_nbr of the forecast horizon.
    :param start_of_this_year: wm_yr_wk_nbr used to weight global vs local models.
    :param enable_sales: if True, include the sales-ratio regressors in the
        local model; otherwise only the macro (mean-wage) regressor.
    :param pred_weeks: number of weeks to forecast per club.
    :param locality: AR order and width (in week numbers) of the local window.
    :param r: exponential decay rate for the blending weight p1.
    :param missing_val: wm_yr_wk_nbr known to be absent (hurricane week,
        default 201735) and imputed by interpolating its +/-2-week neighbours.
    :return: DataFrame with columns [wm_yr_wk_nbr, club, yhat].
    '''
    res = pd.DataFrame()
    # Aggregate raw punches to total cost per club per posting date.
    punched = punched_df.groupby(['club_nbr', 'posting_date'])['cost'].sum()
    # NOTE(review): '.column' is not a pandas attribute, so this assignment
    # has no effect; the Series keeps the name 'cost', which the code below
    # relies on — confirm before "fixing" it to .columns/rename.
    punched.column = ['total_punched_wg']
    punched = punched.reset_index()
    # Attach the fiscal week number via the calendar mapping.
    punched = pd.merge(left=punched,
                       right=calendar,
                       how='left',
                       left_on='posting_date',
                       right_on='calendar_date').drop('calendar_date', axis=1)
    # mean wage among all clubs
    punched = removehurricane('cost', punched, 201733, 201739, sales=False)
    punched_mean = punched.groupby(['wm_yr_wk_nbr',
                                    'posting_date'])['cost'].mean()
    punched_mean = punched_mean.reset_index()
    punched_mean.columns = ['wm_yr_wk_nbr', 'posting_date', 'cost']
    punched_mean['club_nbr'] = pd.Series(np.ones([punched_mean.shape[0]]))
    ##########################
    # Impute the missing hurricane week as the average of the weeks two
    # before and two after it (date is carried forward 14 days).
    if missing_val not in punched_mean.wm_yr_wk_nbr.tolist():
        punched_mean.loc[-1] = [
            missing_val,
            punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, -2)].iloc[0, 1] + timedelta(days=14),
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, -2)].iloc[0, 2] +
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, 2)].iloc[0, 2], 1
        ]  # adding a row
        punched_mean.index = punched_mean.index + 1
    #########################
    # Smooth the mean wage with a 3-point weighted moving average, then
    # forecast it forward with the Prophet-based helper.
    punched_mean1 = punched_mean.copy(deep=True)
    punched_mean1['cost'] = 0.5 * punched_mean1['cost'] + 0.25 * punched_mean1[
        'cost'].shift(1) + 0.25 * punched_mean1['cost'].shift(2)
    ty = punched_mean1['cost'].mean()
    punched_mean1[['cost']] = punched_mean1[['cost']].fillna(value=ty)
    punched_mean1 = estimate_and_predict_prophet_PR(
        calendar,
        punched_mean1,
        end_train_date,
        start_test_date,
        daily_view=False,
        pred_days=120)  #predict the mean wages.
    punched_mean1 = punched_mean1.drop('club', axis=1)
    punched_mean1.columns = ['posting_date', 'PR_cost']
    punched_mean1 = pd.merge(left=punched_mean1,
                             right=calendar,
                             how='left',
                             left_on='posting_date',
                             right_on='calendar_date').drop('calendar_date',
                                                            axis=1)
    # Historical (training-period) smoothed mean wage, same 3-point weights.
    tmp = punched.groupby(['wm_yr_wk_nbr', 'posting_date'])['cost'].mean()
    tmp = tmp.reset_index()
    tmp.columns = ['wm_yr_wk_nbr', 'posting_date', 'PR_cost']
    tmp = tmp.loc[tmp.wm_yr_wk_nbr <= end_train_date]
    tmp['PR_cost'] = 0.5 * tmp['PR_cost'] + 0.25 * tmp['PR_cost'].shift(
        1) + 0.25 * tmp['PR_cost'].shift(2)
    ty = tmp['PR_cost'].mean()
    tmp[['PR_cost']] = tmp[['PR_cost']].fillna(value=ty)

    # Stitch history + forecast of the mean wage into one regressor series.
    punched_mean = pd.concat([tmp, punched_mean1], axis=0)
    if missing_val not in punched_mean.wm_yr_wk_nbr.tolist():
        tu = [
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, -2)].iloc[0, 0] +
            0.5 * punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
                missing_val, 2)].iloc[0, 0]
        ]
        tu.append(punched_mean.loc[punched_mean.wm_yr_wk_nbr == wm_nbr_add(
            missing_val, -2)].iloc[0, 1] + timedelta(days=14))
        tu.append(missing_val)
        punched_mean.loc[-1] = tu  # adding a row
        punched_mean.index = punched_mean.index + 1  # shifting index
    punched_mean = punched_mean.sort_values(
        by='wm_yr_wk_nbr').reset_index().drop('index', axis=1)
    punched = punched.drop('posting_date', axis=1)
    # Per-club totals, cleaned of the hurricane window, used as the target.
    punched_pro = punched_df.groupby(['club_nbr',
                                      'posting_date'])['cost'].sum()
    # NOTE(review): same no-op '.column' assignment as above — confirm intent.
    punched_pro.column = ['total_punched_wg']
    punched_pro = punched_pro.reset_index()
    punched_pro = pd.merge(left=punched_pro,
                           right=calendar,
                           how='left',
                           left_on='posting_date',
                           right_on='calendar_date').drop('calendar_date',
                                                          axis=1)
    punched_pro = removehurricane('cost',
                                  punched_pro,
                                  201733,
                                  201739,
                                  sales=False)
    #201735 is Maria Hurrican Missing
    #201737 is the Irma Hurricane
    club_ls = punched.club_nbr.unique()
    for club in club_ls:
        pro_club = punched_pro[punched_pro.club_nbr.isin([club])]
        #########################################
        # adding missing value (same +/-2-week interpolation as above)
        if missing_val not in pro_club.wm_yr_wk_nbr.tolist():
            pro_club.loc[-1] = [
                club, pro_club.loc[pro_club.wm_yr_wk_nbr == wm_nbr_add(
                    missing_val, -2)].iloc[0, 1] + timedelta(days=14),
                0.5 * pro_club.loc[pro_club.wm_yr_wk_nbr == wm_nbr_add(
                    missing_val, -2)].iloc[0, 2] +
                0.5 * pro_club.loc[pro_club.wm_yr_wk_nbr == wm_nbr_add(
                    missing_val, 2)].iloc[0, 2], missing_val
            ]  # adding a row
            pro_club.index = pro_club.index + 1  # shifting index
        ####################################################
        pro_club = pro_club.sort_values(by='posting_date').reset_index().drop(
            'index', axis=1)
        pro_sales = df_propor_PR_ts.loc[df_propor_PR_ts.club == club].drop(
            ['club'], axis=1)
        pro_club = pro_club.drop(['club_nbr', 'posting_date'], axis=1)
        pro_club.columns = ['cost', 'wm_yr_wk_nbr']
        # Reconstruct club-level sales and add 1- and 2-week lags.
        pro_sales['total_sales'] = pro_sales['total_sales_across'] * pro_sales[
            'per_nbr_fc']
        pro_sales = pd.concat(
            [pro_sales] +
            [pro_sales.total_sales.shift(x) for x in range(1, 3)],
            axis=1)
        pro_sales.columns = [
            'wm_yr_wk_nbr', 'per_nbr_fc', 'total_sales_across',
            'total_sales_0', 'sr_1', 'sr_2'
        ]
        #########################################
        # adding missing value (interpolate every column of the sales frame)
        if missing_val not in pro_sales.wm_yr_wk_nbr.unique().tolist():
            tu = []
            for k in range(len(pro_sales.columns)):
                tu.append(
                    0.5 * pro_sales.loc[pro_sales.wm_yr_wk_nbr == wm_nbr_add(
                        missing_val, -2)].iloc[0, k] +
                    0.5 * pro_sales.loc[pro_sales.wm_yr_wk_nbr == wm_nbr_add(
                        missing_val, 2)].iloc[0, k])
            tu[0] = int(tu[0])
            pro_sales.loc[-1] = tu  # adding a row
            pro_sales.index = pro_sales.index + 1  # shifting index
        pro_sales = pro_sales.sort_values(
            by='wm_yr_wk_nbr').reset_index().drop('index', axis=1)
        # Join in the mean-wage regressor; 'right' keeps the full wage timeline.
        pro_sales = pd.merge(left=pro_sales,
                             right=punched_mean,
                             how='right',
                             left_on='wm_yr_wk_nbr',
                             right_on='wm_yr_wk_nbr',
                             validate='1:1')
        pro_sales = pro_sales.drop(['posting_date'], axis=1)
        pro_sales = pro_sales.apply(lambda x: x.fillna(x.mean()), axis=0)
        pro_sales_train = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr <= end_train_date]
        pro_sales_test = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr >= start_test_date]
        # trend
        linear_trend = trend(degree=2, discount=0.98, name='linear_trend', w=8)
        # seasonality
        seasonal26 = seasonality(period=26,
                                 discount=1,
                                 name='seasonal26',
                                 w=12)
        # control variable: each dynamic regressor needs list-of-list features
        sales0 = pro_sales_train['total_sales_0'].values.tolist()
        s0 = [[x] for x in sales0]
        sales1 = pro_sales_train['sr_1'].values.tolist()
        s1 = [[x] for x in sales1]
        sales2 = pro_sales_train['sr_2'].values.tolist()
        s2 = [[x] for x in sales2]
        macro = pro_sales_train['PR_cost'].values.tolist()
        m1 = [[x] for x in macro]
        #####################################
        s0 = dynamic(features=s0, discount=0.99, name='sales0', w=8)
        s1 = dynamic(features=s1, discount=0.99, name='sales1',
                     w=6)  # use the actual sales and forecasting sales amount
        s2 = dynamic(features=s2, discount=0.95, name='sales2', w=6)
        m1 = dynamic(features=m1, discount=0.99, name='macro', w=12)

        #e1 = dynamic(features=e1,discount=0.95,name='eff',w=6)
        # Global model: trend + 26-period seasonality + AR(locality) + macro.
        drm = dlm(pro_club['cost']) + linear_trend + seasonal26 + autoReg(
            degree=locality, name='ar2', w=6) + m1  #+s0+s1+s2+m1
        drm.fit()
        #testset
        pro_sales_test = pro_sales_test.head(pred_weeks)
        sales0test = pro_sales_test['total_sales_0'].head(
            pred_weeks).values.tolist()
        s0test = [[x] for x in sales0test]
        sales1test = pro_sales_test['sr_1'].head(pred_weeks).values.tolist()
        s1test = [[x] for x in sales1test]
        sales2test = pro_sales_test['sr_2'].head(pred_weeks).values.tolist()
        s2test = [[x] for x in sales2test]
        macrotest = pro_sales_test['PR_cost'].head(pred_weeks).values.tolist()
        m1test = [[x] for x in macrotest]
        #efftest = testset['eff'].head(pred_weeks).values.tolist()
        #e1test = [[x] for x in efftest]
        features = {
            'sales0': s0test,
            'sales1': s1test,
            'sales2': s2test,
            'macro': m1test
        }  #,'eff':e1test}
        (predictMean, predictVar) = drm.predictN(N=pred_weeks,
                                                 date=drm.n - 1,
                                                 featureDict=features)
        #locality: build the local model on only the last `locality` weeks
        pro_sales = pro_sales.drop(['sr_1', 'sr_2'], axis=1)
        pro_sales['ratio'] = pro_sales['total_sales_0'] / pro_sales[
            'total_sales_across']
        pro_sales['ratio_1'] = pro_sales['ratio'].shift(1)
        pro_sales['ratio_2'] = pro_sales['ratio'].shift(2)
        trainset1_year = pro_club.loc[
            pro_club.wm_yr_wk_nbr <= end_train_date].loc[
                pro_club.wm_yr_wk_nbr >= end_train_date - locality]
        trainset_year = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr <= end_train_date].loc[
                pro_sales.wm_yr_wk_nbr >= end_train_date - locality]
        # NOTE(review): the result of this apply is discarded, so no NaNs are
        # actually filled here — confirm whether an assignment was intended.
        trainset_year.apply(lambda x: x.fillna(x.mean()), axis=0)
        linear_trend_year = trend(degree=1,
                                  discount=0.99,
                                  name='linear_trend_year',
                                  w=10)
        sales0_year = trainset_year['ratio'].values.tolist()
        s0_year = [[x] for x in sales0_year]
        # use the forecast of the ratio of each club among total in PR area
        # since this is a local model, the total amount in area can be assumed to be constant.
        sales1_year = trainset_year['ratio_1'].values.tolist()
        s1_year = [[x] for x in sales1_year]
        sales2_year = trainset_year['ratio_2'].values.tolist()
        s2_year = [[x] for x in sales2_year]
        macro_year = trainset_year['PR_cost'].values.tolist()
        m1_year = [[x] for x in macro_year]
        #####################################
        s0_year = dynamic(features=s0_year,
                          discount=0.99,
                          name='sales0_year',
                          w=10)
        s1_year = dynamic(features=s1_year,
                          discount=0.99,
                          name='sales1_year',
                          w=8)
        s2_year = dynamic(features=s2_year,
                          discount=0.95,
                          name='sales2_year',
                          w=6)
        m1_year = dynamic(features=m1_year,
                          discount=0.99,
                          name='macro_year',
                          w=10)
        #e1_year = dynamic(features=e1_year,discount=0.95,name='eff_year',w=6)
        # Local model: AR + trend + macro, optionally plus sales ratios.
        if enable_sales:
            drm_year = dlm(trainset1_year['cost']) + autoReg(
                degree=locality, name='ar2', w=5
            ) + linear_trend_year + m1_year + s0_year + s1_year + s2_year
        else:
            drm_year = dlm(trainset1_year['cost']) + autoReg(
                degree=locality, name='ar2',
                w=5) + linear_trend_year + m1_year  #+s0_year+s1_year+s2_year
        drm_year.fit()
        testset_year = pro_sales.loc[
            pro_sales.wm_yr_wk_nbr >= start_test_date].head(pred_weeks)
        sales0test = testset_year['ratio'].head(pred_weeks).values.tolist()
        s0test = [[x] for x in sales0test]
        sales1test = testset_year['ratio_1'].head(pred_weeks).values.tolist()
        s1test = [[x] for x in sales1test]
        sales2test = testset_year['ratio_2'].head(pred_weeks).values.tolist()
        s2test = [[x] for x in sales2test]
        features_year = {
            'sales0_year': s0test,
            'sales1_year': s1test,
            'sales2_year': s2test,
            'macro_year': m1test
        }
        (predictMean_year,
         predictVar_year) = drm_year.predictN(N=pred_weeks,
                                              date=drm_year.n - 1,
                                              featureDict=features_year)
        weeklist = []
        # Blend weights: p1 decays with distance from the start of this year.
        p1 = np.exp(-r * (abs(end_train_date - start_of_this_year - 52)))
        p2 = 1 - p1
        for k in range(pred_weeks):
            weeklist.append(wm_nbr_add(start_test_date, 2 * k))

        # Append this club's blended forecast to the result frame.
        if res.shape[0] == 0:
            res['wm_yr_wk_nbr'] = weeklist
            res['club'] = pd.Series(club * np.ones(pred_weeks),
                                    index=res.index)
            res['yhat'] = pd.Series(p1 * np.asarray(predictMean) +
                                    p2 * np.asarray(predictMean_year),
                                    index=res.index)
        else:
            tmp = pd.DataFrame()
            tmp['wm_yr_wk_nbr'] = weeklist
            tmp['club'] = pd.Series(club * np.ones(pred_weeks),
                                    index=tmp.index)
            tmp['yhat'] = pd.Series(p1 * np.asarray(predictMean) +
                                    p2 * np.asarray(predictMean_year),
                                    index=tmp.index)
            res = pd.concat([res, tmp], axis=0)
    return res
Пример #16
0
import math

import pandas as pd
import scipy.stats

# Read the daily active-users series; the first column becomes a
# DatetimeIndex and the single data column is squeezed to a Series.
# (`squeeze=True` was removed from read_csv in pandas 2.0.)
series = pd.read_csv('daily-users.csv',
                     header=0,
                     parse_dates=[0],
                     index_col=0).squeeze("columns")

# Use just last 90 days.
# `.ix` was removed in pandas 1.0; `.iloc` gives the same positional slice.
series = series.iloc[-90:]

from pydlm import dlm, trend, seasonality

# Constant level plus a weekly seasonality.
constant = trend(degree=0, name="constant")
seasonal_week = seasonality(period=7, name='seasonal_week')
model = dlm(series) + constant + seasonal_week
model.tune()
model.fit()

# Forecast one day
predictions, conf = model.predictN(N=1)
print("Prediction for next day: %.2f, confidence: %s" %
      (predictions[0], conf[0]))

# Interactively score observed values against the forecast distribution:
# z-score uses the predictive variance, p-value is two-sided.
while True:
    actual = float(input("Actual value? "))
    zscore = (actual - predictions[0]) / math.sqrt(conf[0])
    print("Z-score: %.2f" % zscore)
    pvalue = scipy.stats.norm.sf(abs(zscore)) * 2
    print("p-value: %.2f" % pvalue)
#from pandas_datareader import DataReader
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
#import pyflux as pf

from pydlm import dlm, trend, seasonality, dynamic, autoReg, longSeason
# Synthetic step series: 100 zeros followed by 100 threes, to see how the
# linear-trend DLM tracks an abrupt level shift.
data = np.array([0] * 100 + [3] * 100)
myDLM = dlm(data)
myDLM = myDLM + trend(degree=1, discount=0.95, name='trend1')
myDLM.fit()

# Latent state trajectory and one-step-ahead predictive mean/variance.
coef = np.array(myDLM.getLatentState())
results = np.array(myDLM.result.predictedObs)[:,0,0]
results_var = np.array(myDLM.result.predictedObsVar )[:,0,0]


# Three panels: first state component, second state component, and the
# predictions overlaid on the raw data points.
fig = plt.figure()
ax1 = fig.add_subplot(311)
ax1.plot(coef[:,0])
ax2 = fig.add_subplot(312)
ax2.plot(coef[:,1])
ax3 = fig.add_subplot(313)
ax3.plot(results)
ax3.plot(data,marker='o',ls='')
plt.savefig('scratch_result.pdf')
'''
Пример #18
0
# Load arrival counts for all regions from the Excel workbook.
data = dataset.load_excel(excel_file, dir="../../datasets")
data = dataset.load_all_regions(data)

df_italy = data["italy"] # Arrivals to Italy
df_greek_island = data["greek_island"] # Arrivals to Greek Island
df_mainland_greece = data["mainland_greece"] # Arrivals to Mainland greece
df_fyrom = data["fyrom"] # Arrivals to fYRoM
df_serbia = data["serbia"] # Arrivals to Serbia
df_croatia = data["croatia"] # Arrivals to Croatia
df_hungry = data["hungry"] # Arrivals to Hungry
df_slovenia = data["slovenia"] # Arrivals to Slovenia
df_austria = data["austria"] # Arrivals to Austria

df = df_austria # Series to test
column_name = df.columns[0]

# Fill gaps: NaN -> 0, then forward-fill any zeros from the previous value.
fill_method = "ffill"
df.fillna(0, inplace=True)
df[df.columns[0]] = df[column_name].replace(to_replace=0, method=fill_method) # Replace 0 in series

# Trend + short seasonality DLM; report in-sample R2 and RMSE.
model = dlm(df[column_name])
model = model + trend(degree=1, discount=0.72, name='trend component')
model = model + seasonality(period=2, discount=0.99, name='seasonality component')

model.fit()
model.plot()
predictions = list(np.array(model.result.predictedObs).flatten())
r2 = r2_score(df, predictions)
rmse = np.sqrt(model.getMSE())
print('RMSE:', rmse)
print('R2:', r2)
Пример #19
0
    observation_trajectories = [np.exp(particles)]
    for i in range(len(test_n_t_inf)):
        tmp = expected_value_transition_function(state_trajectories[i - 1])
        observation_trajectories.append(
            expected_value_observation_function(tmp))
        state_trajectories.append(tmp)

    state_trajectories = state_trajectories[1:]
    ## MEAN
    print(np.mean(observation_trajectories, axis=1))
    ## QUANTILES
    state_trajectories = np.array(state_trajectories).reshape(
        (len(test_n_t_inf), -1))

else:
    myDLM = dlm(train_n_t_inf)
    myDLM = myDLM + trend(1, name='lineTrend', w=1.0)
    # add a 7 day seasonality with prior covariance 1.0
    myDLM = myDLM + seasonality(52, name='7day', w=1.0)
    # add a 3 step auto regression
    myDLM = myDLM + autoReg(degree=2, data=train_n_t_inf, name='ar3', w=1.0)
    myDLM.fit()
    (predictMean, predictVar) = myDLM.predictN(N=D - 1, date=myDLM.n - 1)

for i in range(len(predictMean)):
    samples = np.random.normal(predictMean[i], np.sqrt(predictVar[i]), 4)
    state_trajectories.append(samples)
state_trajectories = np.array(state_trajectories)

phat = trace['a'].mean(axis=0)
from scipy.stats import binom
Пример #20
0
import pandas as pd
import matplotlib
matplotlib.use('Agg') # for saving figures
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# `squeeze=True` was removed from read_csv in pandas 2.0; chaining
# .squeeze("columns") yields the same Series.
series = pd.read_csv('daily-users.csv', header=0, parse_dates=[0],
                     index_col=0).squeeze("columns")

# group daily counts into monthly
series = series.groupby(pd.Grouper(freq='M')).sum()

from pydlm import dlm, trend, seasonality, longSeason

# Constant level plus a 12-month seasonality, fit on 2015-2016 only.
constant = trend(degree=0, name="constant")
seasonal_month = seasonality(period=12, name='seasonal_month')
# `.ix` was removed in pandas 1.0; `.loc` does the same label-based slice.
model = dlm(series.loc['2015-01-01':'2016-12-31']) + constant + seasonal_month

model.tune()
model.fit()

# Plot the smoothed fit without data points or the confidence band.
model.turnOff('data points')
model.turnOff('confidence interval')
model.plot()
plt.savefig('bayesian-monthly.png', dpi=300, bbox_inches='tight', pad_inches=0)
plt.close()

print(model.getMSE())

# Plot only the constant (level) component.
model.turnOff('predict plot')
model.turnOff('filtered plot')
model.plot('constant')
Пример #21
0
# Version 1: feed the series point by point through the forward filter.
model.stableMode(False)

d = {}
for idx, el in enumerate(ts):
    print(el)
    model.append([el], component='main')
    model.fitForwardFilter()
    print()

# NOTE(review): this prediction sits outside the loop, so only the final
# one-step-ahead forecast is stored — confirm whether it was meant per step.
mean, var = model.predictN(N=1, date=model.n - 1)
d[idx] = mean

df1 = pd.DataFrame.from_dict(d, orient="index")

## Version 2: same online fit, but with a weekly seasonality added.
model = dlm([]) + trend(degree=2, discount=0.95,
                        name='trend1') + seasonality(7)
model.stableMode(False)

d = {}
for idx, el in enumerate(ts):
    model.append([el], component='main')
    model.fitForwardFilter()

mean, var = model.predictN(N=1, date=model.n - 1)
d[idx] = mean

df2 = pd.DataFrame.from_dict(d, orient="index")

## Print the results
print(df1)
Пример #22
0
    def _tune(self,
              y,
              period,
              x=None,
              metric="smape",
              val_size=None,
              verbose=False):
        """
        Tune hyperparameters of the model by grid search over trend degree
        (and, currently disabled, AR order), scoring each candidate on a
        held-out validation tail.
        :param y: pd.Series or 1-D np.array, time series to predict.
        :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
        :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
        :param metric: Str, the metric used for model selection. One of "mse" (mean squared error), "mae" (mean absolute
        error).
        :param val_size: Int, the number of most recent observations to use as validation set for tuning.
        :param verbose: Boolean, True for printing additional info while tuning.
        :return: None
        """
        # isinstance is the idiomatic (and subclass-safe) string check.
        self.period = data_utils.period_to_int(period) if isinstance(
            period, str) else period
        # Default validation window: last 10% of the series.
        val_size = int(len(y) * .1) if val_size is None else val_size
        y_train, y_val = model_utils.train_val_split(y, val_size=val_size)
        if x is not None:
            x_train, x_val = model_utils.train_val_split(x, val_size=val_size)
        metric_fun = get_metric(metric)

        params_grid = {
            "trend": [0, 1, 2, 3],
            "ar": [None],
            # "ar": [None, 1, 2, 3],
        }
        params_keys, params_values = zip(*params_grid.items())
        params_permutations = [
            dict(zip(params_keys, v))
            for v in itertools.product(*params_values)
        ]

        scores = []
        for permutation in params_permutations:
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    # Trend + seasonality, plus optional AR and one dynamic
                    # regressor per exogenous column.
                    model = pydlm.dlm(y_train)
                    model = model + pydlm.trend(degree=permutation["trend"],
                                                discount=0.5)
                    model = model + pydlm.seasonality(period=self.period,
                                                      discount=0.99)
                    if permutation["ar"] is not None:
                        model = model + pydlm.autoReg(degree=permutation["ar"],
                                                      discount=0.99)
                    if x is not None:
                        for variable_id, x_variable in enumerate(x_train.T):
                            model = model + pydlm.dynamic(
                                features=[[v] for v in x_variable],
                                discount=0.99,
                                name=str(variable_id))
                    with SuppressStdoutStderr():
                        model.tune()
                        model.fit()
                    if x is not None:
                        x_val_dict = {}
                        for variable_id, x_variable in enumerate(x_val.T):
                            x_val_dict.update(
                                {str(variable_id): [[v] for v in x_variable]})
                    else:
                        x_val_dict = None
                    y_pred = model.predictN(date=model.n - 1,
                                            N=len(y_val),
                                            featureDict=x_val_dict)[0]

                    score = metric_fun(y_val, y_pred)
                    scores.append(score)
            except Exception:
                # A failed fit simply scores infinitely bad. A bare `except:`
                # here would also swallow KeyboardInterrupt/SystemExit.
                scores.append(np.inf)

        best_params = params_permutations[np.nanargmin(scores)]
        self.params.update(best_params)
        self.params["tuned"] = True