Пример #1
0
def combshd(train, horizon, seasonality, init):
    """Average the forecasts of three exponential-smoothing models.

    The training data is Box-Cox transformed, three state-space
    ``ExponentialSmoothing`` models are fitted on the transformed series,
    every forecast is mapped back with the inverse Box-Cox transform and
    the three forecasts are averaged with equal weights.

    Original author: Sandeep Pawar (8/30/2020, version 1.1).

    Parameters
    ----------
    train
        Training observations handed to ``boxcox``.
    horizon
        Number of steps passed to each model's ``forecast``.
    seasonality
        Value forwarded as ``seasonal`` to the second and third model
        (the first model is fitted with ``seasonal=None``).
    init
        Value forwarded as ``initialization_method`` to every model.

    Returns
    -------
    The mean of the three back-transformed forecasts.
    """
    transformed, lmbda = boxcox(train)

    def _forecast(seasonal, damped):
        # Fit one configuration on the transformed series and return its
        # forecast on the original scale.
        fitted = sm.tsa.statespace.ExponentialSmoothing(
            transformed,
            trend=True,
            seasonal=seasonal,
            initialization_method=init,
            damped_trend=damped,
        ).fit()
        return inv_boxcox(fitted.forecast(horizon), lmbda)

    # Same fitting order as before: trend only, trend + seasonal,
    # damped trend + seasonal.
    trend_only = _forecast(None, False)
    with_season = _forecast(seasonality, False)
    damped_season = _forecast(seasonality, True)

    return (trend_only + with_season + damped_season) / 3
Пример #2
0
def pellet_forecast(train, forecast_horizon):
    """Forecast by averaging three exponential-smoothing models.

    The series is Box-Cox transformed, three state-space
    ``ExponentialSmoothing`` models (trend only, trend with a seasonal
    period of 12, damped trend with a seasonal period of 12) are fitted
    with ``initialization_method='estimated'``, and their inverse-transformed
    forecasts are averaged with equal weights.

    Parameters
    ----------
    train
        Training observations handed to ``boxcox``.
    forecast_horizon
        Number of steps passed to each model's ``forecast``.

    Returns
    -------
    The mean of the three back-transformed forecasts.
    """
    transformed, lmbda = boxcox(train)

    # (seasonal, damped_trend) for each of the three models, in fit order.
    configurations = ((None, False), (12, False), (12, True))

    forecasts = []
    for seasonal, damped in configurations:
        fitted = sm.tsa.statespace.ExponentialSmoothing(
            transformed,
            trend=True,
            seasonal=seasonal,
            initialization_method='estimated',
            damped_trend=damped,
        ).fit()
        forecasts.append(
            inv_boxcox(fitted.forecast(forecast_horizon), lmbda))

    first, second, third = forecasts
    return (first + second + third) / 3
Пример #3
0
def build_model(train, test, bc_lambda):
    """Fit an OLS model on the Box-Cox target and report test R-squared.

    The regression formula is built from the module-level ``model_vars``.
    The model summary and two R-squared values for the test set (on the
    Box-Cox scale and on the original scale) are printed, then
    ``build_evaluate_model`` is called; its return value is not used.

    Parameters
    ----------
    train, test
        Data containing ``property_crime``, ``property_crime_bc`` and the
        columns named in ``model_vars``.
    bc_lambda
        Box-Cox lambda used to map predictions back to the original scale.

    Returns
    -------
    tuple
        ``(lm1, pred_train, pred_test, resids_train, resids_test)``.
    """
    predictors = ' + '.join(model_vars)
    formula = "property_crime_bc ~ " + predictors

    lm1 = smf.ols(formula=formula, data=train).fit()
    print(lm1.summary())

    # Predictions on the Box-Cox scale, then mapped back.
    test_pred_bc = lm1.predict(test)
    pred_train = inv_boxcox(lm1.fittedvalues, bc_lambda)
    pred_test = inv_boxcox(test_pred_bc, bc_lambda)

    resids_train = train["property_crime"] - pred_train
    resids_test = test["property_crime"] - pred_test

    r2_test_bc = r2_score(test["property_crime_bc"], test_pred_bc)
    r2_test = r2_score(test["property_crime"], pred_test)
    print("R-squared (Property Crime Box-Cox): {}".format(r2_test_bc))
    print("R-squared (Property Crime): {}".format(r2_test))

    build_evaluate_model(train, test, bc_lambda, model_vars)

    return lm1, pred_train, pred_test, resids_train, resids_test
Пример #4
0
def train_arima(dataframe, p, d, q, test, BOXCOX):
    """Fit an ARIMA(p, d, q) model and predict over the index of ``test``.

    When ``BOXCOX == "Yes"`` the ``closing_balance`` column is Box-Cox
    transformed before fitting, and both the fitted values and the
    predictions are mapped back with the inverse transform.

    Parameters
    ----------
    dataframe
        Training data passed to ``ARIMA``; must contain ``closing_balance``
        when the Box-Cox option is used. It is no longer modified in place.
    p, d, q
        ARIMA order.
    test
        Object whose ``index`` supplies the first and last prediction
        positions and the index of the returned predictions.
    BOXCOX
        The string ``"Yes"`` enables the Box-Cox transform; anything else
        disables it.

    Returns
    -------
    dict
        ``{'model': fitted results, 'predictions': DataFrame with a single
        column 'y', 'lam': Box-Cox lambda or None}``.
    """
    print(dataframe)
    use_boxcox = BOXCOX == "Yes"
    if use_boxcox:
        # Transform a copy: writing into the caller's frame meant a second
        # call on the same data would transform it twice.
        dataframe = dataframe.copy()
        dataframe['closing_balance'], lam = boxcox(
            dataframe['closing_balance'])
    else:
        lam = None

    model = ARIMA(dataframe, order=(p, d, q))
    results_ARIMA = model.fit(disp=-1)
    ARIMA_predict = pd.DataFrame(
        results_ARIMA.predict(start=test.index[0], end=test.index[-1]),
        index=test.index)
    print(results_ARIMA.fittedvalues)

    if use_boxcox:
        # Bring fitted values and predictions back to the original scale.
        results_ARIMA.fittedvalues = inv_boxcox(results_ARIMA.fittedvalues,
                                                lam)
        ARIMA_predict = inv_boxcox(ARIMA_predict, lam)

    ARIMA_predict.columns = ['y']
    return {
        'model': results_ARIMA,
        'predictions': ARIMA_predict,
        'lam': lam,
    }
Пример #5
0
 def inverse_transform(self, X):
     """Undo the standardisation and then the Box-Cox transform.

     ``X`` is first rescaled with ``self.xstd`` and ``self.xmean``. If
     ``self.lmbda`` is a scalar the whole array is inverse-transformed with
     it; otherwise ``self.lmbda`` is iterated and column ``j`` is
     inverse-transformed with the ``j``-th lambda. No shift is subtracted.

     Parameters
     ----------
     X : array
         Standardised, Box-Cox transformed data. It must be two-dimensional
         when ``self.lmbda`` holds one lambda per column.

     Returns
     -------
     The data on the original scale.
     """
     xtemp = X * self.xstd + self.xmean
     # Accept every scalar lambda (Python int/float, NumPy scalar): the
     # previous float-only check pushed an integer lambda into the
     # per-column loop, where it cannot be iterated.
     if numpy.ndim(self.lmbda) == 0:
         return inv_boxcox(xtemp, self.lmbda)
     xinv = numpy.zeros(shape=X.shape)
     for j, lmb in enumerate(self.lmbda):
         xinv[:, j] = inv_boxcox(xtemp[:, j], lmb)
     return xinv
Пример #6
0
 def inverse_transform(self, X):
     """Map standardised Box-Cox data back to the original scale.

     The standardisation is reversed with ``self.xstd`` / ``self.xmean``
     and the result is passed through the inverse Box-Cox transform, either
     with one shared scalar ``self.lmbda`` or with one lambda per column
     when ``self.lmbda`` is a sequence. No shift is subtracted.

     Parameters
     ----------
     X : array
         Transformed data; two-dimensional when lambdas are per column.

     Returns
     -------
     The inverse-transformed data.
     """
     xtemp = X * self.xstd + self.xmean
     # A scalar lambda of any numeric type applies to the whole array. The
     # earlier ``isinstance(..., float)`` test rejected integer lambdas and
     # then failed trying to iterate over them.
     if numpy.ndim(self.lmbda) == 0:
         return inv_boxcox(xtemp, self.lmbda)
     xinv = numpy.zeros(shape=X.shape)
     for j, lmb in enumerate(self.lmbda):
         xinv[:, j] = inv_boxcox(xtemp[:, j], lmb)
     return xinv
 def gbm_error_func(y_pred, y_true):
     """Evaluation callback that scores predictions on the original scale.

     Depending on the enclosing ``target`` setting, both arrays are mapped
     back from the log or Box-Cox scale before ``error_func`` is applied.

     Returns
     -------
     tuple
         ``('error', <error_func result>, False)``.
     """
     if target == 'log_y':
         y_pred, y_true = np.exp(y_pred), np.exp(y_true)
     elif target == 'boxcox_y':
         y_pred, y_true = (inv_boxcox(y_pred, lba_boxcox),
                           inv_boxcox(y_true, lba_boxcox))
     return 'error', error_func(y_pred, y_true), False
Пример #8
0
 def renormalize(self):
     """Store predictions on the original scale and return the mean.

     ``self.predict`` is read as ``(mean, bounds)`` where ``bounds`` has the
     lower values in column 0 and the upper values in column 1. When
     ``self.lambda_boxcox`` equals ``-999`` the values are stored as they
     are; otherwise each of them goes through the inverse Box-Cox
     transform first.

     Returns
     -------
     DataFrame
         Single column ``endo_value`` holding the mean prediction, indexed
         by ``self.dates_prev``.
     """
     center = self.predict[0]
     lower = self.predict[1][:, 0]
     upper = self.predict[1][:, 1]

     untransformed = self.lambda_boxcox == float(-999)
     if not untransformed:
         lam = self.lambda_boxcox
         center = inv_boxcox(center, lam)
         lower = inv_boxcox(lower, lam)
         upper = inv_boxcox(upper, lam)

     self.predict_mean = center
     self.predict_down = lower
     self.predict_up = upper

     # Pair the prediction dates with the mean values.
     result = pd.DataFrame(self.predict_mean, self.dates_prev)
     result.columns = ['endo_value']
     return result
Пример #9
0
 def predict(self, dataset):
     """Predict on the original target scale.

     The columns listed in ``self.columns`` are selected from ``dataset``,
     passed through ``self.scaler`` and ``self.pca``, fed to
     ``self.estimator`` and the estimator output is mapped back with the
     inverse Box-Cox transform using ``self.lmbda``.
     """
     features = dataset[self.columns]
     reduced = self.pca.transform(self.scaler.transform(features))
     return inv_boxcox(self.estimator.predict(reduced), self.lmbda)
Пример #10
0
def transform_test(df, column):
    """Print a round-trip check of Box-Cox plus standardisation.

    The values of ``df[column]`` are Box-Cox transformed (lambda estimated
    from the data), standardised, de-standardised and inverse Box-Cox
    transformed. The mean and standard deviation after every step are
    printed so the round trip can be inspected by eye.

    Parameters
    ----------
    df : DataFrame
        Source data.
    column
        Name of the column to run the check on.

    Returns
    -------
    None
    """
    values = df[column].values

    # 1. Box-Cox transform
    boxcox_transformed, max_log = boxcox(values)

    # 2. Standardize
    mean_value = boxcox_transformed.mean()
    std_value = boxcox_transformed.std()
    standardized = (boxcox_transformed - mean_value) / std_value

    # 3. Inverse standardize
    inverse_standardized = (standardized * std_value) + mean_value

    # 4. Inverse Box-Cox transform
    inversed_boxcox = inv_boxcox(inverse_standardized, max_log)

    print('Test Results:')
    print(f'max_log = {max_log}')
    print(f' * Intial Array: mean = {values.mean()}, std = {values.std()}')
    print(f' * Boxcox Transformed: mean = {boxcox_transformed.mean()}, std = {boxcox_transformed.std()}')
    print(f' * Standardized: mean = {standardized.mean()}, std = {standardized.std()}')
    print(f' * Inverse Standardized: mean = {inverse_standardized.mean()}, std = {inverse_standardized.std()}')
    print(f' * Inverse Boxcox: mean = {inversed_boxcox.mean()}, std = {inversed_boxcox.std()}')
Пример #11
0
def train_theta_boxcox(ts, seasonality, n):
    """Fit a Theta model on a Box-Cox transformed series and forecast it.

    Parameters
    ----------
    ts
        Series to model; the code uses its ``univariate_values()``,
        ``min()`` and ``time_index()``.
    seasonality
        Series whose time index is given to the forecast and by which the
        forecast is multiplied at the end.
    n
        Number of steps passed to the model's ``predict``.

    Returns
    -------
    The forecast after inverse Box-Cox, un-shifting, multiplication by
    ``seasonality`` and replacement of negative values by zero.
    """
    theta_bc = Theta(theta=0, season_mode=SeasonalityMode.NONE)
    shiftdata = 0
    # Box-Cox needs positive input: if any value is negative, shift the
    # whole series up by (-min + 100) and remember the shift.
    if (ts.univariate_values() < 0).any():
        shiftdata = -ts.min() + 100
        ts = ts + shiftdata
    # Estimate lambda and transform in one call.
    new_values, lmbd = boxcox(ts.univariate_values())
    if lmbd < 0:
        # For a negative estimate, scan 100 lambdas in [lmbd - 1, 0].
        lmbds, value = boxcox_normplot(ts.univariate_values(),
                                       lmbd - 1,
                                       0,
                                       N=100)
        # If the first scanned value is ~0, switch to the lambda with the
        # largest scanned value and re-transform with it.
        if np.isclose(value[0], 0):
            lmbd = lmbds[np.argmax(value)]
            new_values = boxcox(ts.univariate_values(), lmbd)
        # If the transformed series is (numerically) constant, fall back
        # to lambda = 0.
        if np.isclose(new_values, new_values[0]).all():
            lmbd = 0
            new_values = boxcox(ts.univariate_values(), lmbd)
    ts = TimeSeries.from_times_and_values(ts.time_index(), new_values)
    theta_bc.fit(ts)
    forecast = theta_bc.predict(n)

    # Back to the original scale, re-indexed on the seasonality's times.
    new_values = inv_boxcox(forecast.univariate_values(), lmbd)
    forecast = TimeSeries.from_times_and_values(seasonality.time_index(),
                                                new_values)
    # Undo the positivity shift applied above, if any.
    if shiftdata > 0:
        forecast = forecast - shiftdata
    forecast = forecast * seasonality
    # Replace negative forecast values by zero.
    if (forecast.univariate_values() < 0).any():
        indices = seasonality.time_index()[forecast < 0]
        # NOTE(review): the result of an ``inplace=True`` update is assigned
        # back to ``forecast`` - confirm that this call returns the series
        # rather than None for the library version in use.
        forecast = forecast.update(indices,
                                   np.zeros(len(indices)),
                                   inplace=True)
    return forecast
Пример #12
0
def model_predict(model, X):
    """Predict with ``model['model']`` and undo the target transform.

    When ``model`` has a ``'transform'`` entry, element 1 of that entry is
    used as the Box-Cox lambda for the inverse transform and element 2 is
    subtracted from the result. Without the entry the raw prediction is
    returned.
    """
    prediction = model['model'].predict(X)
    if 'transform' not in model:
        return prediction

    transform = model['transform']
    return inv_boxcox(prediction, transform[1]) - transform[2]
Пример #13
0
def briquette_forecast(series_with_dates, forecast_horizon):
    """Average an exponential-smoothing ensemble with a SARIMA forecast.

    The index frequency of ``series_with_dates`` is set to ``'MS'`` (this
    modifies the passed object). ``combshd`` is run on the raw values with
    a seasonality of 12 and ``init='concentrated'``; a SARIMAX
    ``(1, 1, 1) x (1, 0, 1, 12)`` model with a constant trend is fitted on
    the Box-Cox transformed series and its forecast is inverse-transformed.
    The two forecasts are averaged.

    Parameters
    ----------
    series_with_dates
        Series with an index that accepts a ``freq`` assignment.
    forecast_horizon
        Number of steps to forecast.

    Returns
    -------
    The ``.values`` of the averaged forecast.
    """
    series_with_dates.index.freq = 'MS'

    transformed, lam = boxcox(series_with_dates)

    # Exponential-smoothing ensemble on the untransformed values.
    ets_fc = combshd(series_with_dates.values,
                     horizon=forecast_horizon,
                     seasonality=12,
                     init='concentrated')

    # SARIMA on the transformed series.
    sarima_model = SARIMAX(endog=transformed,
                           order=(1, 1, 1),
                           seasonal_order=(1, 0, 1, 12),
                           trend='c',
                           enforce_invertibility=False)
    sarima_fit = sarima_model.fit()

    first_step = len(transformed)
    last_step = len(transformed) + forecast_horizon - 1
    sarima_fc = inv_boxcox(
        sarima_fit.predict(first_step, last_step, dynamic=False), lam)

    combined = pd.Series((ets_fc + sarima_fc) / 2)
    return combined.values
Пример #14
0
    def predict(self, metrics, **kwargs):
        """
        Predict the duration of a CSV recreate job.

        The caller passes a snapshot of the metrics of the cluster the job
        is going to run on. Only ``metrics.AvailableMB`` and
        ``metrics.AvailableVCores`` are read; extra keyword arguments are
        accepted and ignored.

        :param metrics: object exposing ``AvailableMB`` and
            ``AvailableVCores``
        :return: the model prediction after the inverse Box-Cox transform
            with ``self._config['boxcoxMaxLog']`` (described upstream as a
            duration in seconds)
        """
        feature_names = [
            'YARN_AVAILABLE_MEMORY',
            'YARN_AVAILABLE_VIRTUAL_CORES'
        ]
        # One row, one column per feature, in the same order as the names.
        row = np.array([
            float(metrics.AvailableMB),
            float(metrics.AvailableVCores),
        ]).reshape(1, -1)
        data = xgboost.DMatrix(row, feature_names=feature_names)

        raw_prediction = self._model.predict(data)

        # The model output is on the Box-Cox scale.
        return inv_boxcox(raw_prediction, self._config['boxcoxMaxLog'])
Пример #15
0
 def predict(self, steps):
     """Forecast ``steps`` days ahead for every fitted model.

     For each entry of ``self.models`` a forecast with confidence bounds is
     requested, stored in ``self.fcst[item]`` as a frame with the columns
     ``yhat``, ``yhat_lower`` and ``yhat_upper`` and, when
     ``self.use_boxcox`` is set, mapped back with the item's Box-Cox lambda.

     Parameters
     ----------
     steps : int
         Number of daily periods to forecast.

     Returns
     -------
     DataFrame
         All per-item forecasts concatenated along the columns.
     """
     print("Forecasting...")
     progress_bar = ProgressBar(len(self.models.items()))
     n_train = len(self.train_ds)
     # Forecast dates are the ``steps`` days that follow the training
     # range. The slice used to be a hard-coded ``[-365:]``, which only
     # matched the length of the predictions when ``steps`` was 365.
     self.fcst_ds = pd.date_range(
         start=self.train_ds.min(),
         freq="D",
         periods=n_train + steps)[n_train:]
     for item, model in self.models.items():
         pred = model.predict(
             exogenous=fourier(
                 steps,
                 seasonality=self.seasonality,
                 n_terms=self.n_fourier_terms),
             n_periods=steps,
             return_conf_int=True,
             alpha=(1.0 - self.confidence_interval))
         # pred[0] is the point forecast, pred[1] the (lower, upper) bounds.
         self.fcst[item] = pd.DataFrame(
             {"yhat": pred[0],
              "yhat_lower": pred[1][:, 0],
              "yhat_upper": pred[1][:, 1]},
             index=self.fcst_ds)
         if self.use_boxcox:
             self.fcst[item] = inv_boxcox(
                 self.fcst[item],
                 self.lmbda_boxcox[item])
         progress_bar.update()
     progress_bar.finish()
     return pd.concat(self.fcst, axis=1)
Пример #16
0
    def inverse_transform(self, x):
        """
        Scale back the data to the original representation.

        The input is walked column by column together with the stored
        per-column shift and lambda. A column is either passed through
        unchanged or inverse Box-Cox transformed with its lambda, after
        which its shift is subtracted.

        Parameters
        ----------
        x: DataFrame, Series, ndarray, list
            The transformed data; it is first passed through
            ``self._check_type``.

        Returns
        -------
        ndarray
            Inverse transformed data with one column per input column,
            flattened to one dimension when ``self._shape`` has length 1.
        """
        x = self._check_type(x)
        xs = []
        for col, shift, lmd in zip(x.T, self._shift, self._lmd):
            # NOTE(review): ``Switch`` is a project helper; presumably the
            # first case matches a lambda of nan/inf (column left as is)
            # and the bare ``case()`` is the default branch - confirm
            # against its implementation.
            for case in Switch(lmd):
                if case(np.nan, np.inf):
                    _x = col
                    break
                if case():
                    _x = inv_boxcox(col, lmd) - shift
            # Keep every result as a column vector for the concatenation.
            xs.append(_x.reshape(-1, 1))
        xs = np.concatenate(xs, axis=1)
        if len(self._shape) == 1:
            return xs.ravel()
        return xs
Пример #17
0
 def _inverse_boxcox(self, predicted, lambda_param):
     """Apply the inverse Box-Cox transformation.

     A lambda of zero is handled as a plain exponential and returned
     directly. For any other lambda the inverse transform is computed and
     the result is passed through ``self._filling_gaps``.
     """
     if lambda_param == 0:
         return np.exp(predicted)

     return self._filling_gaps(inv_boxcox(predicted, lambda_param))
Пример #18
0
    def inverse(self, ts_array):
        """Undo the transform selected by ``self.method``.

        ``None`` returns the input untouched, ``'log'`` applies
        ``np.expm1`` and every other value applies the inverse Box-Cox
        transform with ``self.param`` as lambda.
        """
        method = self.method
        if method is None:
            return ts_array
        if method == 'log':
            return np.expm1(ts_array)
        return inv_boxcox(ts_array, self.param)
Пример #19
0
def pd_invboxcox(Data, lambdas):
    """Inverse Box-Cox transform every column of a DataFrame.

    Column number ``i`` of ``Data`` is transformed with ``lambdas[i]``.

    Returns
    -------
    DataFrame
        Float frame with the same index and columns as ``Data``.
    """
    restored = np.empty(Data.shape)
    for position, name in enumerate(Data.columns):
        restored[:, position] = inv_boxcox(Data[name], lambdas[position])

    return pd.DataFrame(restored, index=Data.index, columns=Data.columns)
Пример #20
0
def inverse_transform(boxcoxT, transform, y):
    """Undo a Box-Cox transform on every column of ``y`` except the first.

    Nothing happens unless ``transform`` equals ``'BoxCox'``. Otherwise
    the column at position ``k`` (``k >= 1``) is inverse-transformed in
    place with ``boxcoxT[k - 1]``.

    Returns
    -------
    The same ``y`` object that was passed in.
    """
    if transform is None or transform != 'BoxCox':
        return y

    n_columns = len(y.columns.tolist())
    for position in range(1, n_columns):
        y.iloc[:, position] = inv_boxcox(y.iloc[:, position],
                                         boxcoxT[position - 1])
    return y
Пример #21
0
 def predict_epsilon(self, x, boxcox_lambda=0.2):
     """
     Predict epsilon.

     The private predictor is run with ``self.model_epsilon`` and its
     output is mapped back with the inverse Box-Cox transform.

     Args:
     x -- SMILES(str)
     boxcox_lambda -- int/float (preset to 0.2 for the preloaded model)
     """
     return inv_boxcox(self.__predict(self.model_epsilon, x), boxcox_lambda)
def run(df, fold):
    """Train and score the model on one cross-validation fold.

    Rows whose ``kfold`` differs from ``fold`` are used for training and
    the remaining rows for validation. The model is fitted on the Box-Cox
    transformed ``Cost`` (fixed lambda), validation predictions are mapped
    back to the original scale and scored with ``calc_metric.calc_score``.

    Parameters
    ----------
    df : DataFrame
        Data containing ``kfold``, ``Cost``, ``Customer Id`` and the
        feature columns.
    fold
        Value of ``kfold`` that identifies the validation rows.

    Returns
    -------
    The score of the validation predictions.
    """
    # Fixed Box-Cox lambda for the target, shared by the forward and the
    # inverse transform.
    cost_lambda = -0.398686

    drop_cols = [
        'scheduled_year', 'scheduled_weekofyear', 'scheduled_month',
        'scheduled_dayofweek', 'scheduled_weekend', 'delivery_year',
        'delivery_weekofyear', 'delivery_month', 'delivery_dayofweek',
        'delivery_weekend', "City", "Code"
    ]
    try:
        df = df.drop(drop_cols, axis=1)
    except KeyError:
        # At least one of the columns is missing: keep the frame as it is.
        # Only this case is tolerated now; the earlier bare ``except`` hid
        # every other error as well.
        pass

    # training rows: every fold but the requested one
    df_train = df[df.kfold != fold].reset_index(drop=True)

    # validation rows: the requested fold
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    features = [
        f for f in df.columns if f not in ("kfold", "Cost", "Customer Id")
    ]

    # feature matrices (no scaling is applied)
    x_train = df_train[features].values
    x_valid = df_valid[features].values

    # model registered under the key 'rf'
    model = model_dispatcher.models['rf']

    # fit on the Box-Cox transformed target
    model.fit(x_train, boxcox(df_train.Cost, cost_lambda))

    # predictions are on the Box-Cox scale; map them back
    valid_preds_bc = model.predict(x_valid)
    valid_preds = inv_boxcox(valid_preds_bc, cost_lambda)

    score = calc_metric.calc_score(df_valid.Cost.values, valid_preds)

    print(f"Fold = {fold}, Score = {score}")

    return score
Пример #23
0
 def forecast(self):
     """Predict the next ``self.forecast_len`` periods.

     A future frame is built with the model, predicted, and cut down to
     its last ``self.forecast_len`` rows. When ``self.boxcox_lambda`` is
     truthy, ``yhat`` is inverse Box-Cox transformed and 1 is subtracted
     (presumably undoing a +1 offset applied before the forward
     transform - confirm against the fitting code). The formatted result
     is stored in ``self.prediction``; nothing is returned.

     Note that a lambda of exactly 0 is falsy and therefore skips the
     inverse transform.
     """
     future = self.model.make_future_dataframe(periods=self.forecast_len,
                                               freq=self.freq)
     future_pred = self.model.predict(future)
     # Take an independent copy of the tail: assigning a column into a
     # bare slice of the prediction frame is chained assignment.
     future_pred = future_pred[-self.forecast_len:].copy()
     if self.boxcox_lambda:
         future_pred["yhat"] = (
             inv_boxcox(future_pred["yhat"], self.boxcox_lambda) - 1)
     self.prediction = self.format_output(future_pred)
Пример #24
0
    def invBoxCoxTransform(self, bcData):
        r""" Map Box-Cox transformed data back with the stored lambda.

        Raises ``RuntimeError`` when ``self.boxCoxLambda`` is ``None``.
        The result is a Series carrying the index of ``bcData``.
        """
        lam = self.boxCoxLambda
        if lam is None:
            raise RuntimeError('Box-Cox lambda has not been learned or set.')

        return pd.Series(inv_boxcox(bcData, lam), index=bcData.index)
Пример #25
0
    def test_step(self, model, batch):
        """
        Called inside the testing loop with the data from the testing
        dataloader passed in as `batch`.

        For every sample and channel the masked prediction and ground truth
        are optionally thresholded, inverse Box-Cox transformed (prediction
        only) and clipped, then the squared error, a thresholded accuracy
        and the mean absolute error are logged per channel.

        :param model: The chosen model; it is called on the inputs and
            provides ``model.data.mask``
        :type model: Model
        :param batch: Pair ``(x, y_pre)`` of input and ground truth
        :type batch: tuple
        :return: Loss and logs; an empty dict as soon as one sample/channel
            has no prediction elements left after thresholding
        :rtype: dict
        """

        x, y_pre = batch
        y_hat_pre = model(x)
        mask = model.data.mask.expand_as(y_pre[0][0])
        tensorboard_logs = defaultdict(dict)
        hparams = self.hparams
        for b in range(y_pre.shape[0]):
            for c in range(y_pre.shape[1]):
                y = y_pre[b][c][mask]
                y_hat = y_hat_pre[b][c][mask]
                if hparams.round_frp_to_zero:
                    # Select the same elements from both tensors. The
                    # ground truth used to be filtered with a hard-coded
                    # 0.5 while the prediction used the configured
                    # threshold, which left the two with different lengths
                    # whenever the threshold was not 0.5.
                    keep = y > hparams.round_frp_to_zero
                    y_hat = y_hat[keep]
                    y = y[keep]
                if y_hat.nelement() == 0:
                    return {}
                if hparams.boxcox:
                    # Only the prediction is mapped back from the Box-Cox
                    # scale; SciPy works on NumPy arrays, hence the round
                    # trip through the CPU.
                    y_hat = torch.from_numpy(
                        inv_boxcox(y_hat.cpu().numpy(),
                                   hparams.boxcox)).to(y_hat.device)
                if hparams.clip_output:
                    # Keep elements whose prediction lies strictly inside
                    # the configured range.
                    in_range = ((y_hat < hparams.clip_output[-1])
                                & (hparams.clip_output[0] < y_hat))
                    y = y[in_range]
                    y_hat = y_hat[in_range]

                pre_loss = (y_hat - y)**2
                loss = pre_loss.mean()
                # NaN guard: NaN is the only value not equal to itself.
                assert loss == loss

                # Accuracy for a threshold
                abs_err = (y - y_hat).abs()
                acc = (abs_err < hparams.out_mad / 2).float().mean()
                mae = abs_err.float().mean()

                # Logs are keyed by channel only, so a later sample
                # overwrites the entry of an earlier one.
                tensorboard_logs["test_loss"][str(c)] = loss
                tensorboard_logs["acc_test"][str(c)] = acc
                tensorboard_logs["mae_test"][str(c)] = mae

        test_loss = torch.stack(list(
            tensorboard_logs["test_loss"].values())).mean()
        tensorboard_logs["_test_loss"] = test_loss

        return {
            "test_loss": test_loss,
            "log": tensorboard_logs,
        }
Пример #26
0
def inv_bc_scale(number, lbd, mib, mia, maa):
    """Undo a min-max scaling followed by a Box-Cox transform.

    ``number`` is rescaled from the unit range to ``[mia, maa]``, inverse
    Box-Cox transformed with ``lbd`` and, when ``mib`` is not positive,
    shifted by ``mib - 1``.
    """
    rescaled = number * (maa - mia) + mia
    restored = inv_boxcox(rescaled, lbd)
    return restored + mib - 1 if mib <= 0 else restored
Пример #27
0
def inv_bc_scale_list(ls, lbd, mib, mia, maa):
    """List version of the inverse scaling + inverse Box-Cox.

    Every element is rescaled from the unit range to ``[mia, maa]`` and the
    whole collection is inverse Box-Cox transformed with ``lbd``. When
    ``mib`` is not positive each value is shifted by ``mib - 1`` and a list
    is returned; otherwise the array produced by ``inv_boxcox`` is
    returned as is.
    """
    span = maa - mia
    rescaled = [value * span + mia for value in ls]

    restored = inv_boxcox(rescaled, lbd)
    if mib > 0:
        return restored

    return [value + mib - 1 for value in restored]
Пример #28
0
 def inv_transform(self, series):
     """Reverse the standardisation and, if enabled, the Box-Cox step.

     A copy of ``series`` is de-standardised with the ``"mean"`` and
     ``"std"`` entries of ``self.parameter_dict``. With ``self.flag`` set,
     the result is inverse Box-Cox transformed with the ``"lambda"`` entry
     and returned as a Series named ``self.name``; otherwise the
     de-standardised data is returned directly.
     """
     params = self.parameter_dict
     center = params["mean"]
     scale = params["std"]
     restored = series.copy() * scale + center
     if not self.flag:
         return restored

     return pd.Series(inv_boxcox(restored, params["lambda"]),
                      name=self.name)
 def inverse_transform(self, series):
     """
     Reverse the Box-Cox transform of a series with the stored lambda and
     subtract the stored offset.
     :param series: Series to be reverse transformed
     :return: inverse transformed series carrying the index of ``series``
     """
     restored = special.inv_boxcox(series, self.__lambda) - self.__offset
     result = pd.Series(restored)
     result.index = series.index
     return result
Пример #30
0
    def inverse_transform(self, X):
        """Inverse Box-Cox transform the fitted columns on a copy of ``X``.

        Every column named in ``self.transform_cols`` is replaced by its
        inverse transform with ``self.lmbda``; ``X`` itself is left
        untouched. Raises ``NotFittedError`` when ``self.transform_cols``
        is ``None``.
        """
        columns = self.transform_cols
        if columns is None:
            raise NotFittedError(
                f"This {self.__class__.__name__} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator."
            )

        restored = X.copy()
        for name in columns:
            restored[name] = inv_boxcox(restored[name], self.lmbda)
        return restored
Пример #31
0
def build_evaluate_model(train, test, bc_lambda, model_vars):
    """Fit a linear regression on the Box-Cox target and evaluate it.

    The regression is fitted on ``train``. Cross-validated predictions are
    then produced on ``test``, mapped back with the inverse Box-Cox
    transform and their R-squared against ``property_crime`` is printed.
    Finally ``evaluate_model`` is run on the training and on the test data.

    Returns
    -------
    tuple
        ``(regr, resids_train, resids_test)``.
    """
    target_bc = "property_crime_bc"

    regr = linear_model.LinearRegression()
    regr.fit(train[model_vars], train[target_bc])

    cv_pred_bc = cross_val_predict(regr, test[model_vars], test[target_bc])
    y_pred = inv_boxcox(cv_pred_bc, bc_lambda)
    r2 = r2_score(test["property_crime"], y_pred)
    print("R-squared (after reverse property crime Box-Cox): {:.4f}".format(r2))

    resids_train = evaluate_model(regr, train, bc_lambda, "Train", model_vars)
    resids_test = evaluate_model(regr, test, bc_lambda, "Test", model_vars)

    return regr, resids_train, resids_test
def boxcox(x,y,y_label):
    """Fit ``regr`` on a Box-Cox transformed target and plot the result.

    ``y`` is shifted by ``abs(min(y)) + 1`` so that every value is at
    least 1 before the Box-Cox transform. The module-level ``regr`` is
    fitted on ``x`` against the transformed target, its predictions are
    mapped back and un-shifted, the ratio of predicted to observed variance
    is printed and a scatter plot of observed against predicted values is
    shown.

    Parameters
    ----------
    x
        Feature matrix passed to ``regr.fit`` / ``regr.predict``.
    y
        Target values.
    y_label
        Label of the plot's x axis.
    """
    offset = abs(min(y))
    box_cox, maxlog = stats.boxcox(y + offset + 1)
    regr.fit(x, box_cox)
    box_cox_predict = regr.predict(x)
    y_predict = inv_boxcox(box_cox_predict, maxlog) - offset - 1
    # Function-call form: the Python 2 print statement used before is a
    # syntax error on Python 3.
    print("R squared: " + str(np.var(y_predict) / np.var(y)))
    # Plot outputs
    fig = plt.figure()
    plt.scatter(y, y_predict, color='blue')
    plt.xlabel(y_label)
    plt.ylabel('predicted')
    plt.show()
Пример #33
0
def test_inv_boxcox():
    """Round-trip check for both Box-Cox variants.

    ``inv_boxcox(boxcox(x))`` and ``inv_boxcox1p(boxcox1p(x))`` must give
    back ``x`` for element-wise lambdas of 0, 1 and 2.
    """
    pairs = ((boxcox, inv_boxcox), (boxcox1p, inv_boxcox1p))
    for forward, backward in pairs:
        x = np.array([0., 1., 2.])
        lam = np.array([0., 1., 2.])
        round_trip = backward(forward(x, lam), lam)
        assert_almost_equal(x, round_trip)
Пример #34
0
    def _predict(self, h=None, smoothing_level=None, smoothing_slope=None,
                 smoothing_seasonal=None, initial_level=None, initial_slope=None,
                 damping_slope=None, initial_seasons=None, use_boxcox=None, lamda=None, remove_bias=None):
        """
        Helper prediction function

        Runs the smoothing recursions over the observed data with the given
        parameters, extends level/trend/season ``h + 1`` steps past the
        sample, computes fit statistics and wraps everything in a results
        object. ``self.params`` is overwritten as a side effect.

        Parameters
        ----------
        h : int, optional
            The number of time steps to forecast ahead.
        smoothing_level, smoothing_slope, smoothing_seasonal : float
            Smoothing parameters, called alpha, beta and gamma below.
        damping_slope : float
            Damping parameter phi; replaced by 1.0 when the model is not
            damped.
        initial_level, initial_slope, initial_seasons
            Starting values for the level, the slope and the first ``m``
            seasonal terms.
        use_boxcox : {'log', float, bool}, optional
            ``'log'`` uses lambda 0, a float is used as lambda directly,
            any other truthy value lets ``boxcox`` estimate lambda, and a
            falsy value disables the transform.
        lamda : optional
            Overwritten in every branch of the ``use_boxcox`` handling, so
            the value passed in is not used.
        remove_bias : bool, optional
            When truthy, the mean residual is added to the fitted values.

        Returns
        -------
        HoltWintersResultsWrapper
            Wrapper around the ``HoltWintersResults`` built from the fit.
        """
        # Variable renames to alpha,beta, etc as this helps with following the
        # mathematical notation in general
        alpha = smoothing_level
        beta = smoothing_slope
        gamma = smoothing_seasonal
        phi = damping_slope

        # Start in sample and out of sample predictions
        data = self.endog
        damped = self.damped
        seasoning = self.seasoning
        trending = self.trending
        trend = self.trend
        seasonal = self.seasonal
        m = self.seasonal_periods
        # Without damping, phi is neutral.
        phi = phi if damped else 1.0
        # Select the Box-Cox lambda and transform the data accordingly.
        if use_boxcox == 'log':
            lamda = 0.0
            y = boxcox(data, 0.0)
        elif isinstance(use_boxcox, float):
            lamda = use_boxcox
            y = boxcox(data, lamda)
        elif use_boxcox:
            y, lamda = boxcox(data)
        else:
            lamda = None
            y = data.squeeze()
            if np.ndim(y) != 1:
                raise NotImplementedError('Only 1 dimensional data supported')
        # Pre-multiplied observations (alpha * y, gamma * y) and the
        # complements (1 - parameter) used by the recursions.
        y_alpha = np.zeros((self.nobs,))
        y_gamma = np.zeros((self.nobs,))
        alphac = 1 - alpha
        y_alpha[:] = alpha * y
        if trending:
            betac = 1 - beta
        if seasoning:
            gammac = 1 - gamma
            y_gamma[:] = gamma * y
        # Level, slope and season buffers: in-sample part plus h + 1
        # out-of-sample slots (the season buffer holds m extra terms).
        l = np.zeros((self.nobs + h + 1,))
        b = np.zeros((self.nobs + h + 1,))
        s = np.zeros((self.nobs + h + m + 1,))
        l[0] = initial_level
        b[0] = initial_slope
        s[:m] = initial_seasons
        # Multipliers applied to the last slope for steps 1..h+1: cumulative
        # sums of powers of phi when damped, otherwise 1, 2, ..., h + 1.
        phi_h = np.cumsum(np.repeat(phi, h + 1)**np.arange(1, h + 1 + 1)
                          ) if damped else np.arange(1, h + 1 + 1)
        # Operators chosen by trend type ('mul', 'add' or None).
        trended = {'mul': np.multiply,
                   'add': np.add,
                   None: lambda l, b: l
                   }[trend]
        detrend = {'mul': np.divide,
                   'add': np.subtract,
                   None: lambda l, b: 0
                   }[trend]
        dampen = {'mul': np.power,
                  'add': np.multiply,
                  None: lambda b, phi: 0
                  }[trend]
        if seasonal == 'mul':
            # Multiplicative seasonality: observations are divided by the
            # seasonal term.
            for i in range(1, self.nobs + 1):
                l[i] = y_alpha[i - 1] / s[i - 1] + \
                    (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                        (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] / \
                    trended(l[i - 1], dampen(b[i - 1], phi)) + \
                    (gammac * s[i - 1])
            # From here on ``i`` is the last loop index, i.e. self.nobs
            # (this relies on the loop having run at least once).
            slope = b[1:i + 1].copy()
            season = s[m:i + m].copy()
            # Hold the last level for the out-of-sample part.
            l[i:] = l[i]
            if trending:
                b[:i] = dampen(b[:i], phi)
                b[i:] = dampen(b[i], phi_h)
            trend = trended(l, b)
            # Fill the out-of-sample seasonal slots from earlier terms.
            s[i + m - 1:] = [s[(i - 1) + j % m] for j in range(h + 1 + 1)]            
            fitted = trend * s[:-m]
        elif seasonal == 'add':
            # Additive seasonality: the seasonal term is subtracted.
            for i in range(1, self.nobs + 1):
                l[i] = y_alpha[i - 1] - (alpha * s[i - 1]) + \
                    (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                        (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] - \
                    (gamma * trended(l[i - 1],
                                     dampen(b[i - 1], phi))) + (gammac * s[i - 1])
            slope = b[1:i + 1].copy()
            season = s[m:i + m].copy()
            l[i:] = l[i]
            if trending:
                b[:i] = dampen(b[:i], phi)
                b[i:] = dampen(b[i], phi_h)
            trend = trended(l, b)
            s[i + m - 1:] = [s[(i - 1) + j % m] for j in range(h + 1 + 1)]            
            fitted = trend + s[:-m]
        else:
            # No seasonal component: level (and slope) recursion only.
            for i in range(1, self.nobs + 1):
                l[i] = y_alpha[i - 1] + \
                    (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                        (betac * dampen(b[i - 1], phi))
            slope = b[1:i + 1].copy()
            season = s[m:i + m].copy()
            l[i:] = l[i]
            if trending:
                b[:i] = dampen(b[:i], phi)
                b[i:] = dampen(b[i], phi_h)
            trend = trended(l, b)
            fitted = trend
        level = l[1:i + 1].copy()
        # Map the components back to the original scale when a Box-Cox
        # transform was applied.
        if use_boxcox or use_boxcox == 'log' or isinstance(use_boxcox, float):
            fitted = inv_boxcox(fitted, lamda)
            level = inv_boxcox(level, lamda)
            slope = detrend(trend[:i], level)
            if seasonal == 'add':
                season = (fitted - inv_boxcox(trend, lamda))[:i]
            elif seasonal == 'mul':
                season = (fitted / inv_boxcox(trend, lamda))[:i]
            else:
                pass
        # Sum of squared errors over the in-sample part of ``fitted``
        # (the last h + 1 entries are the out-of-sample values).
        sse = sqeuclidean(fitted[:-h - 1], data)
        # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
        k = m * seasoning + 2 * trending + 2 + 1 * damped
        aic = self.nobs * np.log(sse / self.nobs) + (k) * 2
        aicc = aic + (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
        bic = self.nobs * np.log(sse / self.nobs) + (k) * np.log(self.nobs)
        resid = data - fitted[:-h - 1]
        if remove_bias:
            fitted += resid.mean()
        if not damped:
            # NOTE(review): the ``np.NaN`` alias is not available in recent
            # NumPy releases (``np.nan`` is the supported spelling) -
            # confirm the NumPy version this targets.
            phi = np.NaN
        self.params = {'smoothing_level': alpha,
                       'smoothing_slope': beta,
                       'smoothing_seasonal': gamma,
                       'damping_slope': phi,
                       'initial_level': l[0],
                       'initial_slope': b[0],
                       'initial_seasons': s[:m],
                       'use_boxcox': use_boxcox,
                       'lamda': lamda,
                       'remove_bias': remove_bias}
        hwfit = HoltWintersResults(self, self.params, fittedfcast=fitted, fittedvalues=fitted[:-h - 1],
                                   fcastvalues=fitted[-h - 1:], sse=sse, level=level,
                                   slope=slope, season=season, aic=aic, bic=bic,
                                   aicc=aicc, resid=resid, k=k)
        return HoltWintersResultsWrapper(hwfit)
Пример #35
0
    def _predict(self, h=None, smoothing_level=None, smoothing_slope=None,
                 smoothing_seasonal=None, initial_level=None, initial_slope=None,
                 damping_slope=None, initial_seasons=None, use_boxcox=None, lamda=None,
                 remove_bias=None, is_optimized=None):
        """
        Helper prediction function

        Parameters
        ----------
        h : int, optional
            The number of time steps to forecast ahead.
        """
        # Variable renames to alpha, beta, etc as this helps with following the
        # mathematical notation in general
        alpha = smoothing_level
        beta = smoothing_slope
        gamma = smoothing_seasonal
        phi = damping_slope

        # Start in sample and out of sample predictions
        data = self.endog
        damped = self.damped
        seasoning = self.seasoning
        trending = self.trending
        trend = self.trend
        seasonal = self.seasonal
        m = self.seasonal_periods
        phi = phi if damped else 1.0
        if use_boxcox == 'log':
            lamda = 0.0
            y = boxcox(data, 0.0)
        elif isinstance(use_boxcox, float):
            lamda = use_boxcox
            y = boxcox(data, lamda)
        elif use_boxcox:
            y, lamda = boxcox(data)
        else:
            lamda = None
            y = data.squeeze()
            if np.ndim(y) != 1:
                raise NotImplementedError('Only 1 dimensional data supported')
        y_alpha = np.zeros((self.nobs,))
        y_gamma = np.zeros((self.nobs,))
        alphac = 1 - alpha
        y_alpha[:] = alpha * y
        if trending:
            betac = 1 - beta
        if seasoning:
            gammac = 1 - gamma
            y_gamma[:] = gamma * y
        l = np.zeros((self.nobs + h + 1,))
        b = np.zeros((self.nobs + h + 1,))
        s = np.zeros((self.nobs + h + m + 1,))
        l[0] = initial_level
        b[0] = initial_slope
        s[:m] = initial_seasons
        phi_h = np.cumsum(np.repeat(phi, h + 1)**np.arange(1, h + 1 + 1)
                          ) if damped else np.arange(1, h + 1 + 1)
        trended = {'mul': np.multiply,
                   'add': np.add,
                   None: lambda l, b: l
                   }[trend]
        detrend = {'mul': np.divide,
                   'add': np.subtract,
                   None: lambda l, b: 0
                   }[trend]
        dampen = {'mul': np.power,
                  'add': np.multiply,
                  None: lambda b, phi: 0
                  }[trend]
        nobs = self.nobs
        if seasonal == 'mul':
            for i in range(1, nobs + 1):
                l[i] = y_alpha[i - 1] / s[i - 1] + \
                       (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                           (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] / trended(l[i - 1], dampen(b[i - 1], phi)) + \
                    (gammac * s[i - 1])
            slope = b[1:nobs + 1].copy()
            season = s[m:nobs + m].copy()
            l[nobs:] = l[nobs]
            if trending:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(l, b)
            s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
            fitted = trend * s[:-m]
        elif seasonal == 'add':
            for i in range(1, nobs + 1):
                l[i] = y_alpha[i - 1] - (alpha * s[i - 1]) + \
                       (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                           (betac * dampen(b[i - 1], phi))
                s[i + m - 1] = y_gamma[i - 1] - \
                    (gamma * trended(l[i - 1], dampen(b[i - 1], phi))) + \
                    (gammac * s[i - 1])
            slope = b[1:nobs + 1].copy()
            season = s[m:nobs + m].copy()
            l[nobs:] = l[nobs]
            if trending:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(l, b)
            s[nobs + m - 1:] = [s[(nobs - 1) + j % m] for j in range(h + 1 + 1)]
            fitted = trend + s[:-m]
        else:
            for i in range(1, nobs + 1):
                l[i] = y_alpha[i - 1] + \
                       (alphac * trended(l[i - 1], dampen(b[i - 1], phi)))
                if trending:
                    b[i] = (beta * detrend(l[i], l[i - 1])) + \
                           (betac * dampen(b[i - 1], phi))
            slope = b[1:nobs + 1].copy()
            season = s[m:nobs + m].copy()
            l[nobs:] = l[nobs]
            if trending:
                b[:nobs] = dampen(b[:nobs], phi)
                b[nobs:] = dampen(b[nobs], phi_h)
            trend = trended(l, b)
            fitted = trend
        level = l[1:nobs + 1].copy()
        if use_boxcox or use_boxcox == 'log' or isinstance(use_boxcox, float):
            fitted = inv_boxcox(fitted, lamda)
            level = inv_boxcox(level, lamda)
            slope = detrend(trend[:nobs], level)
            if seasonal == 'add':
                season = (fitted - inv_boxcox(trend, lamda))[:nobs]
            else:  # seasonal == 'mul':
                season = (fitted / inv_boxcox(trend, lamda))[:nobs]
        sse = sqeuclidean(fitted[:-h - 1], data)
        # (s0 + gamma) + (b0 + beta) + (l0 + alpha) + phi
        k = m * seasoning + 2 * trending + 2 + 1 * damped
        aic = self.nobs * np.log(sse / self.nobs) + k * 2
        if self.nobs - k - 3 > 0:
            aicc_penalty = (2 * (k + 2) * (k + 3)) / (self.nobs - k - 3)
        else:
            aicc_penalty = np.inf
        aicc = aic + aicc_penalty
        bic = self.nobs * np.log(sse / self.nobs) + k * np.log(self.nobs)
        resid = data - fitted[:-h - 1]
        if remove_bias:
            fitted += resid.mean()
        if not damped:
            phi = np.NaN
        self.params = {'smoothing_level': alpha,
                       'smoothing_slope': beta,
                       'smoothing_seasonal': gamma,
                       'damping_slope': phi,
                       'initial_level': l[0],
                       'initial_slope': b[0],
                       'initial_seasons': s[:m],
                       'use_boxcox': use_boxcox,
                       'lamda': lamda,
                       'remove_bias': remove_bias}

        # Format parameters into a DataFrame
        codes = ['alpha', 'beta', 'gamma', 'l.0', 'b.0', 'phi']
        codes += ['s.{0}'.format(i) for i in range(m)]
        idx = ['smoothing_level', 'smoothing_slope', 'smoothing_seasonal',
               'initial_level', 'initial_slope', 'damping_slope']
        idx += ['initial_seasons.{0}'.format(i) for i in range(m)]

        formatted = [alpha, beta, gamma, l[0], b[0], phi]
        formatted += s[:m].tolist()
        formatted = list(map(lambda v: np.nan if v is None else v, formatted))
        formatted = np.array(formatted)
        if is_optimized is None:
            optimized = np.zeros(len(codes), dtype=np.bool)
        else:
            optimized = is_optimized.astype(np.bool)
        included = [True, trending, seasoning, True, trending, damped]
        included += [True] * m
        formatted = pd.DataFrame([[c, f, o] for c, f, o in zip(codes, formatted, optimized)],
                                 columns=['name', 'param', 'optimized'],
                                 index=idx)
        formatted = formatted.loc[included]

        hwfit = HoltWintersResults(self, self.params, fittedfcast=fitted,
                                   fittedvalues=fitted[:-h - 1], fcastvalues=fitted[-h - 1:],
                                   sse=sse, level=level, slope=slope, season=season, aic=aic,
                                   bic=bic, aicc=aicc, resid=resid, k=k,
                                   params_formatted=formatted, optimized=optimized)
        return HoltWintersResultsWrapper(hwfit)