def testVectorAutoRegressiveMethodMetrics():
    """Print the RMSE of the stored forecasts for money and for spending.

    Each forecast column is scored against the final ``horizon`` rows of the
    matching column in the full dataset.
    """
    horizon = 12  # how many trailing observations are compared

    # Actual series first, then the forecasts written by an earlier step.
    actuals = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")
    forecasts = readVectorAutoRegressiveMethodForecastedValues()

    money_rmse = rmse(actuals["Money"][-horizon:],
                      forecasts["MoneyForecast"])
    spending_rmse = rmse(actuals["Spending"][-horizon:],
                         forecasts["SpendingForecast"])

    # The trailing comments keep the values recorded in the original notes.
    print(money_rmse)  # 43.71049653558938
    print(spending_rmse)  # 37.00117516940808
def plotVectorAutoRegressiveMethodPredictedValues():
    """Hand the dataset and the forecasts to the money and spending plotters.

    Both visualisation helpers receive the same two arguments: the full
    dataset followed by the forecasted values.
    """
    # Forecasts are read before the dataset, as in the original sequence.
    forecasts = readVectorAutoRegressiveMethodForecastedValues()
    dataset = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")

    plot_args = (dataset, forecasts)
    visualizeVectorAutoRegressiveMethodPredictedValuesForMoney(*plot_args)
    visualizeVectorAutoRegressiveMethodPredictedValuesForSpending(*plot_args)
def trainVectorAutoRegressiveMethodModelOnFullDataset():
    """Fit a VAR model on the whole dataset, save the result, print its summary."""
    # Original note: several models were built with different p values and
    # p = 5 gave the minimum AIC and BIC, so 5 is what gets passed to fit().
    lag_order = 5

    full_data = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")

    # No train/test split here: the model sees every observation.
    fitted = VAR(full_data).fit(lag_order)

    # Persist the fitted result (the original notes say it goes to pickle files).
    saveVectorAutoRegressiveMethodModelForFullDataset(fitted)

    print(fitted.summary())
Пример #4
0
def preprocess():
    """Difference the dataset twice, drop missing rows, split it and save the split."""
    raw = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")

    # First difference, then second difference (the original notes state the
    # second-difference data is stationary); dropna() then removes the rows
    # that contain missing values.
    twice_differenced = raw.diff().diff().dropna()

    train_part, test_part = splitVectorAutoRegressiveMethodDataset(
        twice_differenced)
    saveTrainingAndTestingDataset(train_part, test_part)
def testVectorAutoRegressiveMethodModel():
    """Forecast 12 steps with the stored model and undo the differencing.

    The forecast columns hold second-order differences. Each one is rolled
    back twice so it is on the scale of the original series, and the resulting
    frame is handed to ``saveVectorAutoRegressiveMethodForecastedValues``.
    """
    horizon = 12  # forecast steps; also the number of trailing rows held out
    lag_order = 5  # count of most recent training rows fed to forecast()

    original = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")
    train = readVectorAutoRegressiveMethodXTrain()  # the training split
    fitted_model = readVectorAutoRegressiveMethodModel()

    # forecast() has to be given the last `lag_order` observations, and (per
    # the original notes) it does not supply a DateTime index, so a
    # month-start index is built by hand.
    raw_forecast = fitted_model.forecast(
        y=train.values[-lag_order:], steps=horizon)
    month_starts = pd.date_range('1/1/2015', periods=horizon, freq='MS')
    forecast = pd.DataFrame(
        raw_forecast, index=month_starts, columns=['Money2d', 'Spending2d'])

    # Inverting a difference: take the most recent value on the training side
    # of the original series and add the cumulative sum of the forecast.
    # With second-order differences this is done twice - once to recover the
    # first differences, once more to recover the levels.
    columns_to_invert = (
        ('Money', 'Money2d', 'Money1d', 'MoneyForecast'),
        ('Spending', 'Spending2d', 'Spending1d', 'SpendingForecast'),
    )
    for level_col, second_diff_col, first_diff_col, forecast_col in columns_to_invert:
        series = original[level_col]
        last_train_level = series.iloc[-horizon - 1]
        previous_train_level = series.iloc[-horizon - 2]

        # Most recent first difference on the training side + cumulative
        # sum of the forecasted second differences.
        forecast[first_diff_col] = (
            last_train_level - previous_train_level
        ) + forecast[second_diff_col].cumsum()

        # Last training-side level + cumulative sum of the first differences.
        forecast[forecast_col] = (
            last_train_level + forecast[first_diff_col].cumsum())

    # Store the forecasts, now expressed on the original scale.
    saveVectorAutoRegressiveMethodForecastedValues(forecast)
Пример #6
0
def testIsDatasetStationary():
    """Run the augmented Dickey-Fuller helper on the twice-differenced series.

    Only the second differences of "Money" and "Spending" are tested now.
    The levels and first differences were tested in earlier runs; those
    results, taken from the original notes, are summarised in the comment
    table inside the function.
    """
    # Recorded augmented Dickey-Fuller results (from the original notes):
    #
    # series                  statistic   p-value       lags  obs  crit 1%    crit 5%    crit 10%   verdict
    # Money                    4.239022   1.000000         4  247  -3.457105  -2.873314  -2.573044  non-stationary
    # Spending                 0.149796   0.969301         3  248  -3.456996  -2.873266  -2.573019  non-stationary
    # Money first diff        -2.057404   0.261984        15  235  -3.458487  -2.873919  -2.573367  non-stationary
    # Spending first diff     -7.226974   2.041027e-10     2  248  -3.456996  -2.873266  -2.573019  stationary
    # Money second diff       -7.077471   4.760675e-10    14  235  -3.458487  -2.873919  -2.573367  stationary
    #
    # "non-stationary" = fail to reject the null hypothesis (unit root);
    # "stationary" = null hypothesis rejected (no unit root).
    # No output was recorded for the Spending second difference.

    levels = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")

    # The leading missing values produced by differencing are not dropped
    # here; the series are passed to the helper as they are.
    second_difference = levels.diff().diff()

    # Title strings are runtime output and are passed through verbatim.
    checks = (
        ("Money", "Money Second Differnce"),
        ("Spending", "Spending Second Differnce"),
    )
    for column, title in checks:
        agumentedDickeyFullerTest(second_difference[column], title=title)
def plotTheSourceData():
    """Load the money/spending dataset and pass it to ``visualizeSourceDataPlot``."""
    source_frame = importVectorAutoRegressiveMethodDataset(
        "M2SLMoneyStock.csv", "PCEPersonalSpending.csv")
    visualizeSourceDataPlot(source_frame)