Exemplo n.º 1
def test_dates_from_range():
    """Check dates_from_range against hand-written period-end dates.

    Three cases are covered: a quarterly range given by start and end
    strings, a quarterly range given by a start string and a length, and a
    monthly range given by a start string and a length.
    """
    results = [datetime(1959, 3, 31, 0, 0),
               datetime(1959, 6, 30, 0, 0),
               datetime(1959, 9, 30, 0, 0),
               datetime(1959, 12, 31, 0, 0),
               datetime(1960, 3, 31, 0, 0),
               datetime(1960, 6, 30, 0, 0),
               datetime(1960, 9, 30, 0, 0),
               datetime(1960, 12, 31, 0, 0),
               datetime(1961, 3, 31, 0, 0),
               datetime(1961, 6, 30, 0, 0),
               datetime(1961, 9, 30, 0, 0),
               datetime(1961, 12, 31, 0, 0),
               datetime(1962, 3, 31, 0, 0),
               datetime(1962, 6, 30, 0, 0)]
    dt_range = dates_from_range('1959q1', '1962q2')
    npt.assert_(results == dt_range)

    # test with starting period not the first with length
    results = results[2:]
    dt_range = dates_from_range('1959q3', length=len(results))
    npt.assert_(results == dt_range)

    # check month: every month end from March 1959 through October 1961
    results = [datetime(1959, 3, 31, 0, 0),
               datetime(1959, 4, 30, 0, 0),
               datetime(1959, 5, 31, 0, 0),
               datetime(1959, 6, 30, 0, 0),
               datetime(1959, 7, 31, 0, 0),
               datetime(1959, 8, 31, 0, 0),
               datetime(1959, 9, 30, 0, 0),
               datetime(1959, 10, 31, 0, 0),
               datetime(1959, 11, 30, 0, 0),
               datetime(1959, 12, 31, 0, 0),
               datetime(1960, 1, 31, 0, 0),
               # 1960 is a leap year, so February ends on the 29th
               datetime(1960, 2, 29, 0, 0),
               datetime(1960, 3, 31, 0, 0),
               datetime(1960, 4, 30, 0, 0),
               datetime(1960, 5, 31, 0, 0),
               datetime(1960, 6, 30, 0, 0),
               datetime(1960, 7, 31, 0, 0),
               datetime(1960, 8, 31, 0, 0),
               datetime(1960, 9, 30, 0, 0),
               datetime(1960, 10, 31, 0, 0),
               datetime(1960, 11, 30, 0, 0),
               datetime(1960, 12, 31, 0, 0),
               datetime(1961, 1, 31, 0, 0),
               datetime(1961, 2, 28, 0, 0),
               datetime(1961, 3, 31, 0, 0),
               datetime(1961, 4, 30, 0, 0),
               datetime(1961, 5, 31, 0, 0),
               datetime(1961, 6, 30, 0, 0),
               datetime(1961, 7, 31, 0, 0),
               datetime(1961, 8, 31, 0, 0),
               datetime(1961, 9, 30, 0, 0),
               datetime(1961, 10, 31, 0, 0)]

    dt_range = dates_from_range("1959m3", length=len(results))
    # The monthly range must actually be compared, like the quarterly ones.
    npt.assert_(results == dt_range)
Exemplo n.º 2
def test_acovf2d():
    """acovf on a date-indexed frame matches acovf on its raw values.

    Also checks that a random (10, 2) array makes acovf raise ValueError.
    """
    frame = sunspots.load_pandas().data
    yearly_dates = dates_from_range('1700', '2008')
    frame.index = Index(yearly_dates)
    del frame["YEAR"]

    from_frame = acovf(frame)
    from_values = acovf(frame.values)
    assert_equal(from_frame, from_values)

    two_columns = np.random.random((10,2))
    assert_raises(ValueError, acovf, two_columns)
Exemplo n.º 3
def test_acovf2d():
    """Compare acovf on a pandas frame with acovf on the underlying array.

    A random array of shape (10, 2) is then expected to raise ValueError.
    """
    sun = sunspots.load_pandas().data
    sun.index = Index(dates_from_range('1700', '2008'))
    del sun["YEAR"]

    pandas_result = acovf(sun)
    assert_equal(pandas_result, acovf(sun.values))

    wide = np.random.random((10, 2))
    assert_raises(ValueError, acovf, wide)
Exemplo n.º 4
def test_hpfilter_pandas():
    """hpfilter on a Series agrees with the ndarray call and keeps labels.

    The cycle's values, first and last index entries, and name are checked.
    """
    frame = macrodata.load_pandas().data
    frame.index = Index(dates_from_range('1959Q1', '2009Q3'))

    gdp = frame["realgdp"]
    cycle, trend = hpfilter(gdp)
    ndcycle, ndtrend = hpfilter(gdp.values)

    assert_equal(cycle.values, ndcycle)
    first_date, last_date = cycle.index[0], cycle.index[-1]
    assert_equal(first_date, datetime(1959, 3, 31))
    assert_equal(last_date, datetime(2009, 9, 30))
    assert_equal(cycle.name, "realgdp")
Exemplo n.º 5
def test_hpfilter_pandas():
    """Run hpfilter on the realgdp Series and on its raw values.

    The pandas cycle must equal the ndarray cycle and must carry the
    quarterly date index and the column name through.
    """
    macro = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    macro.index = Index(quarter_ends)

    cycle, trend = hpfilter(macro["realgdp"])
    ndcycle, ndtrend = hpfilter(macro['realgdp'].values)

    assert_equal(cycle.values, ndcycle)
    expected_bounds = (datetime(1959, 3, 31), datetime(2009, 9, 30))
    assert_equal(cycle.index[0], expected_bounds[0])
    assert_equal(cycle.index[-1], expected_bounds[1])
    assert_equal(cycle.name, "realgdp")
Exemplo n.º 6
def test_dates_from_range():
    """dates_from_range('1959q1', '1962q2') yields the 14 quarter ends."""
    # (month, last day) of each quarter; none of them depends on leap years.
    quarter_ends = ((3, 31), (6, 30), (9, 30), (12, 31))
    every_quarter = [datetime(year, month, day, 0, 0)
                     for year in range(1959, 1963)
                     for month, day in quarter_ends]
    # 1959Q1 through 1962Q2 is 14 quarters; drop 1962Q3 and 1962Q4.
    results = every_quarter[:14]

    dt_range = dates_from_range('1959q1', '1962q2')
    npt.assert_(results == dt_range)
Exemplo n.º 7
    def setupClass(cls):
        """Prepare the quarterly and monthly data sets shared by the tests.

        Sets ``cls.quarterly_data``, ``cls.monthly_data`` and
        ``cls.monthly_start_data``.

        Raises
        ------
        SkipTest
            If the module-level ``_have_x13`` flag is false.
        """
        if not _have_x13:
            raise SkipTest('X13/X12 not available')

        import pandas as pd
        from statsmodels.datasets import macrodata, co2
        dta = macrodata.load_pandas().data
        dates = dates_from_range('1959Q1', '2009Q3')
        index = pd.DatetimeIndex(dates)
        dta.index = index
        cls.quarterly_data = dta.dropna()

        dta = co2.load_pandas().data
        dta['co2'] = dta.co2.interpolate()
        cls.monthly_data = dta.resample('M')
        # Since pandas 0.18 resample returns a deferred object instead of
        # data; aggregate it explicitly so the fixture is a real frame/series.
        if not isinstance(cls.monthly_data, (pd.DataFrame, pd.Series)):
            cls.monthly_data = cls.monthly_data.mean()

        cls.monthly_start_data = dta.resample('MS')
        if not isinstance(cls.monthly_start_data, (pd.DataFrame, pd.Series)):
            cls.monthly_start_data = cls.monthly_start_data.mean()
Exemplo n.º 8
    def setupClass(cls):
        """Prepare the quarterly and monthly data sets shared by the tests.

        Sets ``cls.quarterly_data``, ``cls.monthly_data`` and
        ``cls.monthly_start_data``.

        Raises
        ------
        SkipTest
            If the module-level ``_have_x13`` flag is false.
        """
        if not _have_x13:
            raise SkipTest("X13/X12 not available")

        import pandas as pd
        from statsmodels.datasets import macrodata, co2

        dta = macrodata.load_pandas().data
        dates = dates_from_range("1959Q1", "2009Q3")
        index = pd.DatetimeIndex(dates)
        dta.index = index
        cls.quarterly_data = dta.dropna()

        dta = co2.load_pandas().data
        dta["co2"] = dta.co2.interpolate()
        cls.monthly_data = dta.resample("M")
        # Since pandas 0.18 resample returns a deferred object instead of
        # data; aggregate it explicitly so the fixture is a real frame/series.
        if not isinstance(cls.monthly_data, (pd.DataFrame, pd.Series)):
            cls.monthly_data = cls.monthly_data.mean()

        cls.monthly_start_data = dta.resample("MS")
        if not isinstance(cls.monthly_start_data, (pd.DataFrame, pd.Series)):
            cls.monthly_start_data = cls.monthly_start_data.mean()
Exemplo n.º 9
def test_bking_pandas():
    """bkfilter on pandas input matches the ndarray result and keeps labels.

    Checked for a single Series ("infl") and for a two-column frame
    (["infl", "unemp"]); in both cases the filtered index is expected to run
    from 1962-03-31 to 2006-09-30.
    """
    frame = macrodata.load_pandas().data
    frame.index = Index(dates_from_range('1959Q1', '2009Q3'))
    first_kept = datetime(1962, 3, 31)
    last_kept = datetime(2006, 9, 30)

    # 1d
    filtered = bkfilter(frame["infl"])
    nd_filtered = bkfilter(frame['infl'].values)
    assert_equal(filtered.values, nd_filtered)
    assert_equal(filtered.index[0], first_kept)
    assert_equal(filtered.index[-1], last_kept)
    assert_equal(filtered.name, "infl")

    # 2d
    filtered = bkfilter(frame[["infl","unemp"]])
    nd_filtered = bkfilter(frame[['infl', 'unemp']].values)
    assert_equal(filtered.values, nd_filtered)
    assert_equal(filtered.index[0], first_kept)
    assert_equal(filtered.index[-1], last_kept)
    assert_equal(filtered.columns.values, ["infl", "unemp"])
Exemplo n.º 10
def test_cfitz_pandas():
    """cffilter on pandas input matches the ndarray result and keeps labels.

    Checked for a single Series ("infl") and for a two-column frame
    (["infl", "unemp"]); the cycle's index is expected to span the full
    1959-03-31 to 2009-09-30 range in both cases.
    """
    frame = macrodata.load_pandas().data
    frame.index = Index(dates_from_range('1959Q1', '2009Q3'))
    first_date = datetime(1959, 3, 31)
    last_date = datetime(2009, 9, 30)

    # 1d
    cycle, trend = cffilter(frame["infl"])
    ndcycle, ndtrend = cffilter(frame['infl'].values)
    assert_allclose(cycle.values, ndcycle, rtol=1e-14)
    assert_equal(cycle.index[0], first_date)
    assert_equal(cycle.index[-1], last_date)
    assert_equal(cycle.name, "infl")

    # 2d
    cycle, trend = cffilter(frame[["infl","unemp"]])
    ndcycle, ndtrend = cffilter(frame[['infl', 'unemp']].values)
    assert_allclose(cycle.values, ndcycle, rtol=1e-14)
    assert_equal(cycle.index[0], first_date)
    assert_equal(cycle.index[-1], last_date)
    assert_equal(cycle.columns.values, ["infl", "unemp"])
Exemplo n.º 11
def test_bking_pandas():
    """Compare bkfilter output for pandas objects with the ndarray output.

    Runs once on the "infl" Series and once on the ["infl", "unemp"] frame,
    checking values, the first/last index entries and the name or columns.
    """
    macro = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    macro.index = Index(quarter_ends)
    expected_start, expected_end = datetime(1962, 3, 31), datetime(2006, 9, 30)

    # 1d
    infl = macro["infl"]
    filtered = bkfilter(infl)
    nd_filtered = bkfilter(macro['infl'].values)
    assert_equal(filtered.values, nd_filtered)
    assert_equal(filtered.index[0], expected_start)
    assert_equal(filtered.index[-1], expected_end)
    assert_equal(filtered.name, "infl")

    # 2d
    pair = macro[["infl","unemp"]]
    filtered = bkfilter(pair)
    nd_filtered = bkfilter(macro[['infl', 'unemp']].values)
    assert_equal(filtered.values, nd_filtered)
    assert_equal(filtered.index[0], expected_start)
    assert_equal(filtered.index[-1], expected_end)
    assert_equal(filtered.columns.values, ["infl", "unemp"])
Exemplo n.º 12
def test_cfitz_pandas():
    """Compare cffilter output for pandas objects with the ndarray output.

    Runs once on the "infl" Series and once on the ["infl", "unemp"] frame,
    checking values (rtol=1e-14), the first/last index entries and the name
    or columns.
    """
    macro = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    macro.index = Index(quarter_ends)
    expected_start, expected_end = datetime(1959, 3, 31), datetime(2009, 9, 30)

    # 1d
    infl = macro["infl"]
    cycle, trend = cffilter(infl)
    ndcycle, ndtrend = cffilter(macro['infl'].values)
    assert_allclose(cycle.values, ndcycle, rtol=1e-14)
    assert_equal(cycle.index[0], expected_start)
    assert_equal(cycle.index[-1], expected_end)
    assert_equal(cycle.name, "infl")

    # 2d
    pair = macro[["infl","unemp"]]
    cycle, trend = cffilter(pair)
    ndcycle, ndtrend = cffilter(macro[['infl', 'unemp']].values)
    assert_allclose(cycle.values, ndcycle, rtol=1e-14)
    assert_equal(cycle.index[0], expected_start)
    assert_equal(cycle.index[-1], expected_end)
    assert_equal(cycle.columns.values, ["infl", "unemp"])
Exemplo n.º 13
    def setupClass(cls):
        """Prepare the quarterly and monthly data sets shared by the tests.

        Sets ``cls.quarterly_data``, ``cls.monthly_data`` and
        ``cls.monthly_start_data``; raises SkipTest when the module-level
        ``_have_x13`` flag is false.
        """
        if not _have_x13:
            raise SkipTest('X13/X12 not available')

        import pandas as pd
        from statsmodels.datasets import macrodata, co2

        def _materialize(resampled):
            # change in pandas 0.18: resample is a deferred object, so take
            # the mean unless we were already handed a frame or series
            if isinstance(resampled, (pd.DataFrame, pd.Series)):
                return resampled
            return resampled.mean()

        quarterly = macrodata.load_pandas().data
        quarterly.index = pd.DatetimeIndex(
            dates_from_range('1959Q1', '2009Q3'))
        cls.quarterly_data = quarterly.dropna()

        monthly = co2.load_pandas().data
        monthly['co2'] = monthly.co2.interpolate()
        cls.monthly_data = _materialize(monthly.resample('M'))
        cls.monthly_start_data = _materialize(monthly.resample('MS'))
Exemplo n.º 14
    def setupClass(cls):
        """Build the class-level fixtures: quarterly macro data and CO2 data.

        ``cls.monthly_data`` uses the 'M' resample rule and
        ``cls.monthly_start_data`` the 'MS' rule. Raises SkipTest when the
        module-level ``_have_x13`` flag is false.
        """
        if not _have_x13:
            raise SkipTest('X13/X12 not available')

        import pandas as pd
        from statsmodels.datasets import macrodata, co2

        concrete_types = (pd.DataFrame, pd.Series)

        macro = macrodata.load_pandas().data
        quarter_ends = dates_from_range('1959Q1', '2009Q3')
        macro.index = pd.DatetimeIndex(quarter_ends)
        cls.quarterly_data = macro.dropna()

        carbon = co2.load_pandas().data
        carbon['co2'] = carbon.co2.interpolate()

        month_end = carbon.resample('M')
        # change in pandas 0.18 resample is deferred object
        if not isinstance(month_end, concrete_types):
            month_end = month_end.mean()
        cls.monthly_data = month_end

        month_start = carbon.resample('MS')
        if not isinstance(month_start, concrete_types):
            month_start = month_start.mean()
        cls.monthly_start_data = month_start
Exemplo n.º 15
from statsmodels.datasets.macrodata import load_pandas
from statsmodels.tsa.base.datetools import dates_from_range
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm

# Example script: fit an ARIMA(1, 1, 1) model to the CPI series and compute
# the ACF/PACF of the differenced log series.

# let's examine an ARIMA model of CPI

cpi = load_pandas().data["cpi"]
dates = dates_from_range("1959q1", "2009q3")
cpi.index = dates

res = ARIMA(cpi, (1, 1, 1), freq='Q').fit()
# Call form of print: valid on Python 3, and with a single argument it
# prints the same text under the Python 2 print statement.
print(res.summary())

# we can look at the series

# maybe logs are better
log_cpi = np.log(cpi)

# check the ACF and PCF plots
# (the first element of the differenced series is dropped by the [1:] slice)
acf, confint_acf = sm.tsa.acf(log_cpi.diff().values[1:], confint=95)
# center the confidence intervals about zero
# TODO: demean? --> confint_acf -= confint_acf.mean(1)[:, None]
pacf = sm.tsa.pacf(log_cpi.diff().values[1:], method='ols')
# confidence interval is now an option to pacf
from scipy import stats
# two-sided 95% normal quantile times sqrt(1/202)
confint_pacf = stats.norm.ppf(1 - .025) * np.sqrt(1 / 202.)
Exemplo n.º 16
#print newdf.head()


# Building ARIMA model

from statsmodels.tsa.base.datetools import dates_from_range

# Training set: the first 95% of the rows of newdf.
trainWTI = newdf[:int(0.95 * len(newdf))]

# '2012m1' means to start counting months from the 1st month of 2012
# To know the starting month, print trainWTI.head()
dates1 = dates_from_range('2012m1', length=len(trainWTI.WTI))
trainWTI.index = dates1
# keep only the WTI column, as a one-column frame
trainWTI = trainWTI[['WTI']]

# Call form of print: valid on Python 3, and with a single argument it
# prints the same text under the Python 2 print statement.
print(trainWTI.tail())

# Determine whether AR or MA terms are needed to correct any
# autocorrelation that remains in the series.
# Looking at the autocorrelation function (ACF) and partial autocorrelation
# (PACF) plots of the series, it's possible to identify the numbers of AR
# and/or MA terms that are needed.
# In this example, the autocorrelations are significant for a large number
# of lags, but perhaps the autocorrelations at lags 2 and above are merely
# due to the propagation of the autocorrelation at lag 1.
# This is confirmed by the PACF plot.
# Rule 1: If the PACF of the differenced series displays a sharp cutoff
# and/or the lag-1 autocorrelation is positive, then consider adding an AR
# term to the model. The lag at which the PACF cuts off is the indicated
# number of AR terms.
Exemplo n.º 17
from statsmodels.datasets.macrodata import load_pandas
from statsmodels.tsa.base.datetools import dates_from_range
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm

# let's use an ARIMA model of CPI as the example

cpi = load_pandas().data['cpi']
dates = dates_from_range('1959q1', '2009q3')
cpi.index = dates

arima_order = (1, 1, 1)
res = ARIMA(cpi, arima_order, freq='Q').fit()

# we can plot the series to look at it

# maybe logarithms are better
log_cpi = np.log(cpi)

# check the ACF and PACF plots
# (differenced log series with its first element dropped)
acf, confint_acf = sm.tsa.acf(
    log_cpi.diff().values[1:],
    confint=95,
)
# center the confidence intervals about zero
# TODO: demean? --> confint_acf -= confint_acf.mean(1)[:, None]
pacf = sm.tsa.pacf(
    log_cpi.diff().values[1:],
    method='ols',
)
# the confidence interval is an option to pacf
normal_quantile = stats.norm.ppf(1 - .025)
confint_pacf = normal_quantile * np.sqrt(1 / 202.)
Exemplo n.º 18


# Building ARIMA model

from statsmodels.tsa.base.datetools import dates_from_range

# Training set: the first 95% of the rows of newdf.
trainWTI = newdf[:int(0.95*len(newdf))]

# '2012m1' means to start counting months from the 1st month of 2012
# To know the starting month, print trainWTI.head()
dates1 = dates_from_range('2012m1', length=len(trainWTI.WTI))
trainWTI.index = dates1
# keep only the WTI column, as a one-column frame
trainWTI = trainWTI[['WTI']]

# Call form of print: valid on Python 3, and with a single argument it
# prints the same text under the Python 2 print statement.
print(trainWTI.tail())

# Determine whether AR or MA terms are needed to correct any
# autocorrelation that remains in the series.
# Looking at the autocorrelation function (ACF) and partial autocorrelation
# (PACF) plots of the series, it's possible to identify the numbers of AR
# and/or MA terms that are needed.
# In this example, the autocorrelations are significant for a large number
# of lags, but perhaps the autocorrelations at lags 2 and above are merely
# due to the propagation of the autocorrelation at lag 1.
# This is confirmed by the PACF plot.
# Rule 1: If the PACF of the differenced series displays a sharp cutoff
# and/or the lag-1 autocorrelation is positive, then consider adding an AR
# term to the model. The lag at which the PACF cuts off is the indicated
# number of AR terms.
Exemplo n.º 19
from __future__ import print_function
from statsmodels.datasets.macrodata import load_pandas
from statsmodels.tsa.base.datetools import dates_from_range
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm

# Fit an ARIMA(1, 1, 1) model to the quarterly CPI series.

cpi = load_pandas().data['cpi']
dates = dates_from_range('1959q1', '2009q3')
cpi.index = dates

arima_order = (1, 1, 1)
res = ARIMA(cpi, arima_order, freq='Q').fit()

# the series itself can be inspected here

# work with the logarithm of the series instead
log_cpi = np.log(cpi)

# ACF and PACF of the differenced log series (first element dropped)
acf, confint_acf = sm.tsa.acf(
    log_cpi.diff().values[1:],
    confint=95,
)
# the confidence intervals could be centered about zero
# TODO: demean? --> confint_acf -= confint_acf.mean(1)[:, None]
pacf = sm.tsa.pacf(
    log_cpi.diff().values[1:],
    method='ols',
)
# confidence interval is now an option to pacf