Example #1
# Assumed imports (module locations follow the statsmodels-era source of this
# snippet; adjust to your version):
import numpy as np
import scikits.statsmodels as sm
from scikits.statsmodels.tsa.tsatools import lagmat


def pacf_ols(x, nlags=40):
    '''Calculate partial autocorrelations

    Parameters
    ----------
    x : 1d array
        observations of time series for which pacf is calculated
    nlags : int
        Number of lags for which the pacf is calculated; lag 0 (always 1.) is
        included, so the result has nlags+1 elements.

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, nlags+1 elements including lag 0

    Notes
    -----
    This solves a separate OLS estimation for each desired lag.
    '''
    #TODO: add warnings for Yule-Walker
    #NOTE: demeaning and not using a constant gave incorrect answers?
    #JP: demeaning should have a better estimate of the constant
    #maybe we can compare small sample properties with a MonteCarlo
    xlags = lagmat(x, nlags)
    x0 = xlags[:,0]
    xlags = xlags[:,1:]
    xlags = sm.add_constant(xlags, prepend=True)
    pacf = [1.]  # pacf at lag 0 is 1 by definition
    for k in range(1, nlags + 1):
        # regress x_t on a constant and its first k lags; the coefficient
        # on the k-th lag is the partial autocorrelation at lag k
        res = sm.OLS(x0[k:], xlags[k:, :k + 1]).fit()
        pacf.append(res.params[-1])
    return np.array(pacf)
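A quick sanity check on simulated data (a hedged sketch: the AR(1) coefficient 0.6, series length, and seed are illustrative, not part of the original):

import numpy as np

np.random.seed(12345)
nobs = 500
e = np.random.randn(nobs)
y = np.zeros(nobs)
for t in range(1, nobs):
    y[t] = 0.6 * y[t - 1] + e[t]   # simulate an AR(1) with coefficient 0.6

pacf = pacf_ols(y, nlags=10)
print(pacf[:3])   # pacf[0] == 1.; pacf[1] should be close to 0.6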
Example #2
# Assumed imports (module locations follow the statsmodels-era source of this
# snippet; ResultsStore is a small results container defined alongside it):
import numpy as np
from scipy import stats
import scikits.statsmodels as sm
from scikits.statsmodels.tsa.tsatools import lagmat


def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
    '''Lagrange Multiplier test for autocorrelation

    not checked yet, copied from unitroot_adf with adjustments
    check array shapes because of the addition of the constant.
    written/copied without reference
    This is not Breusch-Godfrey. BG adds lags of the residual to the exog in
    the design matrix for the auxiliary regression, with the residuals as
    endog; see Greene 12.7.1.

    Notes
    -----
    If x is calculated as y**2 for a time series y, then this test corresponds
    to the Engle test for autoregressive conditional heteroscedasticity
    (ARCH).
    TODO: get details and verify

    '''

    x = np.asarray(x)
    nobs = x.shape[0]
    if maxlag is None:
        # rule of thumb from Greene, referencing Schwert (1989); keep it an
        # integer so it can be used in range() and slicing
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1. / 4.)))
        #TODO: check default, or do AIC/BIC


    xdall = lagmat(x[:-1, None], maxlag, trim='both')
    nobs = xdall.shape[0]
    xdall = np.c_[np.ones((nobs, 1)), xdall]  # prepend a constant
    xshort = x[-nobs:]

    if store:
        resstore = ResultsStore()

    if autolag:
        # search for the lag length with the lowest information criterion
        # Note: use the same number of observations to have comparable IC
        results = {}
        for mlag in range(1, maxlag + 1):
            results[mlag] = sm.OLS(xshort, xdall[:, :mlag + 1]).fit()

        if autolag.lower() == 'aic':
            icbest, icbestlag = min((v.aic, k) for k, v in results.items())
        elif autolag.lower() == 'bic':
            icbest, icbestlag = min((v.bic, k) for k, v in results.items())
        else:
            raise ValueError("autolag can only be None, 'AIC' or 'BIC'")

        # rerun OLS with the lag length chosen by the information criterion
        xdall = lagmat(x[:, None], icbestlag, trim='forward')
        nobs = xdall.shape[0]
        xdall = np.c_[np.ones((nobs, 1)), xdall]
        xshort = x[-nobs:]
        usedlag = icbestlag
    else:
        usedlag = maxlag

    resols = sm.OLS(xshort, xdall[:, :usedlag + 1]).fit()
    fval = resols.fvalue
    fpval = resols.f_pvalue
    lm = nobs * resols.rsquared
    lmpval = stats.chi2.sf(lm, usedlag)
    # Note: degrees of freedom for the LM test is nvars minus the constant,
    # i.e. usedlag

    if store:
        resstore.resols = resols
        resstore.usedlag = usedlag
        return fval, fpval, lm, lmpval, resstore
    else:
        return fval, fpval, lm, lmpval
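A hedged usage sketch of the Notes above: passing y**2 turns the LM test into an ARCH (Engle) test; the white-noise series below is illustrative:

import numpy as np

np.random.seed(12345)
y = np.random.randn(1000)   # white noise, so no ARCH effects are expected

fval, fpval, lm, lmpval = acorr_lm(y**2, maxlag=4, autolag=None)
print('LM = %.3f, p-value = %.3f' % (lm, lmpval))   # expect a large p-value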
Example #3
    # Assumed module-level imports for this method (not shown in the snippet):
    #   import numpy as np
    #   from scipy import optimize
    #   from scikits.statsmodels.regression import OLS
    #   from scikits.statsmodels.tsa.tsatools import lagmat, add_trend
    def fit(self, maxlag=None, method='ols', ic=None, trend='c', demean=True,
            penalty=False,
            start_params=None, solver=None, maxiter=35, full_output=1, disp=1,
            callback=None, **kwargs):
        """
        Fit the unconditional maximum likelihood of an AR(p) process.

        Parameters
        ----------
        start_params : array-like, optional
            A first guess on the parameters.  Defaults is a vector of zeros.
        method : str {'ols', 'yw', 'mle', 'umle'}, optional
            ols - Ordinary Least Squares
            yw - Yule-Walker
            mle - conditional maximum likelihood
            umle - unconditional maximum likelihood
        solver : str or None, optional
            Unconstrained solvers:
                'bfgs' (the default), 'newton' (Newton-Raphson), 'ncg', and
                'powell'.  (Note that the first three are not recommended at
                the moment.)
            Constrained solvers:
                'bfgs-b', 'tnc'
            See notes.
        maxiter : int, optional
            The maximum number of function evaluations. Default is 35.
        tol : float
            The convergence tolerance.  Default is 1e-08.
        penalty : bool
            Whether or not to use a penalty function.  Default is False,
            though this is ignored at the moment and the penalty is always
            used if appropriate.  See notes.

        Notes
        -----
        The unconstrained solvers use a quadratic penalty (regardless if
        penalty kwd is True or False) in order to ensure that the solution
        stays within (-1,1).  The constrained solvers default to using a bound
        of (-.999,.999).

        See also
        --------
        scikits.statsmodels.model.LikelihoodModel.fit : more information on
            using the solvers.
        """
        self.penalty = penalty
        method = method.lower()
        nobs = self.nobs
        if maxlag is None:
            maxlag = int(round(12 * (nobs / 100.) ** (1 / 4.)))
        avobs = nobs - maxlag
        self.avobs = avobs
        laglen = maxlag
        self.laglen = laglen
        if demean:
            endog = self.endog.copy() # have to copy if demeaning
            mean = endog.mean()
            endog -= mean
            self.endog_mean = mean
        else:
            endog = self.endog
        # LHS
        Y = endog[laglen:,:]
        # make lagged RHS
        X = lagmat(endog, maxlag=laglen, trim='both')[:,1:]
        if self.exog is not None:
            X = np.column_stack((self.exog[laglen:,:], X))
        # Handle constant, etc.
        if trend == 'c':
            trendorder = 1
        elif trend == 'nc':
            trendorder = 0
        elif trend == 'ct':
            trendorder = 2
        elif trend == 'ctt':
            trendorder = 3
        else:
            raise ValueError("trend must be one of 'c', 'nc', 'ct', 'ctt'")
        X = add_trend(X, prepend=True, trend=trend)
        self.trendorder = trendorder

        self.Y = Y
        self.X = X

        if solver:
            solver = solver.lower()
#TODO: allow user-specified penalty function
#        if penalty and method not in ['bfgs_b','tnc','cobyla','slsqp']:
#            minfunc = lambda params : -self.loglike(params) - \
#                    self.penfunc(params)
#        else:
        if method == "mle":
            if not solver: # make default?
                solver = 'newton'
            if start_params is None:
                start_params = np.zeros((X.shape[1]))
            if solver in ['newton', 'bfgs', 'ncg']:
                return super(AR, self).fit(start_params=start_params, method=solver,
                    maxiter=maxiter, full_output=full_output, disp=disp,
                    callback=callback, **kwargs)
#                return retvals
        elif method == "umle":
#TODO: move this stuff up to LikelihoodModel.fit
            minfunc = lambda params: -self.loglike(params)
            bounds = [(-.999,.999)]   # assume stationarity
            if start_params == None:
                start_params = np.array([0]) # assumes AR(1)
            if method == 'bfgs-b':
                retval = optimize.fmin_l_bfgs_b(minfunc, start_params,
                        approx_grad=True, bounds=bounds)
                self.params, self.llf = retval[0:2]
            if method == 'tnc':
                retval = optimize.fmin_tnc(minfunc, start_params,
                        approx_grad=True, bounds = bounds)
                self.params = retval[0]
            if method == 'powell':
                retval = optimize.fmin_powell(minfunc,start_params)
                self.params = retval[None]
#TODO: write regression tests for Pauli's branch so that
# new line_search and optimize.nonlin can get put in.
# http://projects.scipy.org/scipy/ticket/791
#            if method == 'broyden':
#                retval = optimize.broyden2(minfunc, [.5], verbose=True)
#                self.results = retvar
        elif method == "ols":
            arfit = OLS(Y,X).fit()
            params = arfit.params
            omega = None
            self.params = params
        elif method == "yw":
            params, omega = sm.regression.yule_walker(endog, order=maxlag,
                    method="mle", demean=False)
            self.params = params
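A hedged usage sketch; the AR model class that owns this method is not shown in the snippet, so the constructor below is an assumption (the method indexes endog as a 2d array):

import numpy as np

np.random.seed(12345)
nobs = 250
e = np.random.randn(nobs)
y = np.zeros(nobs)
for t in range(1, nobs):
    y[t] = 0.5 * y[t - 1] + e[t]   # simulate an AR(1) with coefficient 0.5

ar_mod = AR(y[:, None])            # hypothetical constructor; endog must be 2d
ar_mod.fit(maxlag=1, method='ols', trend='c')
print(ar_mod.params)               # constant followed by the AR coefficient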
Example #4
# Assumed imports (module locations follow the statsmodels-era source of this
# snippet; mackinnonp, mackinnoncrit, _autolag and ResultsStore are defined
# alongside this function):
import numpy as np
import scikits.statsmodels as sm
from scikits.statsmodels.tsa.tsatools import lagmat


def adfuller(x, maxlag=None, regression="c", autolag='AIC',
             store=False, regresults=False):
    '''Augmented Dickey-Fuller unit root test

    The Augmented Dickey-Fuller test can be used to test for a unit root in a
    univariate process in the presence of serial correlation.

    Parameters
    ----------
    x : array_like, 1d
        data series
    maxlag : int
        Maximum lag which is included in test, default 12*(nobs/100)^{1/4}
    regression : str {'c','ct','ctt','nc'}
        Constant and trend order to include in regression
        * 'c' : constant only
        * 'ct' : constant and trend
        * 'ctt' : constant, and linear and quadratic trend
        * 'nc' : no constant, no trend
    autolag : {'AIC', 'BIC', 't-stat', None}
        * if None, then maxlag lags are used
        * if 'AIC' or 'BIC', then the number of lags is chosen to minimize the
          corresponding information criterion
        * 't-stat' based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag is significant at the
          95 % level.
    store : bool
        If True, then a result instance is returned additionally to
        the adf statistic
    regresults : bool
        If True, the full regression results are returned.

    Returns
    -------
    adf : float
        Test statistic
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994)
    usedlag : int
        Number of lags used.
    nobs : int
        Number of observations used for the ADF regression and calculation of
        the critical values.
    critical values : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 % levels.
        Based on MacKinnon (2010)
    icbest : float
        The maximized information criterion if autolag is not None.
    regresults : RegressionResults instance
        The full regression results.  Returned only if regresults is True.
    resstore : (optional) instance of ResultStore
        an instance of a dummy class with results attached as attributes

    Notes
    -----
    If the p-value is close to significant, then the critical values should be
    used to judge whether to accept or reject the null.

    Examples
    --------
    see example script

    References
    ----------
    Greene, W.H.  Econometric Analysis.
    Hamilton, J.D.  1994.  Time Series Analysis.  Princeton University Press.


    P-values (regression surface approximation):
    MacKinnon, J.G.  1994.  "Approximate asymptotic distribution functions for
        unit-root and cointegration tests."  Journal of Business and Economic
        Statistics 12, 167-76.

    Critical values:
    MacKinnon, J.G.  2010.  "Critical Values for Cointegration Tests."
        Queen's University, Dept of Economics, Working Papers.  Available at
        http://ideas.repec.org/p/qed/wpaper/1227.html
    '''
    regression = regression.lower()
    if regression not in ['c','nc','ct','ctt']:
        raise ValueError("regression option %s not understood") % regression
    x = np.asarray(x)
    nobs = x.shape[0]

    if regression == 'c':
        trendorder = 0
    elif regression == 'nc':
        trendorder = -1
    elif regression == 'ct':
        trendorder = 1
    elif regression == 'ctt':
        trendorder = 2
    # only make the trend once with biggest nobs
    trend = np.vander(np.arange(nobs), trendorder+1)

    if maxlag is None:
        # rule of thumb from Greene, referencing Schwert (1989); keep it an
        # integer so it can be used for slicing and lag counts
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1. / 4.)))

    xdiff = np.diff(x)
    xdall = lagmat(xdiff[:,None], maxlag, trim='both')
    nobs = xdall.shape[0]

    xdall[:,0] = x[-nobs-1:-1] # replace 0 xdiff with level of x
    xdshort = xdiff[-nobs:]
#    xdshort = x[-nobs:]
#TODO: allow for 2nd xdshort as endog, with Phillips Perron or DF test?

    if store:
        resstore = ResultsStore()
    if autolag:
        if trendorder != -1:
            fullRHS = np.column_stack((trend[:nobs],xdall))
        else:
            fullRHS = xdall
        lagstart = trendorder + 1
        #search for lag length with highest information criteria
        #Note: use the same number of observations to have comparable IC
        icbest, bestlag = _autolag(sm.OLS, xdshort, fullRHS, lagstart,
                maxlag, autolag)

        #rerun ols with best autolag
        xdall = lagmat(xdiff[:,None], bestlag, trim='both')
        nobs = xdall.shape[0]
#        trend = np.vander(np.arange(nobs), trendorder+1)
        xdall[:,0] = x[-nobs-1:-1] # replace 0 xdiff with level of x
        xdshort = xdiff[-nobs:]
        usedlag = bestlag
    else:
        usedlag = maxlag

    resols = sm.OLS(xdshort, np.column_stack([xdall[:,:usedlag+1],
        trend[:nobs]])).fit()
    #NOTE: should be usedlag+1 since the first column is the level?
    adfstat = resols.tvalues[0]  # t-statistic on the lagged level
#    adfstat = (resols.params[0]-1.0)/resols.bse[0]
    # the "asymptotically correct" z statistic is obtained as
    # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
    # I think this is the statistic that is used for series that are integrated
    # for orders higher than I(1), ie., not ADF but cointegration tests.

    # Get approx p-value and critical values
    pvalue = mackinnonp(adfstat, regression=regression, N=1)
    critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
    critvalues = {"1%" : critvalues[0], "5%" : critvalues[1],
            "10%" : critvalues[2]}
    if store:
        resstore.resols = resols
        resstore.usedlag = usedlag
        resstore.adfstat = adfstat
        resstore.critvalues = critvalues
        resstore.nobs = nobs
        resstore.H0 = "The coefficient on the lagged level equals 1"
        resstore.HA = "The coefficient on the lagged level < 1"
        resstore.icbest = icbest
        return adfstat, pvalue, critvalues, resstore
    else:
        if not autolag:
            return adfstat, pvalue, usedlag, nobs, critvalues
        else:
            return adfstat, pvalue, usedlag, nobs, critvalues, icbest
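A hedged usage sketch on a simulated random walk, for which the unit-root null holds and the test should not reject:

import numpy as np

np.random.seed(12345)
x = np.cumsum(np.random.randn(500))   # random walk: the null should stand

adf, pvalue, usedlag, nobs, critvalues = adfuller(x, maxlag=4, autolag=None)
print('ADF = %.3f, p-value = %.3f' % (adf, pvalue))
print(critvalues)   # compare the statistic against the 1%/5%/10% values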