def _estimate_svar(self, start_params, lags, maxiter, maxfun,
                   trend='c', solver="nm", override=False):
    """
    Estimate the reduced-form VAR by least squares, then solve for A and B.

    Parameters
    ----------
    start_params, maxiter, maxfun, solver, override
        Passed through unchanged to ``self._solve_AB``.
    lags : int
        Number of lags of the endogenous variables.
    trend : string or None
        Trend specification, forwarded to ``util.get_trendorder`` and
        ``util.get_var_endog``.

    Returns
    -------
    SVARResults
        Built from the full ``self.endog``, the regressor matrix, the
        least-squares coefficients, the residual covariance and A / B.

    Notes
    -----
    ``self.sigma_u`` is assigned before ``self._solve_AB`` is called.
    """
    n_trend = util.get_trendorder(trend)
    endog = self.endog
    regressors = util.get_var_endog(endog, lags, trend=trend,
                                    has_constant='raise')
    target = endog[lags:]

    # Lutkepohl p75: solve the stacked system with a single lstsq call.
    var_params = np.linalg.lstsq(regressors, target, rcond=-1)[0]
    residuals = target - np.dot(regressors, var_params)

    # Residual covariance with a degrees-of-freedom correction: usable
    # observations minus the number of regressors per equation.
    # TODO: should give users the option to use a dof correction or not
    n_used = len(target)
    dof = n_used - (self.neqs * lags + n_trend)
    omega = np.dot(residuals.T, residuals) / dof
    self.sigma_u = omega

    A, B = self._solve_AB(start_params, override=override, solver=solver,
                          maxiter=maxiter, maxfun=maxfun)

    return SVARResults(endog, regressors, var_params, omega, lags,
                       names=self.endog_names, trend=trend,
                       dates=self.data.dates, model=self,
                       A=A, B=B,
                       A_mask=self.A_mask, B_mask=self.B_mask)
def __init__(self, data, lag_order=1, window=None, window_type='expanding',
             trend='c', min_periods=None):
    """
    Build the lagged regressors, filter the data and configure the window.

    Parameters
    ----------
    data : object with a ``columns`` attribute
        Original observations; stored as ``self._y_orig``.
    lag_order : int, default 1
        Number of lags passed to ``_make_lag_matrix``.
    window, window_type, min_periods
        Forwarded unchanged to ``self._set_window``.
    trend : str, default 'c'
        Only used to compute ``self.trendorder``; an ``'intercept'`` column
        of ones is always added to the regressors (see TODO below).

    Warns
    -----
    DeprecationWarning
        Always emitted, attributed to the caller's line.
    """
    self.lag_order = lag_order
    self.neqs = len(data.columns)

    self._y_orig = data

    # TODO: deal with trend
    self._x_orig = _make_lag_matrix(data, lag_order)
    self._x_orig['intercept'] = 1

    (self.y, self.x, self.x_filtered, self._index,
     self._time_has_obs) = _filter_data(self._y_orig, self._x_orig)

    self.trendorder = util.get_trendorder(trend)

    self._set_window(window_type, window, min_periods)

    # stacklevel=2 points the warning at the code that instantiated the
    # class; warnings attributed to library code are hidden by the default
    # DeprecationWarning filter.
    warnings.warn(
        'DynamicPanelVAR is deprecated and will be removed in a '
        'future version, use VAR or VARMAX.',
        DeprecationWarning, stacklevel=2)
def __init__(self, endog, endog_lagged, params, sigma_u, lag_order,
             model=None, trend='c', names=None, dates=None):
    """
    Store the estimation inputs and initialise the VARProcess parent.

    Parameters
    ----------
    endog : array with a two-element ``shape``
        Stored as both ``self.y`` and ``self.endog`` (alias kept for now).
    endog_lagged
        Stored as both ``self.ys_lagged`` and ``self.endog_lagged``.
    params : array
        Stacked coefficients: the first ``k_trend`` rows are skipped, the
        rest are reshaped to ``(lag_order, neqs, neqs)``.
    sigma_u
        Passed through to the parent initialiser.
    lag_order : int
        Number of lags.
    model, dates
        Stored as attributes.
    trend : str
        Converted to a term count with ``util.get_trendorder``.
    names
        Used for ``exog_names`` and passed to the parent initialiser.
    """
    self.model = model
    # keep alias for now
    self.y = endog
    self.endog = endog
    # keep alias for now
    self.ys_lagged = endog_lagged
    self.endog_lagged = endog_lagged
    self.dates = dates

    n_totobs, neqs = self.y.shape
    self.n_totobs = n_totobs
    self.nobs = n_totobs - lag_order

    k_trend = util.get_trendorder(trend)
    self.k_trend = k_trend
    # polynomial trend order, or None when there is no trend term
    self.trendorder = k_trend - 1 if k_trend > 0 else None

    self.exog_names = util.make_lag_names(names, lag_order, k_trend)
    self.params = params

    # Initialize VARProcess parent class.
    # Rows after the trend rows hold the lag coefficients; each per-lag
    # block has to be transposed.
    lag_block = self.params[self.k_trend:].reshape((lag_order, neqs, neqs))
    coefs = lag_block.swapaxes(1, 2).copy()

    # NOTE(review): the first row is used as the intercept even when
    # k_trend == 0, where it is a lag-coefficient row -- confirm intended.
    intercept = self.params[0]

    super(VARResults, self).__init__(coefs, intercept, sigma_u, names=names)
def __init__(self, data, lag_order=1, window=None, window_type='expanding',
             trend='c', min_periods=None):
    """
    Build the lagged regressors, filter the data and configure the window.

    Parameters
    ----------
    data : object with a ``columns`` attribute
        Original observations; the column labels become ``self.names``.
    lag_order : int, default 1
        Number of lags passed to ``_make_lag_matrix``.
    window, window_type, min_periods
        Forwarded unchanged to ``self._set_window``.
    trend : str, default 'c'
        Only used to compute ``self.trendorder``; an ``'intercept'`` column
        of ones is always added to the regressors (see TODO below).
    """
    names = list(data.columns)
    self.lag_order = lag_order
    self.names = names
    self.neqs = len(names)

    self._y_orig = data

    # TODO: deal with trend
    lagged = _make_lag_matrix(data, lag_order)
    self._x_orig = lagged
    self._x_orig['intercept'] = 1

    filtered = _filter_data(self._y_orig, self._x_orig)
    (self.y, self.x, self.x_filtered,
     self._index, self._time_has_obs) = filtered

    self.trendorder = util.get_trendorder(trend)

    self._set_window(window_type, window, min_periods)
def fit(self, maxlags=None, method='ols', ic=None, trend='c',
        verbose=False):
    """
    Fit the VAR model

    Parameters
    ----------
    maxlags : int
        When `ic` is given, the maximum number of lags handed to
        ``select_order`` (see that function for its default). When `ic` is
        None, this is the lag order itself and defaults to 1.
    method : {'ols'}
        Estimation method to use. Currently not referenced by this method.
    ic : {'aic', 'fpe', 'hqic', 'bic', None}
        Information criterion to use for VAR order selection.
        aic : Akaike
        fpe : Final prediction error
        hqic : Hannan-Quinn
        bic : Bayesian a.k.a. Schwarz
    verbose : bool, default False
        Print order selection output to the screen
    trend : str {"c", "ct", "ctt", "nc"}
        "c" - add constant
        "ct" - constant and trend
        "ctt" - constant, linear and quadratic trend
        "nc" - no constant, no trend
        Note that these are prepended to the columns of the dataset.

    Notes
    -----
    Lutkepohl pp. 146-153

    Returns
    -------
    est : VARResults

    Raises
    ------
    ValueError
        If `trend` is not one of the supported codes, or if `ic` is not a
        key of the mapping returned by ``select_order``.
    """
    if trend not in ('c', 'ct', 'ctt', 'nc'):
        raise ValueError("trend '{}' not supported for VAR".format(trend))

    if ic is not None:
        selections = self.select_order(maxlags=maxlags, verbose=verbose)
        if ic not in selections:
            # ValueError is a subclass of Exception, so callers that caught
            # the previous bare Exception keep working.
            raise ValueError("%s not recognized, must be among %s"
                             % (ic, sorted(selections)))
        lags = selections[ic]
        if verbose:
            print('Using %d based on %s criterion' % (lags, ic))
    elif maxlags is None:
        lags = 1
    else:
        lags = maxlags

    k_trend = util.get_trendorder(trend)
    self.exog_names = util.make_lag_names(self.endog_names, lags, k_trend)
    self.nobs = len(self.endog) - lags

    return self._estimate_var(lags, trend=trend)
def fit(self, maxlags=None, method='ols', ic=None, trend='c',
        verbose=False):
    """
    Fit the VAR model

    Parameters
    ----------
    maxlags : int
        When `ic` is given, the maximum number of lags handed to
        ``select_order`` (see that function for its default). When `ic` is
        None, this is the lag order itself and defaults to 1.
    method : {'ols'}
        Estimation method to use. Currently not referenced by this method.
    ic : {'aic', 'fpe', 'hqic', 'bic', None}
        Information criterion to use for VAR order selection.
        aic : Akaike
        fpe : Final prediction error
        hqic : Hannan-Quinn
        bic : Bayesian a.k.a. Schwarz
    verbose : bool, default False
        Print order selection output to the screen
    trend : str {"c", "ct", "ctt", "nc"}
        "c" - add constant
        "ct" - constant and trend
        "ctt" - constant, linear and quadratic trend
        "nc" - no constant, no trend
        Note that these are prepended to the columns of the dataset.

    Notes
    -----
    Lutkepohl pp. 146-153

    Returns
    -------
    est : VARResults

    Raises
    ------
    ValueError
        If `trend` is not one of the supported codes, or if `ic` is not a
        key of the mapping returned by ``select_order``.
    """
    supported_trends = ('c', 'ct', 'ctt', 'nc')
    if trend not in supported_trends:
        raise ValueError("trend '{}' not supported for VAR".format(trend))

    if ic is None:
        # No order selection: maxlags is the lag order (default 1).
        lags = 1 if maxlags is None else maxlags
    else:
        selections = self.select_order(maxlags=maxlags, verbose=verbose)
        if ic not in selections:
            # ValueError is a subclass of Exception, so callers that caught
            # the previous bare Exception keep working.
            raise ValueError("%s not recognized, must be among %s"
                             % (ic, sorted(selections)))
        lags = selections[ic]
        if verbose:
            print('Using %d based on %s criterion' % (lags, ic))

    k_trend = util.get_trendorder(trend)
    self.exog_names = util.make_lag_names(self.endog_names, lags, k_trend)
    self.nobs = len(self.endog) - lags

    return self._estimate_var(lags, trend=trend)
def __init__(self, endog, endog_lagged, params, sigma_u, lag_order,
             A=None, B=None, A_mask=None, B_mask=None, model=None,
             trend='c', names=None, dates=None):
    """
    Store the estimation inputs and initialise the SVAR process parent.

    Parameters
    ----------
    endog : array with a two-element ``shape``
        Stored as both ``self.y`` and ``self.endog`` (alias kept for now).
    endog_lagged
        Stored as both ``self.ys_lagged`` and ``self.endog_lagged``.
    params : array
        Stacked coefficients: the first ``k_trend`` rows are skipped, the
        rest are reshaped to ``(lag_order, neqs, neqs)``.
    sigma_u
        Stored and passed to the parent initialiser.
    lag_order : int
        Number of lags.
    A, B, A_mask, B_mask
        SVAR components; stored, and A / B are passed to the parent.
    model, dates
        Stored as attributes.
    trend : str
        Converted to a term count with ``util.get_trendorder``.
    names
        Used for ``exog_names`` and passed to the parent initialiser.
    """
    self.model = model
    # keep alias for now
    self.y = endog
    self.endog = endog
    # keep alias for now
    self.ys_lagged = endog_lagged
    self.endog_lagged = endog_lagged
    self.dates = dates

    self.n_totobs, self.neqs = self.y.shape
    self.nobs = self.n_totobs - lag_order

    k_trend = util.get_trendorder(trend)
    self.k_trend = k_trend
    self.k_exog = k_trend  # now (0.9) required by VARProcess
    # polynomial trend order, or None when there is no trend term
    self.trendorder = k_trend - 1 if k_trend > 0 else None

    self.exog_names = util.make_lag_names(names, lag_order, k_trend)
    self.params = params
    self.sigma_u = sigma_u

    # Rows after the trend rows hold the lag coefficients; each per-lag
    # block has to be transposed.
    block_shape = (lag_order, self.neqs, self.neqs)
    lag_block = self.params[self.k_trend:].reshape(block_shape)
    coefs = lag_block.swapaxes(1, 2).copy()

    # NOTE(review): the first row is used as the intercept even when
    # k_trend == 0, where it is a lag-coefficient row -- confirm intended.
    intercept = self.params[0]

    # SVAR components
    # TODO: if you define these here, you don't also have to define
    # them in SVAR process, but I left them for now -ss
    self.A = A
    self.B = B
    self.A_mask = A_mask
    self.B_mask = B_mask

    super(SVARResults, self).__init__(coefs, intercept, sigma_u, A, B,
                                      names=names)
def test_get_trendorder():
    """Check ``util.get_trendorder`` for each supported trend code."""
    expected = {'c': 1, 'nc': 0, 'ct': 2, 'ctt': 3}

    for code in expected:
        assert util.get_trendorder(code) == expected[code]
def test_get_trendorder():
    """Check ``util.get_trendorder`` for each supported trend code."""
    results = {
        'c': 1,
        'nc': 0,
        'ct': 2,
        'ctt': 3,
    }

    # dict.iteritems() does not exist on Python 3; items() works on both.
    for t, trendorder in results.items():
        assert util.get_trendorder(t) == trendorder
def __init__(self, endog, endog_lagged, params, sigma_u, lag_order,
             A=None, B=None, A_mask=None, B_mask=None, model=None,
             trend="c", names=None, dates=None):
    """
    Store the estimation inputs and initialise the SVAR process parent.

    Parameters
    ----------
    endog : array with a two-element ``shape``
        Stored as both ``self.y`` and ``self.endog`` (alias kept for now).
    endog_lagged
        Stored as both ``self.ys_lagged`` and ``self.endog_lagged``.
    params : array
        Stacked coefficients: the first ``k_trend`` rows are skipped, the
        rest are reshaped to ``(lag_order, neqs, neqs)``.
    sigma_u
        Stored and passed to the parent initialiser.
    lag_order : int
        Number of lags.
    A, B, A_mask, B_mask
        SVAR components; stored, and A / B are passed to the parent.
    model, dates
        Stored as attributes.
    trend : str
        Converted to a term count with ``util.get_trendorder``.
    names
        Used for ``exog_names`` and passed to the parent initialiser.
    """
    self.model = model
    # keep alias for now
    self.y = endog
    self.endog = endog
    # keep alias for now
    self.ys_lagged = endog_lagged
    self.endog_lagged = endog_lagged
    self.dates = dates

    self.n_totobs, self.neqs = self.y.shape
    self.nobs = self.n_totobs - lag_order

    k_trend = util.get_trendorder(trend)
    self.k_trend = k_trend
    # polynomial trend order, or None when there is no trend term
    self.trendorder = None if k_trend <= 0 else k_trend - 1

    self.exog_names = util.make_lag_names(names, lag_order, k_trend)
    self.params = params
    self.sigma_u = sigma_u

    # Rows after the trend rows hold the lag coefficients; each per-lag
    # block has to be transposed.
    lag_rows = self.params[self.k_trend:]
    per_lag = lag_rows.reshape((lag_order, self.neqs, self.neqs))
    coefs = per_lag.swapaxes(1, 2).copy()

    # NOTE(review): the first row is used as the intercept even when
    # k_trend == 0, where it is a lag-coefficient row -- confirm intended.
    intercept = self.params[0]

    # SVAR components
    # TODO: if you define these here, you don't also have to define
    # them in SVAR process, but I left them for now -ss
    self.A, self.B = A, B
    self.A_mask, self.B_mask = A_mask, B_mask

    super(SVARResults, self).__init__(
        coefs, intercept, sigma_u, A, B, names=names
    )
def _stackX(self, k_ar, trend):
    """
    Private method to build the RHS matrix for estimation.

    Columns are trend terms then lags. The number of trend terms is also
    recorded on ``self.k_trend``.
    """
    design = lagmat(self.endog, maxlag=k_ar, trim='both')
    n_trend = util.get_trendorder(trend)
    if n_trend:
        # prepend=True puts the trend columns in front of the lag columns
        design = add_trend(design, prepend=True, trend=trend)
    self.k_trend = n_trend
    return design
def _estimate_svar(self, start_params, lags, maxiter, maxfun,
                   trend="c", solver="nm", override=False):
    """
    Estimate the reduced-form VAR by least squares, then solve for A and B.

    Parameters
    ----------
    start_params, maxiter, maxfun, solver, override
        Passed through unchanged to ``self._solve_AB``.
    lags : int
        Number of lags of the endogenous variables.
    trend : string or None
        As per above

    Returns
    -------
    SVARResults

    Notes
    -----
    ``self.sigma_u`` is assigned before ``self._solve_AB`` is called.
    """
    k_trend = util.get_trendorder(trend)
    y = self.endog
    z = util.get_var_endog(y, lags, trend=trend)
    y_sample = y[lags:]

    # Lutkepohl p75, about 5x faster than stated formula.
    # rcond=-1 pins the legacy machine-precision cutoff, so the result does
    # not depend on the NumPy version's default and no FutureWarning is
    # emitted.
    var_params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
    resid = y_sample - np.dot(z, var_params)

    # Estimate of the covariance matrix Sigma_u of the white noise process
    # u, with a degrees-of-freedom correction (Ref: Lutkepohl p.75):
    # usable observations minus the number of regressors per equation.
    avobs = len(y_sample)
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    # TODO: should give users the option to use a dof correction or not
    omega = sse / df_resid
    self.sigma_u = omega

    A, B = self._solve_AB(start_params, override=override, solver=solver,
                          maxiter=maxiter, maxfun=maxfun)
    A_mask = self.A_mask
    B_mask = self.B_mask

    return SVARResults(
        y,
        z,
        var_params,
        omega,
        lags,
        names=self.endog_names,
        trend=trend,
        dates=self.data.dates,
        model=self,
        A=A,
        B=B,
        A_mask=A_mask,
        B_mask=B_mask,
    )
def _estimate_var(self, lags, offset=0, trend="c"):
    """
    Estimate the VAR coefficients by least squares.

    Parameters
    ----------
    lags : int
        Number of lags of the endogenous variables.
    offset : int
        Periods to drop from beginning-- for order selection so it's an
        apples-to-apples comparison
    trend : string or None
        As per above

    Returns
    -------
    VARResultsWrapper

    Raises
    ------
    ValueError
        If `offset` is negative.
    """
    # have to do this again because select_order doesn't call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError("offset must be >= 0")

    y = self.y[offset:]

    z = util.get_var_endog(y, lags, trend=trend)
    y_sample = y[lags:]

    # Lutkepohl p75, about 5x faster than stated formula.
    # rcond=-1 pins the legacy machine-precision cutoff, so the result does
    # not depend on the NumPy version's default and no FutureWarning is
    # emitted.
    params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
    resid = y_sample - np.dot(z, params)

    # Estimate of the covariance matrix Sigma_u of the white noise process
    # u, with a degrees-of-freedom correction (Ref: Lutkepohl p.75):
    # usable observations minus the number of regressors per equation.
    avobs = len(y_sample)
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(
        y, z, params, omega, lags,
        names=self.endog_names, trend=trend,
        dates=self.data.dates, model=self,
    )
    return VARResultsWrapper(varfit)
def predict(self, params, start=None, end=None, lags=1, trend='c'):
    """
    Returns in-sample predictions or forecasts

    Parameters
    ----------
    params : array
        Stacked coefficients: the first ``k_trend`` rows are the trend
        terms, the rest are reshaped to ``(lags, neqs, neqs)``.
    start, end
        Prediction bounds, resolved by ``self._get_prediction_index``.
        `start` defaults to `lags`.
    lags : int, default 1
        Number of lags.
    trend : str, default 'c'
        Trend specification, converted with ``util.get_trendorder``.

    Returns
    -------
    ndarray
        Empty when ``end == start + out_of_sample``; otherwise one row per
        predicted period and one column per equation.

    Raises
    ------
    ValueError
        If the resolved `end` is before the resolved `start`.
    """
    # Bind k_ar before it is first read: the original assigned it further
    # down, so start=None raised UnboundLocalError.
    k_ar = lags
    if start is None:
        start = k_ar

    # Handle start, end
    start, end, out_of_sample, prediction_index = (
        self._get_prediction_index(start, end))

    if end < start:
        raise ValueError("end is before start")
    if end == start + out_of_sample:
        return np.array([])

    k_trend = util.get_trendorder(trend)
    k = self.neqs

    predictedvalues = np.zeros((end + 1 - start + out_of_sample, k))
    if k_trend != 0:
        intercept = params[:k_trend]
        predictedvalues += intercept
    else:
        # No deterministic terms: use a zero intercept so the out-of-sample
        # branch below does not hit an unbound name.
        intercept = np.zeros(k)

    y = self.y
    X = util.get_var_endog(y, lags, trend=trend, has_constant='raise')
    fittedvalues = np.dot(X, params)

    # fittedvalues starts at observation k_ar, hence the shifted indices
    fv_start = start - k_ar
    pv_end = min(len(predictedvalues), len(fittedvalues) - fv_start)
    fv_end = min(len(fittedvalues), end - k_ar + 1)
    predictedvalues[:pv_end] = fittedvalues[fv_start:fv_end]

    if not out_of_sample:
        return predictedvalues

    # fit out of sample
    y = y[-k_ar:]
    coefs = params[k_trend:].reshape((k_ar, k, k)).swapaxes(1, 2)
    predictedvalues[pv_end:] = forecast(y, coefs, intercept, out_of_sample)
    return predictedvalues
def _estimate_var(self, lags, offset=0, trend='c'):
    """
    Estimate the VAR coefficients by least squares.

    Parameters
    ----------
    lags : int
        Number of lags of the endogenous variables.
    offset : int
        Periods to drop from beginning-- for order selection so it's an
        apples-to-apples comparison
    trend : string or None
        As per above

    Returns
    -------
    VARResultsWrapper

    Raises
    ------
    ValueError
        If `offset` is negative.
    """
    # have to do this again because select_order doesn't call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError('offset must be >= 0')

    y = self.y[offset:]

    z = util.get_var_endog(y, lags, trend=trend)
    y_sample = y[lags:]

    # Lutkepohl p75, about 5x faster than stated formula.
    # rcond=-1 pins the legacy machine-precision cutoff, so the result does
    # not depend on the NumPy version's default and no FutureWarning is
    # emitted.
    params = np.linalg.lstsq(z, y_sample, rcond=-1)[0]
    resid = y_sample - np.dot(z, params)

    # Estimate of the covariance matrix Sigma_u of the white noise process
    # u, with a degrees-of-freedom correction (Ref: Lutkepohl p.75):
    # usable observations minus the number of regressors per equation.
    avobs = len(y_sample)
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(y, z, params, omega, lags, names=self.endog_names,
                        trend=trend, dates=self.data.dates, model=self)
    return VARResultsWrapper(varfit)
def __init__(self, data, lag_order=1, window=None, window_type='expanding',
             trend='c', min_periods=None):
    """
    Build the lagged regressors, filter the data and configure the window.

    Parameters
    ----------
    data : object with a ``columns`` attribute
        Original observations; stored as ``self._y_orig``.
    lag_order : int, default 1
        Number of lags passed to ``_make_lag_matrix``.
    window, window_type, min_periods
        Forwarded unchanged to ``self._set_window``.
    trend : str, default 'c'
        Only used to compute ``self.trendorder``; an ``'intercept'`` column
        of ones is always added to the regressors (see TODO below).

    Warns
    -----
    DeprecationWarning
        Always emitted, attributed to the caller's line.
    """
    self.lag_order = lag_order
    self.neqs = len(data.columns)

    self._y_orig = data

    # TODO: deal with trend
    self._x_orig = _make_lag_matrix(data, lag_order)
    self._x_orig['intercept'] = 1

    (self.y, self.x, self.x_filtered, self._index,
     self._time_has_obs) = _filter_data(self._y_orig, self._x_orig)

    self.trendorder = util.get_trendorder(trend)

    self._set_window(window_type, window, min_periods)

    # Message spelling fixed ("depricated" -> "deprecated"). stacklevel=2
    # points the warning at the code that instantiated the class; warnings
    # attributed to library code are hidden by the default filter.
    warnings.warn('DynamicPanelVAR is deprecated and will be removed in a '
                  'future version, use VAR or VARMAX.',
                  DeprecationWarning, stacklevel=2)
def test_get_trendorder():
    """Check ``util.get_trendorder`` for each supported trend code."""
    cases = (
        ("c", 1),
        ("n", 0),
        ("ct", 2),
        ("ctt", 3),
    )
    for code, expected_order in cases:
        assert util.get_trendorder(code) == expected_order
def _estimate_var(self, lags, offset=0, trend='c'):
    """
    Fit the VAR coefficients by bounded / topology-constrained least squares.

    The coefficient matrix is found with SLSQP by minimising the
    element-wise squared difference between ``z @ c`` and the sample,
    optionally subject to ``self.bounds`` and to zero restrictions derived
    from ``self.adjacency``.

    Parameters
    ----------
    lags : int
        Lags of the endogenous variable.
    offset : int
        Periods to drop from beginning-- for order selection so it's an
        apples-to-apples comparison
    trend : string or None
        As per above

    Returns
    -------
    VARResultsWrapper

    Raises
    ------
    ValueError
        If `offset` is negative.
    """
    # have to do this again because select_order doesn't call fit
    self.k_trend = k_trend = util.get_trendorder(trend)

    if offset < 0:  # pragma: no cover
        raise ValueError('offset must be >= 0')

    nobs = self.n_totobs - lags - offset
    endog = self.endog[offset:]
    exog = None if self.exog is None else self.exog[offset:]
    z = util.get_var_endog(endog, lags, trend=trend, has_constant='raise')
    if exog is not None:
        # TODO: currently only deterministic terms supported (exoglags==0)
        # and since exoglags==0, x will be an array of size 0.
        x = util.get_var_endog(exog[-nobs:], 0, trend="nc",
                               has_constant="raise")
        x_inst = exog[-nobs:]
        x = np.column_stack((x, x_inst))
        del x_inst  # free memory
        # Rebuild z with the layout: trend columns | exog columns | lags
        temp_z = z
        n_x = x.shape[1]
        z = np.empty((x.shape[0], n_x + temp_z.shape[1]))
        z[:, :self.k_trend] = temp_z[:, :self.k_trend]
        z[:, self.k_trend:self.k_trend + n_x] = x
        z[:, self.k_trend + n_x:] = temp_z[:, self.k_trend:]
        del temp_z, x  # free memory

    # the following modification of z is necessary to get the same results
    # as JMulTi for the constant-term-parameter...
    for i in range(self.k_trend):
        if (np.diff(z[:, i]) == 1).all():  # modify the trend-column
            z[:, i] += lags
        # make the same adjustment for the quadratic term
        if (np.diff(np.sqrt(z[:, i])) == 1).all():
            z[:, i] = (np.sqrt(z[:, i]) + lags)**2

    y_sample = endog[lags:]

    # ---- topology constrained VAR model fitting -------------------------
    # sizes of the coefficient matrix c
    num_rows = z.shape[1]
    num_nodes = y_sample.shape[1]
    coef_shape = (num_rows, num_nodes)

    def loss(c):
        """Element-wise square-sum of z @ c - y_sample (c arrives flat)."""
        c = c.reshape(coef_shape)
        return np.sum(np.square((np.dot(z, c) - y_sample)))

    # initial value of the variable
    c0 = np.zeros(coef_shape)

    # bounds of the variables to be optimized: self.bounds for every entry
    if self.bounds is not None:
        bnds = np.tensordot(np.ones(num_rows * num_nodes), self.bounds,
                            axes=0)
    else:
        bnds = None

    # Constraints stay empty unless a topology with missing links is given.
    constraints = ()
    if self.adjacency is not None:
        # Index matrix marking coefficients that must be zero because the
        # graph has no connection there. The first row is all zeros, so the
        # first coefficient row is left unrestricted.
        # NOTE(review): H has 1 + lags * (rows of adjacency.T) rows, so it
        # only lines up with c when there is exactly one deterministic
        # column -- confirm for other trend / exog settings.
        H = np.zeros((1, num_nodes), dtype=int)
        no_link = 1 - self.adjacency.T
        for _ in range(0, lags):
            H = np.append(H, no_link, axis=0)

        def masked_square_sum(c):
            """Square-sum of the coefficients flagged by H; zero if none."""
            return np.sum(np.square(H * c.reshape(coef_shape)))

        # with full connections no constraint should be imposed
        if not self.adjacency.all():
            constraints = {'type': 'eq', 'fun': masked_square_sum}

    res = minimize(loss, c0, method='SLSQP', constraints=constraints,
                   bounds=bnds, options={'disp': True})

    params = res.x.reshape(coef_shape)
    # ----------------------------------------------------------------------

    # Unconstrained alternative (Lutkepohl p75):
    # params = np.linalg.lstsq(z, y_sample, rcond=1e-15)[0]
    resid = y_sample - np.dot(z, params)

    # Estimate of the covariance matrix Sigma_u of the white noise process
    # u, with a degrees-of-freedom correction (Ref: Lutkepohl p.75). The
    # exogenous columns count as additional deterministic regressors.
    avobs = len(y_sample)
    if exog is not None:
        k_trend += exog.shape[1]
    df_resid = avobs - (self.neqs * lags + k_trend)

    sse = np.dot(resid.T, resid)
    omega = sse / df_resid

    varfit = VARResults(endog, z, params, omega, lags,
                        names=self.endog_names, trend=trend,
                        dates=self.data.dates, model=self, exog=self.exog)
    return VARResultsWrapper(varfit)
def test_get_trendorder():
    """Check ``util.get_trendorder`` for each supported trend code."""
    results = {"c": 1, "nc": 0, "ct": 2, "ctt": 3}

    # dict.iteritems() does not exist on Python 3; items() works on both.
    for t, trendorder in results.items():
        assert util.get_trendorder(t) == trendorder
# Scratch script: load a half-hour window, build a VAR with a constant
# exogenous column, then compare a hand-built lag matrix with the one from
# util.get_var_endog. The script stops at exit().
start_time = "2019-08-16 16:00:00"
end_time = "2019-08-16 16:30:00"
df = load_data(start_time, end_time, crypto_symbols, [], True)
data = df.to_numpy()

# single exogenous column of ones, one row per observation
exog = np.hstack([np.ones(len(data)).reshape(-1, 1)])
var_model = VAR(data, exog)

k_trend = util.get_trendorder('c')
p = 1
maxlags = 5
n_totobs = len(data)
lags = p
# drop the leading periods so every lag order up to maxlags would use the
# same sample
offset = maxlags + 1 - p
nobs = n_totobs - lags - offset

data = data[offset:]
exog = exog[offset:]
print(data)

# Row for time t: the `lags` previous observations, most recent first,
# flattened into one vector.
lag_rows = []
for t in range(lags, len(data)):
    lag_rows.append(data[t - lags:t][::-1].ravel())
Z = np.array(lag_rows)
print(Z)

z = util.get_var_endog(data, lags, trend='c', has_constant='raise')
print(z.shape)
exit()