def pdf(self, endog_predict=None, exog_predict=None):
    r"""
    Evaluate the probability density function.

    Parameters
    ----------
    endog_predict : array_like, optional
        Evaluation data for the dependent variables. If unspecified, the
        training data is used.
    exog_predict : array_like, optional
        Evaluation data for the independent variables. If unspecified, the
        training data is used.

    Returns
    -------
    pdf : array_like
        The value of the probability density at `endog_predict` and
        `exog_predict`.

    Notes
    -----
    The formula for the conditional probability density is

    .. math:: f(Y|X)=\frac{f(X,Y)}{f(X)}

    where each density is estimated with the generalized product kernel

    .. math:: K_{h}(X_{i},X_{j}) =
        \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)

    and :math:`k` is the appropriate kernel for each variable type.
    """
    if endog_predict is None:
        endog_predict = self.endog
    else:
        endog_predict = _adjust_shape(endog_predict, self.k_dep)
    if exog_predict is None:
        exog_predict = self.exog
    else:
        exog_predict = _adjust_shape(exog_predict, self.k_indep)

    pdf_est = []
    data_predict = np.column_stack((endog_predict, exog_predict))
    for i in range(np.shape(data_predict)[0]):
        f_yx = gpke(self.bw, data=self.data,
                    data_predict=data_predict[i, :],
                    var_type=(self.dep_type + self.indep_type))
        f_x = gpke(self.bw[self.k_dep:], data=self.exog,
                   data_predict=exog_predict[i, :],
                   var_type=self.indep_type)
        pdf_est.append(f_yx / f_x)

    return np.squeeze(pdf_est)
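# A hedged usage sketch for the conditional pdf above. The constructor and
# argument names follow the statsmodels KDEMultivariateConditional API; if
# this module diverges from that API, treat the class name and the ``bw``
# option as assumptions. The helper name itself is hypothetical.
def _example_conditional_pdf():
    import numpy as np
    from statsmodels.nonparametric.kernel_density import \
        KDEMultivariateConditional

    rng = np.random.RandomState(0)
    x = rng.normal(size=500)
    y = 2 * x + rng.normal(size=500)
    kde = KDEMultivariateConditional(endog=[y], exog=[x], dep_type='c',
                                     indep_type='c', bw='normal_reference')
    # Evaluate f(y|x) at two (y, x) pairs; omitting both arguments would
    # evaluate at the training data instead.
    return kde.pdf(endog_predict=[0.0, 2.0], exog_predict=[0.0, 1.0])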
def _est_loc_constant(self, bw, endog, exog, data_predict):
    """
    Local constant estimator of g(x) in the regression ``y = g(x) + e``.

    Parameters
    ----------
    bw : array_like
        Array of bandwidth value(s).
    endog : 1D array_like
        The dependent variable.
    exog : 1D or 2D array_like
        The independent variable(s).
    data_predict : 1D or 2D array_like
        The point(s) at which the density is estimated.

    Returns
    -------
    G : ndarray
        The value of the conditional mean at `data_predict`.
    B_x : ndarray
        The marginal effects.
    """
    ker_x = gpke(bw, data=exog, data_predict=data_predict,
                 var_type=self.var_type, tosum=False)
    ker_x = np.reshape(ker_x, np.shape(endog))
    G_numer = (ker_x * endog).sum(axis=0)
    G_denom = ker_x.sum(axis=0)
    G = G_numer / G_denom
    nobs = exog.shape[0]
    # Kernel with the continuous components replaced by the Gaussian
    # derivative, used for the marginal effects below.
    ker_xc = gpke(bw, data=exog, data_predict=data_predict,
                  var_type=self.var_type,
                  ckertype='d_gaussian', tosum=False)
    ker_xc = ker_xc[:, np.newaxis]
    d_mx = -(endog * ker_xc).sum(axis=0) / float(nobs)
    d_fx = -ker_xc.sum(axis=0) / float(nobs)
    # Quotient rule applied to G = G_numer / G_denom; the signs are folded
    # into d_mx and d_fx above.
    B_x = (G_numer * d_fx - G_denom * d_mx) / (G_denom**2)
    return G, B_x
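# A minimal numeric sketch of the local constant (Nadaraya-Watson) part of
# the estimator above, restricted to one continuous regressor with a
# Gaussian kernel. It mirrors G = sum(K_h * y) / sum(K_h) in plain numpy,
# bypassing gpke, and omits the marginal-effects computation. The helper
# name is hypothetical.
def _example_local_constant(x, y, x0, h):
    import numpy as np
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    u = (x - x0) / h
    ker = np.exp(-0.5 * u**2) / (np.sqrt(2 * np.pi) * h)  # Gaussian K_h
    return (ker * y).sum() / ker.sum()                    # g_hat(x0)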
def pdf(self, data_predict=None):
    r"""
    Evaluate the probability density function.

    Parameters
    ----------
    data_predict : array_like, optional
        Points to evaluate at. If unspecified, the training data is used.

    Returns
    -------
    pdf_est : array_like
        Probability density function evaluated at `data_predict`.

    Notes
    -----
    The probability density is given by the generalized product kernel
    estimator:

    .. math:: K_{h}(X_{i},X_{j}) =
        \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
    """
    if data_predict is None:
        data_predict = self.data
    else:
        data_predict = _adjust_shape(data_predict, self.k_vars)

    pdf_est = []
    for i in range(np.shape(data_predict)[0]):
        pdf_est.append(gpke(self.bw, data=self.data,
                            data_predict=data_predict[i, :],
                            var_type=self.var_type) / self.nobs)

    return np.squeeze(pdf_est)
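# An equivalent numpy sketch of the product-kernel density above for the
# all-continuous case:
#     f_hat(x0) = n^{-1} sum_i prod_s h_s^{-1} k((x0_s - X_is) / h_s)
# with Gaussian k. gpke generalizes this to mixed variable types; the
# helper name is hypothetical.
def _example_product_kde(data, x0, bw):
    import numpy as np
    data = np.asarray(data, dtype=float)   # (nobs, k_vars)
    x0 = np.asarray(x0, dtype=float)       # (k_vars,)
    bw = np.asarray(bw, dtype=float)       # (k_vars,)
    u = (x0 - data) / bw
    ker = np.exp(-0.5 * u**2) / (np.sqrt(2 * np.pi) * bw)  # per-variable k
    return ker.prod(axis=1).mean()         # average of kernel products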
def loo_likelihood(self, bw, func=lambda x: x):
    r"""
    Returns the negative leave-one-out likelihood of the data.

    The leave-one-out likelihood function for the unconditional KDE.

    Parameters
    ----------
    bw : array_like
        The value for the bandwidth parameter(s).
    func : callable, optional
        Function to transform the likelihood values (before summing); for
        the log likelihood, use ``func=np.log``. Default is ``f(x) = x``.

    Returns
    -------
    L : float
        The negative of the (transformed) leave-one-out likelihood, so
        that minimizing it maximizes the likelihood.

    Notes
    -----
    The leave-one-out kernel estimator of :math:`f_{-i}` is:

    .. math:: f_{-i}(X_{i})=\frac{1}{(n-1)h}
        \sum_{j=1,j\neq i}K_{h}(X_{i},X_{j})

    where :math:`K_{h}` represents the generalized product kernel
    estimator:

    .. math:: K_{h}(X_{i},X_{j}) =
        \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
    """
    LOO = LeaveOneOut(self.data)
    L = 0
    for i, X_not_i in enumerate(LOO):
        # The negations appear to be a no-op for the symmetric kernels
        # used here; they only flip the sign of (data_predict - data).
        f_i = gpke(bw, data=-X_not_i, data_predict=-self.data[i, :],
                   var_type=self.var_type)
        L += func(f_i)

    return -L
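# A direct numpy sketch of the leave-one-out log-likelihood above for one
# continuous variable with a Gaussian kernel, useful for sanity checks on
# small samples. Note: this sketch includes the 1/(n-1) normalization from
# the docstring formula; gpke appears to return the unnormalized kernel
# sum, so under func=np.log the two differ only by a bandwidth-independent
# constant. The helper name is hypothetical.
def _example_loo_loglike(x, h):
    import numpy as np
    x = np.asarray(x, dtype=float)
    n = x.shape[0]
    total = 0.0
    for i in range(n):
        x_not_i = np.delete(x, i)
        u = (x[i] - x_not_i) / h
        f_i = (np.exp(-0.5 * u**2) / (np.sqrt(2 * np.pi) * h)).sum() / (n - 1)
        total += np.log(f_i)
    return -total  # negative, matching the return convention above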
def _est_loc_linear(self, bw, endog, exog, data_predict):
    """
    Local linear estimator of g(x) in the regression ``y = g(x) + e``.

    Parameters
    ----------
    bw : array_like
        Vector of bandwidth value(s).
    endog : 1D array_like
        The dependent variable.
    exog : 1D or 2D array_like
        The independent variable(s).
    data_predict : 1D array_like
        The point at which the density is estimated; length K, where K is
        the number of variables.

    Returns
    -------
    mean : ndarray
        The value of the conditional mean at `data_predict`.
    mfx : ndarray
        The marginal effects at `data_predict`.

    Notes
    -----
    See p. 81 in [1] and p. 38 in [2] for the formulas.
    Unlike other methods, this one requires that `data_predict` be 1D.
    """
    nobs, k_vars = exog.shape
    ker = gpke(bw, data=exog, data_predict=data_predict,
               var_type=self.var_type, tosum=False) / float(nobs)
    # Build the matrix on p. 492 in [7] (see also p. 38 in [2]), after the
    # multiplication with K_h,ij. All variables are used here, not just
    # the continuous ones.
    # Convert ker to a 2-D array to make the matrix operations below work.
    ker = ker[:, np.newaxis]

    M12 = exog - data_predict
    M22 = np.dot(M12.T, M12 * ker)
    M12 = (M12 * ker).sum(axis=0)

    M = np.empty((k_vars + 1, k_vars + 1))
    M[0, 0] = ker.sum()
    M[0, 1:] = M12
    M[1:, 0] = M12
    M[1:, 1:] = M22

    ker_endog = ker * endog
    V = np.empty((k_vars + 1, 1))
    V[0, 0] = ker_endog.sum()
    V[1:, 0] = ((exog - data_predict) * ker_endog).sum(axis=0)

    mean_mfx = np.dot(np.linalg.pinv(M), V)
    mean = mean_mfx[0]
    mfx = mean_mfx[1:, :]
    return mean, mfx
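# A numeric sketch of the local linear fit above for continuous regressors:
# build the (k+1) x (k+1) moment matrix M and vector V at x0 and solve
# M b = V, where b[0] is the conditional mean and b[1:] are the marginal
# effects. Pure numpy with a Gaussian product kernel; the helper name is
# hypothetical.
def _example_local_linear(exog, endog, x0, bw):
    import numpy as np
    exog = np.asarray(exog, dtype=float)               # (nobs, k)
    endog = np.asarray(endog, dtype=float).reshape(-1, 1)
    x0 = np.asarray(x0, dtype=float)
    bw = np.asarray(bw, dtype=float)
    nobs, k = exog.shape
    u = (exog - x0) / bw
    ker = (np.exp(-0.5 * u**2) / (np.sqrt(2 * np.pi) * bw)).prod(axis=1)
    ker = ker[:, np.newaxis] / nobs
    d = exog - x0
    M = np.empty((k + 1, k + 1))
    M[0, 0] = ker.sum()
    M[0, 1:] = (d * ker).sum(axis=0)
    M[1:, 0] = M[0, 1:]
    M[1:, 1:] = np.dot(d.T, d * ker)
    V = np.empty((k + 1, 1))
    V[0, 0] = (ker * endog).sum()
    V[1:, 0] = (d * ker * endog).sum(axis=0)
    b = np.dot(np.linalg.pinv(M), V)
    return b[0, 0], b[1:, 0]  # mean at x0, marginal effects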
def _est_loc_linear(self, bw, endog, exog, data_predict, W):
    """
    Local linear estimator of g(x) in the regression ``y = g(x) + e``.

    Parameters
    ----------
    bw : array_like
        Vector of bandwidth value(s).
    endog : 1D array_like
        The dependent variable.
    exog : 1D or 2D array_like
        The independent variable(s).
    data_predict : 1D array_like
        The point at which the density is estimated; length K, where K is
        the number of variables.
    W : array_like
        Observation weights, applied multiplicatively to the kernel
        weights.

    Returns
    -------
    mean : ndarray
        The value of the conditional mean at `data_predict`.
    mfx : ndarray
        The marginal effects at `data_predict`.

    Notes
    -----
    See p. 81 in [1] and p. 38 in [2] for the formulas.
    Unlike other methods, this one requires that `data_predict` be 1D.
    """
    nobs, k_vars = exog.shape
    ker = gpke(bw, data=exog, data_predict=data_predict,
               var_type=self.var_type,
               ukertype='aitchison_aitken_reg',
               okertype='wangryzin_reg',
               tosum=False)
    # Build the matrix on p. 492 in [7] (see also p. 38 in [2]), after the
    # multiplication with K_h,ij.
    # Convert ker to a 2-D array to make the matrix operations below work.
    ker = W * ker[:, np.newaxis]

    M12 = exog - data_predict
    M22 = np.dot(M12.T, M12 * ker)
    M12 = (M12 * ker).sum(axis=0)

    M = np.empty((k_vars + 1, k_vars + 1))
    M[0, 0] = ker.sum()
    M[0, 1:] = M12
    M[1:, 0] = M12
    M[1:, 1:] = M22

    ker_endog = ker * endog
    V = np.empty((k_vars + 1, 1))
    V[0, 0] = ker_endog.sum()
    V[1:, 0] = ((exog - data_predict) * ker_endog).sum(axis=0)

    mean_mfx = np.dot(np.linalg.pinv(M), V)
    mean = mean_mfx[0]
    mfx = mean_mfx[1:, :]
    return mean, mfx
def loo_likelihood(self, bw, func=lambda x: x):
    """
    Returns the negative leave-one-out conditional likelihood of the data.

    If `func` is not the default identity, the leave-one-out conditional
    likelihood values are transformed by `func` before summing.

    Parameters
    ----------
    bw : array_like
        The bandwidth parameter(s).
    func : callable, optional
        Function to transform the likelihood values (before summing); for
        the log likelihood, use ``func=np.log``. Default is ``f(x) = x``.

    Returns
    -------
    L : float
        The negative of the (transformed) leave-one-out conditional
        likelihood of the data.

    Notes
    -----
    Similar to ``KDE.loo_likelihood``, but substitutes the conditional
    density ``f(y|x) = f(x,y) / f(x)`` for ``f(x)``.
    """
    yLOO = LeaveOneOut(self.data)
    xLOO = iter(LeaveOneOut(self.exog))
    L = 0
    for i, Y_j in enumerate(yLOO):
        X_not_i = next(xLOO)
        f_yx = gpke(bw, data=-Y_j, data_predict=-self.data[i, :],
                    var_type=(self.dep_type + self.indep_type))
        f_x = gpke(bw[self.k_dep:], data=-X_not_i,
                   data_predict=-self.exog[i, :],
                   var_type=self.indep_type)
        f_i = f_yx / f_x
        L += func(f_i)

    return -L
def cdf(self, data_predict=None):
    r"""
    Evaluate the cumulative distribution function.

    Parameters
    ----------
    data_predict : array_like, optional
        Points to evaluate at. If unspecified, the training data is used.

    Returns
    -------
    cdf_est : array_like
        The estimate of the cdf.

    Notes
    -----
    See https://en.wikipedia.org/wiki/Cumulative_distribution_function
    For more details on the estimation see Ref. [5] in the module
    docstring.

    The multivariate CDF for mixed data (continuous and ordered/unordered
    discrete) is estimated by:

    .. math:: F(x^{c},x^{d})=n^{-1}\sum_{i=1}^{n}\left[G\left(
        \frac{x^{c}-X_{i}}{h}\right)\sum_{u\leq x^{d}}L(X_{i}^{d},
        x_{i}^{d},\lambda)\right]

    where :math:`G()` is the product kernel CDF estimator for the
    continuous variables and :math:`L()` for the discrete variables.
    The bandwidth used is ``self.bw``.
    """
    if data_predict is None:
        data_predict = self.data
    else:
        data_predict = _adjust_shape(data_predict, self.k_vars)

    cdf_est = []
    for i in range(np.shape(data_predict)[0]):
        cdf_est.append(gpke(self.bw, data=self.data,
                            data_predict=data_predict[i, :],
                            var_type=self.var_type,
                            ckertype="gaussian_cdf",
                            ukertype="aitchisonaitken_cdf",
                            okertype='wangryzin_cdf') / self.nobs)

    return np.squeeze(cdf_est)
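# A numpy sketch of the kernel CDF estimator above for all-continuous data:
# the Gaussian pdf kernel is replaced by its CDF G((x0 - X_i) / h) and the
# kernel products are averaged. Uses scipy.stats.norm.cdf; the discrete
# kernels (aitchisonaitken_cdf, wangryzin_cdf) are left out of this sketch,
# and the helper name is hypothetical.
def _example_kernel_cdf(data, x0, bw):
    import numpy as np
    from scipy.stats import norm
    data = np.asarray(data, dtype=float)   # (nobs, k_vars)
    x0 = np.asarray(x0, dtype=float)
    bw = np.asarray(bw, dtype=float)
    G = norm.cdf((x0 - data) / bw)         # per-variable CDF kernels
    return G.prod(axis=1).mean()           # F_hat(x0)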
def aic_hurvich(self, bw, func=None):
    """
    Computes the Hurvich AIC criterion for bandwidth estimation.

    Parameters
    ----------
    bw : str or array_like
        See the ``bw`` parameter of `KernelReg` for details.
    func : None
        Unused here; needed in the signature because it's used in
        `cv_loo`.

    Returns
    -------
    aic : ndarray
        The Hurvich AIC criterion, one element for each variable.

    References
    ----------
    See ch. 2 in [1] and p. 35 in [2].
    """
    H = np.empty((self.nobs, self.nobs))
    for j in range(self.nobs):
        H[:, j] = gpke(bw, data=self.exog, data_predict=self.exog[j, :],
                       var_type=self.var_type, tosum=False)

    denom = H.sum(axis=1)
    H = H / denom
    gx = KernelReg(endog=self.endog, exog=self.exog,
                   var_type=self.var_type, reg_type=self.reg_type,
                   bw=bw,
                   defaults=EstimatorSettings(efficient=False)).fit()[0]
    gx = np.reshape(gx, (self.nobs, 1))
    sigma = ((self.endog - gx)**2).sum(axis=0) / float(self.nobs)

    frac = (1 + np.trace(H) / float(self.nobs)) / \
           (1 - (np.trace(H) + 2) / float(self.nobs))
    aic = np.log(sigma) + frac
    return aic
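# A worked numpy sketch of the Hurvich AIC above for a 1D local constant
# fit: build the row-normalized smoother ("hat") matrix H with Gaussian
# weights (the kernel normalization cancels), form in-sample residuals, and
# apply the corrected-AIC penalty. The bandwidth minimizing this value is
# the AIC-selected one. The helper name is hypothetical.
def _example_aic_hurvich(x, y, h):
    import numpy as np
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = x.shape[0]
    K = np.exp(-0.5 * ((x[:, None] - x[None, :]) / h)**2)
    H = K / K.sum(axis=1, keepdims=True)   # row-stochastic smoother matrix
    resid = y - np.dot(H, y)               # in-sample residuals
    sigma2 = (resid**2).mean()
    trH = np.trace(H)
    return np.log(sigma2) + (1 + trH / n) / (1 - (trH + 2) / n)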
def imse(self, bw):
    r"""
    The integrated mean square error for the conditional KDE.

    Parameters
    ----------
    bw : array_like
        The bandwidth parameter(s).

    Returns
    -------
    CV : float
        The cross-validation objective function.

    Notes
    -----
    For more details see pp. 156-166 in [1]. For details on how to handle
    the mixed variable types see [3].

    The formula for the cross-validation objective function for mixed
    variable types is:

    .. math:: CV(h,\lambda)=\frac{1}{n}\sum_{l=1}^{n}
        \frac{G_{-l}(X_{l})}{\left[\mu_{-l}(X_{l})\right]^{2}}-
        \frac{2}{n}\sum_{l=1}^{n}\frac{f_{-l}(X_{l},Y_{l})}{\mu_{-l}(X_{l})}

    where

    .. math:: G_{-l}(X_{l}) = n^{-2}\sum_{i\neq l}\sum_{j\neq l}
        K_{X_{i},X_{l}} K_{X_{j},X_{l}}K_{Y_{i},Y_{j}}^{(2)}

    where :math:`K_{X_{i},X_{l}}` is the multivariate product kernel and
    :math:`\mu_{-l}(X_{l})` is the leave-one-out estimator of the pdf.
    :math:`K_{Y_{i},Y_{j}}^{(2)}` is the convolution kernel.

    The value of the function is minimized by the ``_cv_ls`` method of the
    `GenericKDE` class to return the bw estimates that minimize the
    distance between the estimated and "true" probability density.
    """
    zLOO = LeaveOneOut(self.data)
    CV = 0
    nobs = float(self.nobs)
    expander = np.ones((self.nobs - 1, 1))
    for ii, Z in enumerate(zLOO):
        X = Z[:, self.k_dep:]
        Y = Z[:, :self.k_dep]
        # Expand Y and X into all (i, j) row pairs for the double sum.
        Ye_L = np.kron(Y, expander)
        Ye_R = np.kron(expander, Y)
        Xe_L = np.kron(X, expander)
        Xe_R = np.kron(expander, X)
        K_Xi_Xl = gpke(bw[self.k_dep:], data=Xe_L,
                       data_predict=self.exog[ii, :],
                       var_type=self.indep_type, tosum=False)
        K_Xj_Xl = gpke(bw[self.k_dep:], data=Xe_R,
                       data_predict=self.exog[ii, :],
                       var_type=self.indep_type, tosum=False)
        K2_Yi_Yj = gpke(bw[0:self.k_dep], data=Ye_L,
                        data_predict=Ye_R,
                        var_type=self.dep_type,
                        ckertype='gauss_convolution',
                        okertype='wangryzin_convolution',
                        ukertype='aitchisonaitken_convolution',
                        tosum=False)
        G = (K_Xi_Xl * K_Xj_Xl * K2_Yi_Yj).sum() / nobs**2
        f_X_Y = gpke(bw, data=-Z, data_predict=-self.data[ii, :],
                     var_type=(self.dep_type + self.indep_type)) / nobs
        m_x = gpke(bw[self.k_dep:], data=-X,
                   data_predict=-self.exog[ii, :],
                   var_type=self.indep_type) / nobs
        CV += (G / m_x ** 2) - 2 * (f_X_Y / m_x)

    return CV / nobs
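# A small sketch of the np.kron pairing trick used in imse above: kron with
# a column of ones repeats each row (left operand) or tiles the whole block
# (right operand), so the two expanded arrays line up as all (i, j) row
# pairs for the double sum. The helper name is hypothetical.
def _example_kron_pairs():
    import numpy as np
    A = np.array([[1.], [2.], [3.]])
    m = A.shape[0]
    expander = np.ones((m, 1))
    left = np.kron(A, expander)    # rows: 1, 1, 1, 2, 2, 2, 3, 3, 3
    right = np.kron(expander, A)   # rows: 1, 2, 3, 1, 2, 3, 1, 2, 3
    # Row k of (left, right) is the pair (A[k // m], A[k % m]).
    return left, right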
def cdf(self, endog_predict=None, exog_predict=None):
    r"""
    Cumulative distribution function for the conditional density.

    Parameters
    ----------
    endog_predict : array_like, optional
        The dependent variable values at which the cdf is estimated. If
        not specified, the training data for the dependent variables is
        used.
    exog_predict : array_like, optional
        The independent variable values at which the cdf is estimated. If
        not specified, the training data for the independent variables is
        used.

    Returns
    -------
    cdf_est : array_like
        The estimate of the cdf.

    Notes
    -----
    For more details on the estimation see [5], and p. 181 in [1].

    The multivariate conditional CDF for mixed data (continuous and
    ordered/unordered discrete) is estimated by:

    .. math:: F(y|x)=\frac{n^{-1}\sum_{i=1}^{n}G\left(
        \frac{y-Y_{i}}{h_{0}}\right)W_{h}(X_{i},x)}{\widehat{\mu}(x)}

    where :math:`G()` is the product kernel CDF estimator for the
    dependent (y) variable(s) and :math:`W_{h}()` is the product kernel
    (pdf) estimator for the independent variable(s).
    """
    if endog_predict is None:
        endog_predict = self.endog
    else:
        endog_predict = _adjust_shape(endog_predict, self.k_dep)
    if exog_predict is None:
        exog_predict = self.exog
    else:
        exog_predict = _adjust_shape(exog_predict, self.k_indep)

    N_data_predict = np.shape(exog_predict)[0]
    cdf_est = np.empty(N_data_predict)
    for i in range(N_data_predict):
        mu_x = gpke(self.bw[self.k_dep:], data=self.exog,
                    data_predict=exog_predict[i, :],
                    var_type=self.indep_type) / self.nobs
        mu_x = np.squeeze(mu_x)
        cdf_endog = gpke(self.bw[0:self.k_dep], data=self.endog,
                         data_predict=endog_predict[i, :],
                         var_type=self.dep_type,
                         ckertype="gaussian_cdf",
                         ukertype="aitchisonaitken_cdf",
                         okertype='wangryzin_cdf', tosum=False)
        cdf_exog = gpke(self.bw[self.k_dep:], data=self.exog,
                        data_predict=exog_predict[i, :],
                        var_type=self.indep_type, tosum=False)
        S = (cdf_endog * cdf_exog).sum(axis=0)
        cdf_est[i] = S / (self.nobs * mu_x)

    return cdf_est
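# A numpy sketch of the conditional CDF above for one continuous y and one
# continuous x:
#     F_hat(y0|x0) = sum_i G((y0 - Y_i)/h_y) K_h(x0, X_i) / sum_i K_h(x0, X_i)
# i.e. the S / (nobs * mu_x) computation with gpke unrolled; the nobs
# factors cancel. Uses scipy.stats.norm.cdf; the helper name is
# hypothetical.
def _example_conditional_cdf(y, x, y0, x0, h_y, h_x):
    import numpy as np
    from scipy.stats import norm
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)
    G = norm.cdf((y0 - y) / h_y)                          # CDF kernel in y
    u = (x0 - x) / h_x
    K = np.exp(-0.5 * u**2) / (np.sqrt(2 * np.pi) * h_x)  # pdf kernel in x
    return (G * K).sum() / K.sum()                        # F_hat(y0 | x0)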