from itertools import combinations

import numpy as np
from scipy.special import factorial

# `_faa_di_bruno_partitions` is assumed to be importable from the same
# module; it yields the integer partitions used in Faa di Bruno's formula
# as lists of (m, k) pairs (part m repeated k times).


def normal_reference_constant(self):
    """
    Constant used for Silverman's normal reference asymptotic bandwidth
    calculation:

        C = 2 * ((pi^(1/2) * (nu!)^3 * R(k)) /
                 (2 * nu * (2*nu)! * kap_nu(k)^2))^(1/(2*nu + 1))

    nu = kernel order
    kap_nu = nu'th moment of the kernel
    R = kernel roughness (square of the L^2 norm)

    Note: the L2Norm property returns the square of the norm.
    """
    nu = self._order
    if nu != 2:
        msg = "Only implemented for second order kernels"
        raise NotImplementedError(msg)

    if self._normal_reference_constant is None:
        C = np.pi ** 0.5 * factorial(nu) ** 3 * self.L2Norm
        C /= 2 * nu * factorial(2 * nu) * self.moments(nu) ** 2
        C = 2 * C ** (1.0 / (2 * nu + 1))
        self._normal_reference_constant = C
    return self._normal_reference_constant
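# Hedged sanity check: a standalone sketch of the same computation for a
# second-order Gaussian kernel. The kernel constants below are assumptions
# (standard Gaussian values: roughness R(k) = 1/(2*sqrt(pi)), second moment
# kappa_2 = 1); the result should match Silverman's rule-of-thumb constant
# (4/3)**(1/5) ~ 1.0592.
nu = 2
roughness = 1.0 / (2.0 * np.sqrt(np.pi))  # R(k) of the Gaussian kernel
kappa_nu = 1.0                            # second moment of the Gaussian kernel

C = np.pi ** 0.5 * factorial(nu) ** 3 * roughness
C /= 2 * nu * factorial(2 * nu) * kappa_nu ** 2
C = 2 * C ** (1.0 / (2 * nu + 1))
print(C, (4.0 / 3.0) ** 0.2)  # both ~1.0592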
def cumulant_from_moments(momt, n):
    """Compute the n-th cumulant from moments.

    Parameters
    ----------
    momt : array_like
        `momt[j]` contains the `(j+1)`-th moment. These can be raw moments
        around zero, or central moments (in which case ``momt[0] == 0``).
    n : int
        Which cumulant to calculate (must be a positive integer).

    Returns
    -------
    kappa : float
        The n-th cumulant.
    """
    if n < 1:
        raise ValueError("Expected a positive integer. Got %s instead." % n)
    if len(momt) < n:
        raise ValueError("%s-th cumulant requires %s moments, "
                         "only got %s." % (n, n, len(momt)))
    kappa = 0.
    for p in _faa_di_bruno_partitions(n):
        r = sum(k for (m, k) in p)
        term = (-1)**(r - 1) * factorial(r - 1)
        for (m, k) in p:
            term *= np.power(momt[m - 1] / factorial(m), k) / factorial(k)
        kappa += term
    kappa *= factorial(n)
    return kappa
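# Hedged sanity check (assumes cumulant_from_moments above and its helper
# _faa_di_bruno_partitions are in scope): the raw moments of Poisson(1) are
# the Bell numbers 1, 2, 5, 15, ..., and every cumulant of Poisson(1)
# equals 1.
momt = [1, 2, 5, 15]
for k_order in range(1, 5):
    print(k_order, cumulant_from_moments(momt, k_order))  # each ~1.0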
def nloglikeobs(self, params):
    """
    Negative loglikelihood of the zero-inflated Poisson model, evaluated
    for each observation.

    Parameters
    ----------
    params : array_like
        The parameters of the model. ``params[:-1]`` are the Poisson
        regression coefficients; ``params[-1]`` parameterizes the
        zero-inflation probability through a logistic transform.

    Returns
    -------
    nloglik : ndarray
        The negative log likelihood of the model evaluated at `params`,
        for each observation.

    Notes
    -----
    With zero-inflation probability :math:`\\gamma` and
    :math:`\\lambda_{i}=\\exp(x_{i}^{\\prime}\\beta)`:

    .. math::

       -\\ln L_{i}=\\begin{cases}
       -\\ln\\left(\\gamma+(1-\\gamma)e^{-\\lambda_{i}}\\right) & y_{i}=0\\\\
       -\\ln(1-\\gamma)+\\lambda_{i}-y_{i}x_{i}^{\\prime}\\beta
       +\\ln y_{i}! & y_{i}>0
       \\end{cases}
    """
    beta = params[:-1]
    # TODO: check this link; replace with np.dot(self.exogZ, gamma) for
    # covariate-dependent zero inflation
    gamm = 1 / (1 + np.exp(params[-1]))
    XB = self.offset + np.dot(self.exog, beta)
    endog = self.endog
    nloglik = (-np.log(1 - gamm) + np.exp(XB) - endog * XB
               + np.log(factorial(endog)))
    # for y == 0, replace by -log(gamma + (1 - gamma) * exp(-lambda))
    nloglik[endog == 0] = -np.log(gamm + np.exp(-nloglik[endog == 0]))
    return nloglik
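# Hedged sanity check of the zero-inflation branch, standalone (no model
# class; gamm, lam, and y below are made-up values): for y == 0 the
# per-observation value should equal -log(gamma + (1 - gamma) * exp(-lambda)).
gamm, lam = 0.3, 2.0
y = np.array([0, 1, 3])
XB = np.log(lam) * np.ones(y.shape)

nll = -np.log(1 - gamm) + np.exp(XB) - y * XB + np.log(factorial(y))
nll[y == 0] = -np.log(gamm + np.exp(-nll[y == 0]))

print(np.isclose(nll[0], -np.log(gamm + (1 - gamm) * np.exp(-lam))))  # True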
def _compute_coefs_pdf(self, cum):
    # scale the cumulants by powers of the variance; work on a float copy
    # so the caller's array is not mutated and integer division is avoided
    mu, sigma = cum[0], np.sqrt(cum[1])
    sigma2 = cum[1]
    lam = np.array(cum, dtype=float)
    for j in range(lam.size):
        lam[j] /= sigma2 ** j

    coef = np.zeros(lam.size * 3 - 5)
    coef[0] = 1.
    for s in range(lam.size - 2):
        for p in _faa_di_bruno_partitions(s + 1):
            term = sigma ** (s + 1)
            for (m, k) in p:
                term *= np.power(lam[m + 1] / factorial(m + 2), k) / factorial(k)
            r = sum(k for (m, k) in p)
            coef[s + 1 + 2 * r] += term
    return coef, mu, sigma
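# Hedged sanity check (treats the method as a plain function; `self` is
# unused, and _faa_di_bruno_partitions is assumed in scope): with cumulants
# [0, 1, k3] the result should be the first Edgeworth correction, i.e.
# Hermite-series coefficients [1, 0, 0, k3/6].
coef, mu, sigma = _compute_coefs_pdf(None, [0., 1., 0.5])
print(coef)  # -> [1.  0.  0.  0.0833...]  (0.5 / 6)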
def nloglikeobs(self, params):
    """
    Negative loglikelihood of the Poisson model, evaluated for each
    observation.

    Parameters
    ----------
    params : array_like
        The parameters of the model.

    Returns
    -------
    nloglik : ndarray
        The negative log likelihood of the model evaluated at `params`,
        for each observation.

    Notes
    -----
    .. math::

       \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}
       +y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]

    with :math:`\\lambda_{i}=\\exp(x_{i}^{\\prime}\\beta)`; this method
    returns the negative of each summand.
    """
    XB = np.dot(self.exog, params)
    endog = self.endog
    return np.exp(XB) - endog * XB + np.log(factorial(endog))
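# Hedged sanity check, standalone (exog, endog, and params below are
# made-up values): the per-observation result should equal
# -scipy.stats.poisson.logpmf(y, exp(x'beta)).
from scipy import stats

exog = np.column_stack([np.ones(3), [0.0, 1.0, 2.0]])
endog = np.array([1, 2, 4])
params = np.array([0.1, 0.5])

XB = exog @ params
nll = np.exp(XB) - endog * XB + np.log(factorial(endog))
print(np.allclose(nll, -stats.poisson.logpmf(endog, np.exp(XB))))  # True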
def _chi2_cumulant(n, df):
    # n-th cumulant of the chi-square distribution with `df` degrees of
    # freedom: kappa_n = 2**(n - 1) * (n - 1)! * df
    assert n > 0
    return 2**(n - 1) * factorial(n - 1) * df
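# Hedged check against known chi-square facts: kappa_1 is the mean (= df)
# and kappa_2 is the variance (= 2 * df).
from scipy import stats

df = 5
m, v = stats.chi2.stats(df, moments='mv')
print(np.isclose(_chi2_cumulant(1, df), m),
      np.isclose(_chi2_cumulant(2, df), v))  # True True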
def banddepth(data, method='MBD'):
    """
    Calculate the band depth for a set of functional curves.

    Band depth is an order statistic for functional data (see `fboxplot`),
    with a higher band depth indicating larger "centrality".  By analogy
    with scalar data, the functional curve with highest band depth is
    called the median curve, and the band made up from the first N/2 of N
    curves is the 50% central region.

    Parameters
    ----------
    data : ndarray
        The vectors of functions to create a functional boxplot from.
        The first axis is the function index, the second axis the one
        along which the function is defined.  So ``data[0, :]`` is the
        first functional curve.
    method : {'MBD', 'BD2'}, optional
        Whether to use the original band depth (with J=2) of [1]_ or the
        modified band depth.  See Notes for details.

    Returns
    -------
    depth : ndarray
        Depth values for functional curves.

    Notes
    -----
    Functional band depth as an order statistic for functional data was
    proposed in [1]_ and applied to functional boxplots and bagplots in
    [2]_.

    The method 'BD2' checks for each curve whether it lies completely
    inside bands constructed from two curves.  All combinations of two
    curves in the set of curves are used, and the band depth is
    normalized to one.  Because the complete curve has to fall within the
    band, this method yields a lot of ties.

    The method 'MBD' is similar to 'BD2', but checks the fraction of the
    curve falling within the bands.  It therefore generates very few
    ties.

    References
    ----------
    .. [1] S. Lopez-Pintado and J. Romo, "On the Concept of Depth for
           Functional Data", Journal of the American Statistical
           Association, vol. 104, pp. 718-734, 2009.
    .. [2] Y. Sun and M.G. Genton, "Functional Boxplots", Journal of
           Computational and Graphical Statistics, vol. 20, pp. 1-19,
           2011.
    """
    def _band2(x1, x2, curve):
        # 1 if `curve` lies entirely within the band spanned by x1 and x2
        xb = np.vstack([x1, x2])
        if np.any(curve < xb.min(axis=0)) or np.any(curve > xb.max(axis=0)):
            res = 0
        else:
            res = 1
        return res

    def _band_mod(x1, x2, curve):
        # fraction of `curve` lying within the band spanned by x1 and x2
        xb = np.vstack([x1, x2])
        res = np.logical_and(curve >= xb.min(axis=0),
                             curve <= xb.max(axis=0))
        return np.sum(res) / float(res.size)

    if method == 'BD2':
        band = _band2
    elif method == 'MBD':
        band = _band_mod
    else:
        raise ValueError("Unknown input value for parameter `method`.")

    num = data.shape[0]
    ix = np.arange(num)
    depth = []
    for ii in range(num):
        res = 0
        for ix1, ix2 in combinations(ix, 2):
            res += band(data[ix1, :], data[ix2, :], data[ii, :])

        # normalize by the number of pairs of curves, num * (num - 1) / 2
        normfactor = factorial(num) / 2. / factorial(num - 2)
        depth.append(float(res) / normfactor)

    return np.asarray(depth)
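# Hedged usage sketch (assumes banddepth above is in scope, together with
# numpy, itertools.combinations, and scipy.special.factorial): five
# vertically shifted copies of a sine curve; the middle curve should get
# the highest modified band depth.
t = np.linspace(0, 2 * np.pi, 50)
data = np.vstack([np.sin(t) + shift for shift in (-2, -1, 0, 1, 2)])
depth = banddepth(data, method='MBD')
print(depth.argmax())  # -> 2, the central curve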