def test_cdf(self):
    """Testing cdf

    Passes a five-point PDF sampled at the ordinates 0..4 to
    ``statutils.cdf`` and compares the result against stored reference
    values using ``self.numpyAssertAlmostEqual``.
    """
    ordinates = array([0, 1, 2, 3, 4])
    density = array([0.33350065, 0.71365127, 0.42428029,
                     0.99204143, 0.01738811])
    expected = array([0.13442936, 0.42209201, 0.59311334,
                      0.9929911, 1.0])

    result = statutils.cdf(ordinates, density)
    self.numpyAssertAlmostEqual(result, expected)
def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
    """
    Estimate the PDF and CDF of genesis day-of-year with a KDE.

    Day-of-year data is periodic. To account for that, the observations
    and the evaluation grid are each tiled three times (shifted by -365,
    0 and +365 days), the KDE is evaluated over the tiled grid, and the
    central third is kept and multiplied by three. This is probably not
    quite exact, but should be sufficient for our purposes.

    :param genDays: Passed to ``flLoadFile`` to obtain the genesis days.
    :param lonLat: Not used by this method.
    :param bw: Optional KDE bandwidth. When ``None`` it is obtained from
               ``KPDF.UPDFOptimumBandwidth`` on the tiled data.
    :param genesisKDE: Optional. When given, the days and CDF are written
                       with ``flSaveFile`` and nothing is returned.

    :returns: When ``genesisKDE`` is ``None``, a :class:`numpy.ndarray`
              holding the days, the PDF and the CDF; otherwise ``None``.
    :raises AttributeError: if ``KPDF`` has no ``UPDF<kdeType>`` function.
    """
    genesisDays = flLoadFile(genDays)
    days = np.arange(1, 366)

    # Three copies of grid and data: previous year, this year, next year.
    tiledDays = np.concatenate((days - 365, days, days + 365))
    tiledData = np.concatenate((genesisDays - 365,
                                genesisDays,
                                genesisDays + 365))

    if bw is None:
        bw = KPDF.UPDFOptimumBandwidth(tiledData)

    try:
        kdeMethod = getattr(KPDF, "UPDF%s" % self.kdeType)
    except AttributeError:
        self.logger.exception("Invalid input on option: KDE method UPDF%s does not exist" % self.kdeType)
        raise

    tiledPdf = kdeMethod(tiledData, tiledDays, bw)

    # Indices 365..729 are the un-shifted copy of the grid; scale by three
    # because the tiled data spreads the density over three periods.
    apdf = 3.0 * tiledPdf[365:730]
    cy = stats.cdf(days, apdf)

    if genesisKDE is not None:
        # Assume genesisKDE is a file name.
        self.logger.debug("Saving KDE and CDF data to files")
        flSaveFile(genesisKDE, np.array([days, cy]).T)
        return None

    # NOTE(review): the concatenation is one-dimensional, so the transpose
    # is a no-op and days, PDF and CDF come back laid end to end. A
    # three-column table was presumably intended -- confirm with callers
    # before changing the shape.
    flat = np.concatenate([days, apdf, cy])
    return np.transpose(np.array(flat))
def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
    """
    Calculate the PDF of genesis day using KDEs.

    Since the data is periodic, we use a simple method to include the
    periodicity in estimating the PDF. We prepend and append the data to
    itself, then use the central third of the PDF and multiply by three to
    obtain the required PDF. Probably not quite exact, but it should be
    sufficient for our purposes.

    :param str genDays: Name of file containing genesis days (as day of
                        year).
    :param lonLat: Array of genesis longitudes and latitudes (not used by
                   this method).
    :param float bw: Optional. Bandwidth of the KDE to use. When ``None``
                     it is obtained from ``stats.bandwidth`` on the tiled
                     data.
    :param str genesisKDE: Optional. File name to save resulting CDF to.
    :type lonLat: :class:`numpy.ndarray`

    :returns: When ``genesisKDE`` is ``None``, a :class:`numpy.ndarray`
              containing the days, the PDF and CDF of the genesis days;
              otherwise ``None`` (the days and CDF are saved to file).
    """
    data = flLoadFile(genDays)
    days = np.arange(1, 366)

    # Tile grid and data over the previous, current and next period so the
    # kernel estimate wraps around the year boundary.
    ndays = np.concatenate([days - 365, days, days + 365])
    ndata = np.concatenate([data - 365, data, data + 365])

    # The KDE must be fitted to the tiled genesis data and evaluated on the
    # tiled day grid. The previous code used ``self.parameters`` and an
    # undefined name ``grid`` here, which raised NameError.
    if bw is None:
        bw = stats.bandwidth(ndata)

    kde = sm.nonparametric.KDEUnivariate(ndata)
    kde.fit(kernel=self.kdeType, bw=bw, fft=False,
            gridsize=len(ndays), clip=(ndays.min(), ndays.max()), cut=0)

    veceval = np.vectorize(kde.evaluate)
    pdf = np.nan_to_num(veceval(ndays))

    # Indices 365..729 are the un-shifted copy of the grid; scale by three
    # because the tiled data spreads the density over three periods.
    apdf = 3.0 * pdf[365:730]
    cy = stats.cdf(days, apdf)

    if genesisKDE is None:
        # NOTE(review): the concatenation is one-dimensional, so the
        # transpose is a no-op and days, PDF and CDF come back laid end to
        # end. A three-column table was presumably intended -- confirm
        # with callers before changing the shape.
        return np.transpose(np.array(np.concatenate([days, apdf, cy])))
    else:
        # Assume genesisKDE is a file name.
        LOG.debug("Saving KDE and CDF data to files")
        flSaveFile(genesisKDE, np.transpose(np.array([days, cy])))
def test_cdf(self):
    """Testing cdf

    Checks ``statutils.cdf`` against stored reference values for a
    five-point PDF given at the ordinates 0..4.
    """
    samplePoints = array([0, 1, 2, 3, 4])
    pdfValues = array([0.33350065, 0.71365127, 0.42428029,
                       0.99204143, 0.01738811])
    reference = array([0.13442936, 0.42209201, 0.59311334,
                       0.9929911, 1.0])

    computed = statutils.cdf(samplePoints, pdfValues)
    self.numpyAssertAlmostEqual(computed, reference)
def _calculateCDF(self):
    """
    Pre-compute the CDF of x and the conditional CDFs of y so they do not
    have to be recalculated later.

    ``self.z`` is summed over axis 0 to give ``px`` (one value per column
    of ``z``), and ``self.cdfX`` is ``stats.cdf(self.x, px)``. For every
    index ``i`` of ``self.x`` the conditional distribution
    ``py[i, j] = z[j, i] / px[i]`` is formed (zero where ``px[i]`` is
    zero) and ``stats.cdf(self.y, py[i, :])`` is stored in row ``i`` of
    ``self.cdfY``.

    :raises IndexError: may be raised when the sizes of ``self.x``,
                        ``self.y`` and ``self.z`` do not agree. The loop
                        indices and array shapes are logged at debug
                        level before the exception is re-raised.
    """
    # Sum along the columns of z to get the marginal weights of x.
    px = self.z.sum(axis=0)

    # CDF of (x, px)
    cdfX = stats.cdf(self.x, px)

    # z is indexed [j, i] while py and cdfY are indexed [i, j], hence the
    # transposed shape.
    py = np.zeros(self.z.shape, 'd').T
    cdfY = np.zeros(self.z.shape, 'd').T

    # Bound up front so the error handler can always report them.
    i = j = None
    try:
        for i in range(len(self.x)):
            colTotal = px[i]
            for j, zval in enumerate(self.z[:, i]):
                # An empty column has no conditional distribution.
                py[i, j] = zval / colTotal if colTotal != 0 else 0

            for j, value in enumerate(stats.cdf(self.y, py[i, :])):
                cdfY[i, j] = value
    except IndexError:
        # Only log indices and shapes: indexing the arrays again with the
        # offending indices would raise inside this handler and hide the
        # original error.
        LOG.debug("i = %s", i)
        LOG.debug("j = %s", j)
        LOG.debug("z dim = %s", self.z.shape)
        LOG.debug("cdfy dim = %s", cdfY.shape)
        LOG.debug("p_y dim = %s", py.shape)
        LOG.debug("cdfx dim = %s", cdfX.shape)
        LOG.debug("p_x dim = %s", px.shape)
        raise

    self.cdfX = cdfX
    self.cdfY = cdfY
    return
def _calculateCDF(self):
    """
    Pre-compute the CDF of x and the conditional CDFs of y so they do not
    have to be recalculated later.

    ``self.z`` is summed over axis 0 to give ``px`` (one value per column
    of ``z``), and ``self.cdfX`` is ``stats.cdf(self.x, px)``. For every
    index ``i`` of ``self.x`` the conditional distribution
    ``py[i, j] = z[j, i] / px[i]`` is formed (zero where ``px[i]`` is
    zero) and ``stats.cdf(self.y, py[i, :])`` is stored in row ``i`` of
    ``self.cdfY``. Both work arrays have shape
    ``(self.x.size, self.y.size)``.

    :raises IndexError: may be raised when the sizes of ``self.x``,
                        ``self.y`` and ``self.z`` do not agree. The loop
                        indices and array shapes are logged at debug
                        level before the exception is re-raised.
    """
    # Sum along the columns of z to get the marginal weights of x.
    px = self.z.sum(axis=0)

    # CDF of (x, px)
    cdfX = stats.cdf(self.x, px)

    # py and cdfY are nx by ny.
    py = np.zeros([self.x.size, self.y.size], 'd')
    cdfY = np.zeros([self.x.size, self.y.size], 'd')

    # Bound up front so the error handler can always report them.
    i = j = None
    try:
        for i in range(len(self.x)):
            colTotal = px[i]
            for j, zval in enumerate(self.z[:, i]):
                # An empty column has no conditional distribution.
                py[i, j] = zval / colTotal if colTotal != 0 else 0

            for j, value in enumerate(stats.cdf(self.y, py[i, :])):
                cdfY[i, j] = value
    except IndexError:
        # Only log indices and shapes: indexing the arrays again with the
        # offending indices would raise inside this handler and hide the
        # original error.
        self.logger.debug("i = %s", i)
        self.logger.debug("j = %s", j)
        self.logger.debug("z dim = %s", self.z.shape)
        self.logger.debug("cdfy dim = %s", cdfY.shape)
        self.logger.debug("p_y dim = %s", py.shape)
        self.logger.debug("cdfx dim = %s", cdfX.shape)
        self.logger.debug("p_x dim = %s", px.shape)
        raise

    self.cdfX = cdfX
    self.cdfY = cdfY
    return
def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
    """
    Estimate the PDF and CDF of genesis day-of-year with a KDE.

    Day-of-year data is periodic. To account for that, the observations
    and the evaluation grid are each tiled three times (shifted by -365,
    0 and +365 days), the KDE is evaluated over the tiled grid, and the
    central third is kept and multiplied by three. This is probably not
    quite exact, but should be sufficient for our purposes.

    :param str genDays: Name of file containing genesis days (as day of
                        year).
    :param lonLat: Array of genesis longitudes and latitudes (not used by
                   this method).
    :param float bw: Optional. Bandwidth of the KDE to use. When ``None``
                     it is obtained from ``KPDF.UPDFOptimumBandwidth`` on
                     the tiled data.
    :param str genesisKDE: Optional. File name to save resulting CDF to.
    :type lonLat: :class:`numpy.ndarray`

    :returns: When ``genesisKDE`` is ``None``, a :class:`numpy.ndarray`
              containing the days, the PDF and CDF of the genesis days;
              otherwise ``None`` (the days and CDF are saved to file).
    :raises AttributeError: if ``KPDF`` has no ``UPDF<kdeType>`` function.
    """
    genesisDays = flLoadFile(genDays)
    days = np.arange(1, 366)

    # Three copies of grid and data: previous year, this year, next year.
    tiledDays = np.concatenate((days - 365, days, days + 365))
    tiledData = np.concatenate((genesisDays - 365,
                                genesisDays,
                                genesisDays + 365))

    if bw is None:
        bw = KPDF.UPDFOptimumBandwidth(tiledData)

    try:
        kdeMethod = getattr(KPDF, "UPDF%s" % self.kdeType)
    except AttributeError:
        LOG.exception("Invalid input on option: KDE method UPDF%s does not exist",
                      self.kdeType)
        raise

    tiledPdf = kdeMethod(tiledData, tiledDays, bw)

    # Indices 365..729 are the un-shifted copy of the grid; scale by three
    # because the tiled data spreads the density over three periods.
    apdf = 3.0 * tiledPdf[365:730]
    cy = stats.cdf(days, apdf)

    if genesisKDE is not None:
        # Assume genesisKDE is a file name.
        LOG.debug("Saving KDE and CDF data to files")
        flSaveFile(genesisKDE, np.array([days, cy]).T)
        return None

    # NOTE(review): the concatenation is one-dimensional, so the transpose
    # is a no-op and days, PDF and CDF come back laid end to end. A
    # three-column table was presumably intended -- confirm with callers
    # before changing the shape.
    flat = np.concatenate([days, apdf, cy])
    return np.transpose(np.array(flat))
def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                cdfParameters=None, angular=False, periodic=False,
                missingValue=getattr(sys, 'maxint', sys.maxsize)):
    """
    Generate a PDF and CDF for a given parameter set using the method of
    kernel density estimators. Optionally return the PDF and CDF as an
    array, or write both to separate files.

    The results are also stored on the instance as ``self.grid``,
    ``self.pdf`` and ``self.cy``.

    :param parameters: Parameter values. If a string is given it is
                       passed to ``flLoadFile`` to read the values;
                       otherwise it must be an array with more than one
                       element.
    :param kdeStep: Increment of the grid on which the distributions are
                    evaluated.
    :param kdeParameters: Optional. If given, the grid and PDF are saved
                          with ``flSaveFile`` under this name (and the CDF
                          under ``cdfParameters``) instead of being
                          returned.
    :param cdfParameters: Optional. Name used to save the grid and CDF;
                          only used when ``kdeParameters`` is given.
    :param angular: If true, the range is fixed to 0-360.
    :param periodic: ``False`` for non-periodic data, otherwise the period
                     of the data (e.g. ``365`` for day of year). Data and
                     grid are tiled over three periods and the central
                     period of the result is kept.
    :param missingValue: Value passed to ``stats.statRemoveNum`` together
                         with the data (defaults to ``sys.maxint`` where
                         it exists, else ``sys.maxsize``).

    :returns: If ``kdeParameters`` is ``None``, a :class:`numpy.ndarray`
              with columns grid, PDF and CDF; otherwise ``None``.
    :raises IndexError: if an array with one or fewer elements is passed.
    :raises ValueError: if the evaluation grid has fewer than two points.
    """
    self.logger.debug("Running generateKDE")
    if isinstance(parameters, str):
        self.parameters = stats.statRemoveNum(
            flLoadFile(parameters, '%', ','), missingValue)
    else:
        if parameters.size <= 1:
            self.logger.error("Insufficient members in parameter list")
            # Call syntax is valid on both Python 2 and Python 3.
            raise IndexError("Insufficient members in parameter list")
        self.parameters = stats.statRemoveNum(parameters, missingValue)

    if angular:
        xmin, xmax = 0.0, 360.0
    elif periodic:
        xmin, xmax = 0.0, periodic
    else:
        xmin, xmax = self.parameters.min(), self.parameters.max()

    self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                      xmin, xmax, kdeStep)

    if periodic:
        # Tile grid and data over the previous, current and next period.
        x = np.arange(1, periodic + 1, kdeStep)
        self.grid = np.concatenate([x - periodic, x, x + periodic])
        self.parameters = np.concatenate([self.parameters - periodic,
                                          self.parameters,
                                          self.parameters + periodic])
    else:
        self.grid = np.arange(xmin, xmax, kdeStep)

    if self.grid.size < 2:
        self.logger.critical("Grid for CDF generation is a single value")
        self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                             xmin, xmax, kdeStep)
        raise ValueError("Grid for CDF generation is a single value")

    bw = KPDF.UPDFOptimumBandwidth(self.parameters)
    self.pdf = self._generatePDF(self.grid, bw, self.parameters)

    if periodic:
        # Keep the central copy of the tiled grid. Its bounds come from
        # the length of one copy, which is always an integer and stays
        # correct when the period is not an exact multiple of kdeStep
        # (periodic / kdeStep may be a float and is not a valid index).
        central = slice(x.size, 2 * x.size)
        self.pdf = 3.0 * self.pdf[central]
        self.grid = self.grid[central]

    self.cy = stats.cdf(self.grid, self.pdf)

    if kdeParameters is None:
        return np.transpose(np.array([self.grid, self.pdf, self.cy]))
    else:
        # Assume both kdeParameters and cdfParameters are defined as files:
        self.logger.debug("Saving KDE and CDF data to files")
        flSaveFile(kdeParameters,
                   np.transpose(np.array([self.grid, self.pdf])))
        flSaveFile(cdfParameters,
                   np.transpose(np.array([self.grid, self.cy])))
def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                cdfParameters=None, angular=False, periodic=False,
                missingValue=sys.maxsize):
    """
    Generate a PDF and CDF for a given parameter set using the method of
    kernel density estimators. Optionally return the PDF and CDF as an
    array, or write both to separate files.

    The results are also stored on the instance as ``self.grid``,
    ``self.pdf`` and ``self.cy``.

    :param parameters: Parameter values. If a string is given, then it is
                       the path to a file containing the values. If an
                       array is passed, then it should hold the parameter
                       values (more than one element).
    :param float kdeStep: Increment of the ordinate values at which the
                          distributions will be calculated.
    :param str kdeParameters: Optional. If given, the grid and PDF are
                              saved to a file with this name (and the CDF
                              to ``cdfParameters``). If absent, the
                              distribution values are returned.
    :param str cdfParameters: Optional. Name of the file the grid and CDF
                              are saved to; only used when
                              ``kdeParameters`` is given.
    :param angular: Does the data represent an angular measure
                    (e.g. bearing). If true, the range is fixed to 0-360.
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic data
                     (e.g. day of year). If given, it should be the period
                     of the data (e.g. for annual data, ``periodic=365``).
    :type  periodic: boolean or int, default=``False``
    :param missingValue: Missing values have this value (default
                         :attr:`sys.maxsize`).

    :returns: If ``kdeParameters`` is given, returns ``None`` (data are
              saved to file), otherwise :class:`numpy.ndarray` of the
              parameter grid, the PDF and CDF.
    :raises IndexError: if an array with one or fewer elements is passed.
    :raises ValueError: if the evaluation grid has fewer than two points.
    """
    LOG.debug("Running generateKDE")
    if type(parameters) is str:
        self.parameters = stats.statRemoveNum(
            flLoadFile(parameters, '%', ','), missingValue)
    else:
        if parameters.size <= 1:
            LOG.error("Insufficient members in parameter list")
            raise IndexError("Insufficient members in parameter list")
        self.parameters = stats.statRemoveNum(parameters, missingValue)

    if angular:
        xmin, xmax = 0.0, 360.0
    elif periodic:
        xmin, xmax = 0.0, periodic
    else:
        xmin, xmax = self.parameters.min(), self.parameters.max()

    LOG.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f", xmin, xmax, kdeStep)

    if periodic:
        # Tile grid and data over the previous, current and next period.
        x = np.arange(1, periodic + 1, kdeStep)
        self.grid = np.concatenate([x - periodic, x, x + periodic])
        self.parameters = np.concatenate([self.parameters - periodic,
                                          self.parameters,
                                          self.parameters + periodic])
    else:
        self.grid = np.arange(xmin, xmax, kdeStep)

    if self.grid.size < 2:
        LOG.critical("Grid for CDF generation is a single value")
        LOG.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                     xmin, xmax, kdeStep)
        raise ValueError("Grid for CDF generation is a single value")

    bw = stats.bandwidth(self.parameters)
    self.pdf = self._generatePDF(self.grid, bw, self.parameters)

    if periodic:
        # Keep the central copy of the tiled grid. Its bounds come from
        # the length of one copy rather than int(periodic / kdeStep), so
        # the slice stays aligned when the period is not an exact multiple
        # of kdeStep.
        central = slice(x.size, 2 * x.size)
        self.pdf = 3.0 * self.pdf[central]
        self.grid = self.grid[central]

    self.cy = stats.cdf(self.grid, self.pdf)

    if kdeParameters is None:
        return np.transpose(np.array([self.grid, self.pdf, self.cy]))
    else:
        # Assume both kdeParameters and cdfParameters are defined as files:
        LOG.debug("Saving KDE and CDF data to files")
        flSaveFile(kdeParameters,
                   np.transpose(np.array([self.grid, self.pdf])))
        flSaveFile(cdfParameters,
                   np.transpose(np.array([self.grid, self.cy])))
def test_cdfzeros(self):
    """Test cdf returns zero array for zero input

    The all-zero array is passed as the first argument of
    ``statutils.cdf`` and the ramp 0..4 as the second; the result is
    expected to be all zeros.
    """
    ramp = array([0, 1, 2, 3, 4])
    npoints = len(ramp)
    allZero = zeros(npoints)

    result = statutils.cdf(allZero, ramp)
    self.numpyAssertAlmostEqual(result, zeros(npoints))
def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                cdfParameters=None, angular=False, periodic=False,
                missingValue=getattr(sys, 'maxint', sys.maxsize)):
    """
    Generate a PDF and CDF for a given parameter set using the method of
    kernel density estimators. Optionally return the PDF and CDF as an
    array, or write both to separate files.

    The results are also stored on the instance as ``self.grid``,
    ``self.pdf`` and ``self.cy``.

    :param parameters: Parameter values. If a string is given, then it is
                       the path to a file containing the values. If an
                       array is passed, then it should hold the parameter
                       values (more than one element).
    :param float kdeStep: Increment of the ordinate values at which the
                          distributions will be calculated.
    :param str kdeParameters: Optional. If given, the grid and PDF are
                              saved to a file with this name (and the CDF
                              to ``cdfParameters``). If absent, the
                              distribution values are returned.
    :param str cdfParameters: Optional. Name of the file the grid and CDF
                              are saved to; only used when
                              ``kdeParameters`` is given.
    :param angular: Does the data represent an angular measure
                    (e.g. bearing). If true, the range is fixed to 0-360.
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic data
                     (e.g. day of year). If given, it should be the period
                     of the data (e.g. for annual data, ``periodic=365``).
    :type  periodic: boolean or float, default=``False``
    :param missingValue: Missing values have this value (defaults to
                         ``sys.maxint`` where it exists, else
                         ``sys.maxsize``).

    :returns: If ``kdeParameters`` is given, returns ``None`` (data are
              saved to file), otherwise :class:`numpy.ndarray` of the
              parameter grid, the PDF and CDF.
    :raises IndexError: if an array with one or fewer elements is passed.
    :raises ValueError: if the evaluation grid has fewer than two points.
    """
    self.logger.debug("Running generateKDE")
    if isinstance(parameters, str):
        self.parameters = stats.statRemoveNum(
            flLoadFile(parameters, '%', ','), missingValue)
    else:
        if parameters.size <= 1:
            self.logger.error("Insufficient members in parameter list")
            # Call syntax is valid on both Python 2 and Python 3.
            raise IndexError("Insufficient members in parameter list")
        self.parameters = stats.statRemoveNum(parameters, missingValue)

    if angular:
        xmin, xmax = 0.0, 360.0
    elif periodic:
        xmin, xmax = 0.0, periodic
    else:
        xmin, xmax = self.parameters.min(), self.parameters.max()

    self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                      xmin, xmax, kdeStep)

    if periodic:
        # Tile grid and data over the previous, current and next period.
        x = np.arange(1, periodic + 1, kdeStep)
        self.grid = np.concatenate([x - periodic, x, x + periodic])
        self.parameters = np.concatenate([self.parameters - periodic,
                                          self.parameters,
                                          self.parameters + periodic])
    else:
        self.grid = np.arange(xmin, xmax, kdeStep)

    if self.grid.size < 2:
        self.logger.critical("Grid for CDF generation is a single value")
        self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                             xmin, xmax, kdeStep)
        raise ValueError("Grid for CDF generation is a single value")

    bw = KPDF.UPDFOptimumBandwidth(self.parameters)
    self.pdf = self._generatePDF(self.grid, bw, self.parameters)

    if periodic:
        # Keep the central copy of the tiled grid. Its bounds come from
        # the length of one copy, which is always an integer and stays
        # correct when the period is not an exact multiple of kdeStep
        # (periodic / kdeStep may be a float and is not a valid index).
        central = slice(x.size, 2 * x.size)
        self.pdf = 3.0 * self.pdf[central]
        self.grid = self.grid[central]

    self.cy = stats.cdf(self.grid, self.pdf)

    if kdeParameters is None:
        return np.transpose(np.array([self.grid, self.pdf, self.cy]))
    else:
        # Assume both kdeParameters and cdfParameters are defined as files:
        self.logger.debug("Saving KDE and CDF data to files")
        flSaveFile(kdeParameters,
                   np.transpose(np.array([self.grid, self.pdf])))
        flSaveFile(cdfParameters,
                   np.transpose(np.array([self.grid, self.cy])))