def generateSamples(self, ns, sample_parameter_path=None):
    """
    Generate random samples of cyclone initial parameters.

    Uniform random numbers are located in the second column of
    ``self.xacy`` with ``searchsorted`` and the matching entries of
    the first column are taken as the samples. The samples are always
    stored in ``self.sample``.

    :param int ns: Number of samples to generate.
    :param str sample_parameter_path: Path to a file to save the
                                      sampled parameter values to.

    :returns: The sample values when no ``sample_parameter_path`` is
              given; otherwise the samples are passed to
              ``flSaveFile`` and ``None`` is returned.
    :raises ValueError: If ns <= 0.
    """
    if ns <= 0:
        # Call syntax works on both Python 2 and Python 3, unlike the
        # former ``raise ValueError, '...'`` statement.
        raise ValueError('invalid input on ns: number of sample cannot be zero or negative')

    # NOTE(review): scipy.rand is a deprecated alias of
    # numpy.random.rand; left untouched because numpy is not referenced
    # by this function.
    unif_s = scipy.rand(ns)
    ind_kdf = self.xacy[:, 1].searchsorted(unif_s)
    self.sample = self.xacy[ind_kdf, 0]

    if sample_parameter_path:
        flSaveFile(sample_parameter_path, self.sample)
    else:
        return self.sample
def minPressureHist(self, index, pAllData):
    """
    Plot a histogram of the minimum central pressures from the input
    dataset.

    A new entry is started wherever ``index`` equals 1; every following
    record lowers that entry if its pressure is smaller. The histogram
    uses 5-unit bins from 850 to 1015, is saved through
    ``self.savefig`` and the bin edges/counts are written to
    ``min_pressure_hist.csv`` under ``self.outpath``.

    :param index: Array whose entries equal to 1 start a new entry.
    :param pAllData: Central pressure for each record.
    """
    pyplot.figure(self.figurenum(), figsize=(8, 7))
    pyplot.clf()

    pcarray = []
    index = index.astype(int)
    # NOTE(review): the final record is never visited (len - 1), and a
    # leading record with index != 1 would hit an empty ``pcarray``.
    # Kept as in the original - confirm whether this is intended.
    for i in range(len(index) - 1):
        if index[i] == 1:
            pcarray.append(pAllData[i])
        elif pAllData[i] is not None and pAllData[i] < pcarray[-1]:
            pcarray[-1] = pAllData[i]

    pbins = numpy.arange(850., 1020., 5)
    # ``normed=False`` was the default and the keyword no longer exists
    # in current Matplotlib, so it is simply omitted.
    n, b, _ = pyplot.hist(numpy.array(pcarray), pbins,
                          lw=2, ec='k', fc='w')
    pyplot.xlabel("Minimum central pressure (hPa)")
    pyplot.ylabel("Count")
    pyplot.title("Distribution of minimum central pressure")
    self.savefig("min_pressure_hist")

    # First column: bin edges; second column: counts, offset by one row
    # so that each count sits beside the upper edge of its bin.
    x = numpy.zeros((len(b), 2))
    x[:, 0] = b
    x[1:, 1] = n
    files.flSaveFile(os.path.join(self.outpath, 'min_pressure_hist.csv'),
                     x, delimiter=',', fmt='%6.2f')
def generateSamples(self, ns, sample_parameter_path=None):
    """
    Generate random samples of cyclone initial parameters.

    Uniform random numbers are located in the second column of
    ``self.xacy`` with ``searchsorted`` and the matching entries of
    the first column are taken as the samples. The samples are always
    stored in ``self.sample``.

    :param int ns: Number of samples to generate.
    :param str sample_parameter_path: Path to a file to save the
                                      sampled parameter values to.

    :returns: The sample values when no ``sample_parameter_path`` is
              given; otherwise the samples are passed to
              ``flSaveFile`` and ``None`` is returned.
    :raises ValueError: If ns <= 0.
    """
    if ns <= 0:
        # Call syntax is valid on Python 2 and 3; the old
        # ``raise ValueError, "..."`` form is a SyntaxError on Python 3.
        raise ValueError("invalid input on ns: number of sample cannot be zero or negative")

    # NOTE(review): scipy.rand is a deprecated alias of
    # numpy.random.rand; left untouched because numpy is not referenced
    # by this function.
    unif_s = scipy.rand(ns)
    ind_kdf = self.xacy[:, 1].searchsorted(unif_s)
    self.sample = self.xacy[ind_kdf, 0]

    if sample_parameter_path:
        flSaveFile(sample_parameter_path, self.sample)
    else:
        return self.sample
def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
    """
    Calculate the PDF of genesis day using KDEs.

    Since the data is periodic, we use a simple method to include the
    periodicity in estimating the PDF. We prepend and append the data
    to itself, then use the central third of the PDF and multiply by
    three to obtain the required PDF. Probably not quite exact, but it
    should be sufficient for our purposes.

    :param genDays: Passed to ``flLoadFile`` to load the genesis days.
    :param lonLat: Not used by this method.
    :param bw: Optional KDE bandwidth; when ``None`` it is obtained
               from ``KPDF.UPDFOptimumBandwidth``.
    :param genesisKDE: Optional. When given, the days and CDF are
                       saved with ``flSaveFile`` and nothing is
                       returned.

    :returns: When ``genesisKDE`` is ``None``, the transpose of the
              concatenation of days, PDF and CDF.
    """
    observed = flLoadFile(genDays)
    days = np.arange(1, 366)

    # Three back-to-back copies (previous, current and next year) of
    # both the evaluation grid and the observations.
    shifts = (-365, 0, 365)
    ndays = np.concatenate([days + shift for shift in shifts])
    ndata = np.concatenate([observed + shift for shift in shifts])

    if bw is None:
        bw = KPDF.UPDFOptimumBandwidth(ndata)

    try:
        kdeMethod = getattr(KPDF, "UPDF%s" % self.kdeType)
    except AttributeError:
        self.logger.exception("Invalid input on option: KDE method UPDF%s does not exist" % self.kdeType)
        raise

    fullPdf = kdeMethod(ndata, ndays, bw)

    # Keep the central year only and rescale it.
    apdf = 3.0 * fullPdf[365:730]
    cy = stats.cdf(days, apdf)

    if genesisKDE is not None:
        self.logger.debug("Saving KDE and CDF data to files")
        flSaveFile(genesisKDE, np.transpose(np.array([days, cy])))
        return None

    # NOTE(review): concatenate() yields a single 1-D array, so the
    # transpose has no effect; a (365, 3) table may have been intended.
    return np.transpose(np.array(np.concatenate([days, apdf, cy])))
def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
    """
    Calculate the PDF of genesis day using KDEs.

    Since the data is periodic, we use a simple method to include the
    periodicity in estimating the PDF. We prepend and append the data
    to itself, then use the central third of the PDF and multiply by
    three to obtain the required PDF. Probably not quite exact, but it
    should be sufficient for our purposes.

    :param str genDays: Name of file containing genesis days (as day
                        of year).
    :param lonLat: Array of genesis longitudes and latitudes (not used
                   by this method).
    :param float bw: Optional. Bandwidth of the KDE to use.
    :param str genesisKDE: Optional. File name to save resulting CDF
                           to.
    :type lonLat: :class:`numpy.ndarray`

    :returns: When ``genesisKDE`` is ``None``, a
              :class:`numpy.ndarray` built from the days, the PDF and
              the CDF of the genesis days; otherwise ``None`` (days
              and CDF are written to ``genesisKDE``).
    """
    data = flLoadFile(genDays)
    days = np.arange(1, 366)
    ndays = np.concatenate([days - 365, days, days + 365])
    ndata = np.concatenate([data - 365, data, data + 365])

    # The KDE is fitted to the periodically extended data and evaluated
    # on the extended day grid. The earlier code referenced an
    # undefined ``grid`` and ``self.parameters`` here instead.
    if bw is None:
        bw = stats.bandwidth(ndata)

    kde = sm.nonparametric.KDEUnivariate(ndata)
    kde.fit(kernel=self.kdeType, bw=bw, fft=False,
            gridsize=len(ndays), clip=(ndays.min(), ndays.max()), cut=0)

    veceval = np.vectorize(kde.evaluate)
    pdf = np.nan_to_num(veceval(ndays))

    # Actual PDF to return: central third, rescaled.
    apdf = 3.0 * pdf[365:730]
    cy = stats.cdf(days, apdf)

    if genesisKDE is None:
        # NOTE(review): concatenate() yields a single 1-D array, so the
        # transpose has no effect; kept for backward compatibility.
        return np.transpose(np.array(np.concatenate([days, apdf, cy])))
    else:
        # Assume both kdeParameters and cdfParameters are defined as files:
        LOG.debug("Saving KDE and CDF data to files")
        flSaveFile(genesisKDE, np.transpose(np.array([days, cy])))
def generateSamples(self, ns, outputFile=None):
    """
    Generate random samples of cyclone origins.

    For every sample a uniform random number is located in
    ``self.cdfX`` to pick a longitude index, and a second one is
    located in the matching row of ``self.cdfY`` to pick a latitude
    index. The results are stored in ``self.oLon`` and ``self.oLat``.

    :param int ns: Number of samples to generate.
    :param str outputFile: If given, save the samples to the file
                           (nothing is returned in that case).

    :returns: :class:`numpy.ndarray` of shape (ns, 2) containing
              longitude and latitude of a random sample of TC origins.
    :raises ValueError: If :attr:`ns` <= 0.
    :raises IndexError: If an invalid index is returned when generating
                        uniform random values.
    """
    if ns <= 0:
        message = ("Invalid input on ns: number of samples "
                   "cannot be zero or negative")
        LOG.error(message)
        raise ValueError(message)

    # Generate 2 vectors of uniform random variables
    # (numpy.random.rand is what the deprecated scipy.rand aliased).
    unifX = np.random.rand(ns)
    unifY = np.random.rand(ns)

    self.oLon = np.empty(ns, 'd')
    self.oLat = np.empty(ns, 'd')

    # Report progress roughly every 1% of the samples. Integer division
    # gives 0 for ns < 100, which switches the messages off instead of
    # dividing by zero or taking a modulus by a fraction.
    step = ns // 100

    # For each random variable
    try:
        for i in range(ns):
            xi = self.cdfX.searchsorted(unifX[i])
            yj = self.cdfY[xi, :].searchsorted(unifY[i])
            if step and i and i % step == 0:
                LOG.debug("Processing %ith element" % i)
            self.oLon[i] = self.x[xi]
            self.oLat[i] = self.y[yj]
    except IndexError:
        LOG.debug("i = %s" % str(i))
        LOG.debug("unifX = %s" % str(unifX[i]))
        LOG.debug("unifY = %s" % str(unifY[i]))
        LOG.debug("cdfY[xi,:] = %s" % str(self.cdfY[xi, :]))
        raise

    if outputFile:
        flSaveFile(outputFile, np.transpose([self.oLon, self.oLat]),
                   fmt='%2.3f', header='Origin Lon, Origin Lat',
                   delimiter=',')
    else:
        lonLat = np.empty([ns, 2], 'd')
        lonLat[:, 0] = self.oLon
        lonLat[:, 1] = self.oLat
        return lonLat
def generateSamples(self, ns, outputFile=None):
    """
    Generate random samples of cyclone origins.

    For every sample a uniform random number is located in
    ``self.cdfX`` to pick a longitude index, and a second one is
    located in the matching row of ``self.cdfY`` to pick a latitude
    index. The results are stored in ``self.oLon`` and ``self.oLat``.

    :param int ns: Number of samples to generate.
    :param str outputFile: If given, save the samples to the file
                           (nothing is returned in that case).

    :returns: :class:`numpy.ndarray` of shape (ns, 2) containing
              longitude and latitude of a random sample of TC origins.
    :raises ValueError: If :attr:`ns` <= 0.
    :raises IndexError: If an invalid index is returned when generating
                        uniform random values.
    """
    if ns <= 0:
        message = 'Invalid input on ns: number of sample cannot be zero or negative'
        self.logger.error(message)
        raise ValueError(message)

    # Generate 2 vectors of uniform random variables
    # (numpy.random.rand is what the deprecated scipy.rand aliased).
    unifX = np.random.rand(ns)
    unifY = np.random.rand(ns)

    self.oLon = np.empty(ns, 'd')
    self.oLat = np.empty(ns, 'd')

    # Report progress roughly every 1% of the samples. Integer division
    # gives 0 for ns < 100, which switches the messages off instead of
    # raising ZeroDivisionError.
    step = ns // 100

    # For each random variable
    try:
        for i in range(ns):
            xi = self.cdfX.searchsorted(unifX[i])
            yj = self.cdfY[xi, :].searchsorted(unifY[i])
            if step and i and i % step == 0:
                self.logger.debug("Processing %ith element" % i)
            self.oLon[i] = self.x[xi]
            self.oLat[i] = self.y[yj]
    except IndexError:
        self.logger.debug("i = %s" % str(i))
        self.logger.debug("unifX = %s" % str(unifX[i]))
        self.logger.debug("unifY = %s" % str(unifY[i]))
        self.logger.debug("cdfY[xi,:] = %s" % str(self.cdfY[xi, :]))
        raise

    if outputFile:
        flSaveFile(outputFile, np.transpose([self.oLon, self.oLat]),
                   fmt='%2.3f', header='Origin Lon, Origin Lat',
                   delimiter=',')
    else:
        lonLat = np.empty([ns, 2], 'd')
        lonLat[:, 0] = self.oLon
        lonLat[:, 1] = self.oLat
        return lonLat
def _windSpeed(self, windSpeed):
    """Extract maximum sustained wind speeds

    Values above 200 are replaced in place by the missing-value
    sentinel (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does
    not exist).

    Input:
    windSpeed - array of windspeeds for TC observations (modified in
                place)

    Output:
    None - data is stored in self.data['windspeed'] when self.ncflag
           is set, otherwise written to file
    """
    self.logger.info('Extracting maximum sustained wind speeds')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)
    np.putmask(windSpeed, windSpeed > 200., missing)

    if self.ncflag:
        self.data['windspeed'] = windSpeed
    else:
        wind_speed = pjoin(self.processPath, 'wind_speed')
        self.logger.debug('Outputting data into %s', wind_speed)
        header = 'Maximum wind speed (m/s)'
        flSaveFile(wind_speed, windSpeed, header, fmt='%6.2f')
def _windSpeed(self, windSpeed):
    """Extract maximum sustained wind speeds

    Values above 200 are replaced in place by the missing-value
    sentinel (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does
    not exist).

    Input:
    windSpeed - array of windspeeds for TC observations (modified in
                place)

    Output:
    None - data is stored in self.data['windspeed'] when self.ncflag
           is set, otherwise written to file
    """
    self.logger.info('Extracting maximum sustained wind speeds')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)
    np.putmask(windSpeed, windSpeed > 200., missing)

    if self.ncflag:
        self.data['windspeed'] = windSpeed
    else:
        wind_speed = pjoin(self.processPath, 'wind_speed')
        self.logger.debug('Outputting data into %s', wind_speed)
        header = 'Maximum wind speed (m/s)'
        flSaveFile(wind_speed, windSpeed, header, fmt='%6.2f')
def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
    """
    Calculate the PDF of genesis day using KDEs.

    The data is periodic, so the observations (and the evaluation
    grid) are repeated for the previous and the following year. The
    KDE is evaluated on the extended grid, the central third of the
    result is kept and multiplied by three to obtain the required PDF.
    Probably not quite exact, but it should be sufficient for our
    purposes.

    :param str genDays: Name of file containing genesis days (as day
                        of year).
    :param lonLat: Array of genesis longitudes and latitudes (not used
                   by this method).
    :param float bw: Optional. Bandwidth of the KDE to use.
    :param str genesisKDE: Optional. File name to save resulting CDF
                           to.
    :type lonLat: :class:`numpy.ndarray`

    :returns: When ``genesisKDE`` is ``None``, a
              :class:`numpy.ndarray` built from the days, the PDF and
              the CDF of the genesis days; otherwise ``None``.
    """
    observed = flLoadFile(genDays)
    days = np.arange(1, 366)

    # Previous, current and next year stacked end to end.
    shifts = (-365, 0, 365)
    ndays = np.concatenate([days + shift for shift in shifts])
    ndata = np.concatenate([observed + shift for shift in shifts])

    if bw is None:
        bw = KPDF.UPDFOptimumBandwidth(ndata)

    try:
        kdeMethod = getattr(KPDF, "UPDF%s" % self.kdeType)
    except AttributeError:
        LOG.exception("Invalid input on option: KDE method UPDF%s does not exist",
                      self.kdeType)
        raise

    fullPdf = kdeMethod(ndata, ndays, bw)

    # Actual PDF to return: the central year, rescaled.
    apdf = 3.0 * fullPdf[365:730]
    cy = stats.cdf(days, apdf)

    if genesisKDE is not None:
        # Assume both kdeParameters and cdfParameters are defined as
        # files:
        LOG.debug("Saving KDE and CDF data to files")
        flSaveFile(genesisKDE, np.transpose(np.array([days, cy])))
        return None

    # NOTE(review): concatenate() yields a single 1-D array, so the
    # transpose has no effect; a (365, 3) table may have been intended.
    return np.transpose(np.array(np.concatenate([days, apdf, cy])))
def generateSamples(self, ns, sample_parameter_path=None):
    """
    Generate random samples of cyclone initial parameters.

    Uniform random numbers are located in the second column of
    ``self.xacy`` with ``searchsorted`` and the matching entries of
    the first column are taken as the samples. The samples are always
    stored in ``self.sample``.

    :param int ns: Number of samples to generate.
    :param str sample_parameter_path: Path to a file to save the
                                      sampled parameter values to.

    :returns: The sample values when no ``sample_parameter_path`` is
              given; otherwise the samples are passed to
              ``flSaveFile`` and ``None`` is returned.
    :raises ValueError: If ns <= 0.
    """
    if ns <= 0:
        # Call syntax works on both Python 2 and Python 3, unlike the
        # former ``raise ValueError, '...'`` statement.
        raise ValueError('invalid input on ns: number of sample cannot be zero or negative')

    unif_s = rand(ns)
    ind_kdf = self.xacy[:, 1].searchsorted(unif_s)
    self.sample = self.xacy[ind_kdf, 0]

    if sample_parameter_path:
        flSaveFile(sample_parameter_path, self.sample)
    else:
        return self.sample
def _rmaxRate(self, rmax, dt, indicator):
    """Extract the rate of size change from the rmax values.

    Entries corresponding to initial cyclone reports are set to the
    missing-value sentinel (``sys.maxint``, or ``sys.maxsize`` where
    ``maxint`` does not exist), as the change in rmax from the
    previous observation is undefined. Entries corresponding to
    records with no rmax observation are also set to the sentinel.

    Input:
    rmax - array of radii to maximum winds for TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['rmaxRate'] when self.ncflag
           is set, otherwise written to file
    """
    self.logger.info('Extracting the rate of size change')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    # Change in rmax. The first record has no predecessor: mark it NaN
    # (np.empty leaves it uninitialised) so it is always masked below.
    rmaxChange = np.empty(indicator.size, 'f')
    rmaxChange[:1] = np.nan
    rmaxChange[1:] = np.diff(rmax)

    # Rate of rmax change:
    rmaxRate = rmaxChange / dt

    # Mask rates corresponding to initial times and times when
    # the rmax is known to be missing.
    np.putmask(rmaxRate, indicator, missing)
    np.putmask(rmaxRate, rmax >= missing, missing)
    np.putmask(rmaxRate,
               (rmaxRate >= missing) | (rmaxRate <= -missing),
               missing)
    np.putmask(rmaxRate, np.isnan(rmaxRate), missing)

    if self.ncflag:
        self.data['rmaxRate'] = rmaxRate
    else:
        # Only announce the output file when one is actually written.
        rmax_rate = pjoin(self.processPath, 'rmax_rate')
        self.logger.debug('Outputting data into %s', rmax_rate)
        header = 'All rmax change rates (km/hr)'
        flSaveFile(rmax_rate, rmaxRate, header, fmt='%6.2f')
def _pressureRate(self, pressure, dt, indicator):
    """Extract the rate of pressure change from the pressure values.

    Entries corresponding to initial cyclone reports are set to the
    missing-value sentinel (``sys.maxint``, or ``sys.maxsize`` where
    ``maxint`` does not exist), as the change in pressure from the
    previous observation is undefined. Entries corresponding to
    records with no pressure observation are also set to the sentinel.

    Input:
    pressure - array of central pressure observations for TC
               observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['pressureRate'] when
           self.ncflag is set, otherwise written to file
    """
    self.logger.info('Extracting the rate of pressure change')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    # Change in pressure. The first record has no predecessor: mark it
    # NaN (np.empty leaves it uninitialised) so it is always masked.
    pressureChange = np.empty(indicator.size, 'f')
    pressureChange[:1] = np.nan
    pressureChange[1:] = np.diff(pressure)

    # Rate of pressure change:
    pressureRate = pressureChange / dt

    # Mask rates corresponding to initial times, times when
    # the pressure is known to be missing, and when the
    # pressure rate is greater than 10 hPa/hour (a sanity check).
    # The highest rate of intensification on record is
    # Typhoon Forrest (Sept 1983) 100 mb in 24 hrs.
    np.putmask(pressureRate, indicator, missing)
    np.putmask(pressureRate, pressure >= missing, missing)
    np.putmask(pressureRate, np.isnan(pressureRate), missing)
    np.putmask(pressureRate, np.abs(pressureRate) > 10, missing)

    if self.ncflag:
        self.data['pressureRate'] = pressureRate
    else:
        pressure_rate = pjoin(self.processPath, 'pressure_rate')
        self.logger.debug('Outputting data into %s', pressure_rate)
        header = 'All pressure change rates (hPa/hr)'
        flSaveFile(pressure_rate, pressureRate, header, fmt='%6.2f')
def _pressureRate(self, pressure, dt, indicator):
    """Extract the rate of pressure change from the pressure values.

    Entries corresponding to initial cyclone reports are set to the
    missing-value sentinel (``sys.maxint``, or ``sys.maxsize`` where
    ``maxint`` does not exist), as the change in pressure from the
    previous observation is undefined. Entries corresponding to
    records with no pressure observation are also set to the sentinel.

    Input:
    pressure - array of central pressure observations for TC
               observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['pressureRate'] when
           self.ncflag is set, otherwise written to file
    """
    self.logger.info('Extracting the rate of pressure change')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    # Change in pressure. The first record has no predecessor: mark it
    # NaN (np.empty leaves it uninitialised) so it is always masked.
    pressureChange = np.empty(indicator.size, 'f')
    pressureChange[:1] = np.nan
    pressureChange[1:] = np.diff(pressure)

    # Rate of pressure change:
    pressureRate = pressureChange / dt

    # Mask rates corresponding to initial times, times when
    # the pressure is known to be missing, and when the
    # pressure rate is greater than 10 hPa/hour (a sanity check).
    # The highest rate of intensification on record is
    # Typhoon Forrest (Sept 1983) 100 mb in 24 hrs.
    np.putmask(pressureRate, indicator, missing)
    np.putmask(pressureRate, pressure >= missing, missing)
    np.putmask(pressureRate, np.isnan(pressureRate), missing)
    np.putmask(pressureRate, np.abs(pressureRate) > 10, missing)

    if self.ncflag:
        self.data['pressureRate'] = pressureRate
    else:
        pressure_rate = pjoin(self.processPath, 'pressure_rate')
        self.logger.debug('Outputting data into %s', pressure_rate)
        header = 'All pressure change rates (hPa/hr)'
        flSaveFile(pressure_rate, pressureRate, header, fmt='%6.2f')
def _speedRate(self, dist, dt, indicator):
    """Extract the rate of speed change for each cyclone:
    Note this results in some odd values for the accelerations,
    propagated from odd position reports. Entries corresponding to
    initial position reports and the second observation are set to
    the missing-value sentinel (``sys.maxint``, or ``sys.maxsize``
    where ``maxint`` does not exist). The first entry is masked as
    there is no speed associated with it and the second is therefore
    non-sensical.

    Input:
    dist - array of distances between consecutive TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['speedRate'] when self.ncflag
           is set, otherwise written to file
    """
    self.logger.info(
        'Extracting the rate of speed change for each cyclone')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    speed = dist / dt

    # The first record has no predecessor: mark it NaN (np.empty leaves
    # it uninitialised) so it is always masked below.
    speedChange = np.empty(indicator.size, 'd')
    speedChange[:1] = np.nan
    speedChange[1:] = np.diff(speed)

    # Treat records with an out-of-range speed like initial records.
    indicator_ = indicator.copy()
    np.putmask(indicator_, (speed < 0) | (speed > 200), 1)

    speedRate = speedChange / dt

    # Mask the flagged records and the record following each of them.
    np.putmask(speedRate, indicator_, missing)
    np.putmask(speedRate[1:], indicator_[:-1], missing)
    np.putmask(speedRate,
               (speedRate >= missing) | (speedRate <= -missing),
               missing)
    np.putmask(speedRate, np.isnan(speedRate), missing)

    if self.ncflag:
        self.data['speedRate'] = speedRate
    else:
        speed_rate = pjoin(self.processPath, 'speed_rate')
        self.logger.debug('Outputting data into %s', speed_rate)
        header = 'All speed change rates (km/hr/hr)'
        flSaveFile(speed_rate, speedRate, header, fmt='%6.2f')
def _speedRate(self, dist, dt, indicator):
    """Extract the rate of speed change for each cyclone:
    Note this results in some odd values for the accelerations,
    propagated from odd position reports. Entries corresponding to
    initial position reports and the second observation are set to
    the missing-value sentinel (``sys.maxint``, or ``sys.maxsize``
    where ``maxint`` does not exist). The first entry is masked as
    there is no speed associated with it and the second is therefore
    non-sensical.

    Input:
    dist - array of distances between consecutive TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['speedRate'] when self.ncflag
           is set, otherwise written to file
    """
    self.logger.info(
        'Extracting the rate of speed change for each cyclone')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    speed = dist / dt

    # The first record has no predecessor: mark it NaN (np.empty leaves
    # it uninitialised) so it is always masked below.
    speedChange = np.empty(indicator.size, 'd')
    speedChange[:1] = np.nan
    speedChange[1:] = np.diff(speed)

    # Treat records with an out-of-range speed like initial records.
    indicator_ = indicator.copy()
    np.putmask(indicator_, (speed < 0) | (speed > 200), 1)

    speedRate = speedChange / dt

    # Mask the flagged records and the record following each of them.
    np.putmask(speedRate, indicator_, missing)
    np.putmask(speedRate[1:], indicator_[:-1], missing)
    np.putmask(speedRate,
               (speedRate >= missing) | (speedRate <= -missing),
               missing)
    np.putmask(speedRate, np.isnan(speedRate), missing)

    if self.ncflag:
        self.data['speedRate'] = speedRate
    else:
        speed_rate = pjoin(self.processPath, 'speed_rate')
        self.logger.debug('Outputting data into %s', speed_rate)
        header = 'All speed change rates (km/hr/hr)'
        flSaveFile(speed_rate, speedRate, header, fmt='%6.2f')
def _rmaxRate(self, rmax, dt, indicator):
    """Extract the rate of size change from the rmax values.

    Entries corresponding to initial cyclone reports are set to the
    missing-value sentinel (``sys.maxint``, or ``sys.maxsize`` where
    ``maxint`` does not exist), as the change in rmax from the
    previous observation is undefined. Entries corresponding to
    records with no rmax observation are also set to the sentinel.

    Input:
    rmax - array of radii to maximum winds for TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['rmaxRate'] when self.ncflag
           is set, otherwise written to file
    """
    self.logger.info('Extracting the rate of size change')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    # Change in rmax. The first record has no predecessor: mark it NaN
    # (np.empty leaves it uninitialised) so it is always masked below.
    rmaxChange = np.empty(indicator.size, 'f')
    rmaxChange[:1] = np.nan
    rmaxChange[1:] = np.diff(rmax)

    # Rate of rmax change:
    rmaxRate = rmaxChange / dt

    # Mask rates corresponding to initial times and times when
    # the rmax is known to be missing.
    np.putmask(rmaxRate, indicator, missing)
    np.putmask(rmaxRate, rmax >= missing, missing)
    np.putmask(rmaxRate,
               (rmaxRate >= missing) | (rmaxRate <= -missing),
               missing)
    np.putmask(rmaxRate, np.isnan(rmaxRate), missing)

    if self.ncflag:
        self.data['rmaxRate'] = rmaxRate
    else:
        # Only announce the output file when one is actually written.
        rmax_rate = pjoin(self.processPath, 'rmax_rate')
        self.logger.debug('Outputting data into %s', rmax_rate)
        header = 'All rmax change rates (km/hr)'
        flSaveFile(rmax_rate, rmaxRate, header, fmt='%6.2f')
def _frequency(self, years, indicator):
    """
    Generate a histogram of the annual frequency of events from the
    input data.

    The entries of ``years`` selected by ``indicator`` are counted in
    one-year bins spanning the smallest to the largest value of
    ``years``. The table of (year, count) is written with
    ``flSaveFile`` and the mean and standard deviation of the counts
    are logged. Nothing is written when all years are identical.

    Input:
    years - array of years for the observations
    indicator - array used to select the entries of ``years`` that
                are counted

    Output:
    None - data is written to file
    """
    self.logger.info('Extracting annual frequency of events')

    firstYear, lastYear = years.min(), years.max()
    genesisYears = years.compress(indicator)

    # A single year cannot be turned into a frequency histogram.
    if firstYear == lastYear:
        self.logger.info("First and last year of input data are the same")
        self.logger.info("Cannot generate histogram of frequency")
        return

    outputPath = pjoin(self.processPath, 'frequency')
    # One bin per year; the extra edge closes the last year's bin.
    bins = np.arange(firstYear, lastYear + 2, 1)
    counts = np.histogram(genesisYears, bins)[0]
    flSaveFile(outputPath, np.transpose([bins[:-1], counts]),
               'Year,count', fmt='%6.2f')

    self.logger.info("Mean annual frequency: %5.1f" % np.mean(counts))
    self.logger.info("Standard deviation: %5.1f" % np.std(counts))
def _bearingRate(self, bear, dt, indicator):
    """Extract the rate of bearing change for each cyclone:
    Entries corresponding to initial position reports and the second
    observation are set to the missing-value sentinel
    (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does not
    exist). The first entry is masked as there is no bearing
    associated with it and the second entry is therefore
    non-sensical.

    Input:
    bear - array of bearings between consecutive TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['bearingRate'] when
           self.ncflag is set, otherwise written to file
    """
    self.logger.info('Extracting the rate of bearing change')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    # Wrap differences larger than half a turn back by a full turn.
    bearingChange_ = np.diff(bear)
    tooHigh = bearingChange_ > 180.
    tooLow = bearingChange_ < -180.
    bearingChange_[tooHigh] -= 360.
    bearingChange_[tooLow] += 360.

    # The first record has no predecessor: mark it NaN (np.empty leaves
    # it uninitialised) so it is always masked below.
    bearingChange = np.empty(indicator.size, 'd')
    bearingChange[:1] = np.nan
    bearingChange[1:] = bearingChange_

    bearingRate = bearingChange / dt

    # Mask the initial records and the record following each of them.
    np.putmask(bearingRate, indicator, missing)
    np.putmask(bearingRate[1:], indicator[:-1], missing)
    np.putmask(bearingRate,
               (bearingRate >= missing) | (bearingRate <= -missing),
               missing)
    np.putmask(bearingRate, np.isnan(bearingRate), missing)

    if self.ncflag:
        self.data['bearingRate'] = bearingRate
    else:
        bearing_rate = pjoin(self.processPath, 'bearing_rate')
        self.logger.debug('Outputting data into %s', bearing_rate)
        header = 'All bearing change rates (degrees/hr)'
        flSaveFile(bearing_rate, bearingRate, header, fmt='%6.2f')
def _bearingRate(self, bear, dt, indicator):
    """Extract the rate of bearing change for each cyclone:
    Entries corresponding to initial position reports and the second
    observation are set to the missing-value sentinel
    (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does not
    exist). The first entry is masked as there is no bearing
    associated with it and the second entry is therefore
    non-sensical.

    Input:
    bear - array of bearings between consecutive TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data['bearingRate'] when
           self.ncflag is set, otherwise written to file
    """
    self.logger.info('Extracting the rate of bearing change')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    # Wrap differences larger than half a turn back by a full turn.
    bearingChange_ = np.diff(bear)
    tooHigh = bearingChange_ > 180.
    tooLow = bearingChange_ < -180.
    bearingChange_[tooHigh] -= 360.
    bearingChange_[tooLow] += 360.

    # The first record has no predecessor: mark it NaN (np.empty leaves
    # it uninitialised) so it is always masked below.
    bearingChange = np.empty(indicator.size, 'd')
    bearingChange[:1] = np.nan
    bearingChange[1:] = bearingChange_

    bearingRate = bearingChange / dt

    # Mask the initial records and the record following each of them.
    np.putmask(bearingRate, indicator, missing)
    np.putmask(bearingRate[1:], indicator[:-1], missing)
    np.putmask(bearingRate,
               (bearingRate >= missing) | (bearingRate <= -missing),
               missing)
    np.putmask(bearingRate, np.isnan(bearingRate), missing)

    if self.ncflag:
        self.data['bearingRate'] = bearingRate
    else:
        bearing_rate = pjoin(self.processPath, 'bearing_rate')
        self.logger.debug('Outputting data into %s', bearing_rate)
        header = 'All bearing change rates (degrees/hr)'
        flSaveFile(bearing_rate, bearingRate, header, fmt='%6.2f')
def _juliandays(self, jdays, indicator, years):
    """
    Generate a distribution of the formation day of year from
    observations.

    ``jdays`` is adjusted in place: for years divisible by four, days
    numbered 60 or later are shifted back by one. Three files are then
    written under ``self.processPath``: a histogram of the days
    selected by ``indicator`` ('jday_genesis'), a histogram of all
    days ('jday_obs') and the list of selected days ('jdays').

    Input:
    jdays - array of day-of-year values (modified in place)
    indicator - array used to select the genesis records
    years - year of each record

    Output:
    None - data is written to file
    """
    self.logger.info("Calculating annual distribution of observations")

    # Do a bodgy job of addressing 29th of February (there surely
    # must be a recommended way of accounting for leap years)
    leapShifted = [i for i in range(len(jdays))
                   if (years[i] % 4 == 0) and (jdays[i] >= 60)]
    for i in leapShifted:
        jdays[i] -= 1

    bins = np.arange(1, 367)
    firstDays = bins[:-1]
    header = 'Day,count'
    genesisDays = jdays.compress(indicator)

    # Distribution of genesis days (histogram):
    genesisCount = np.histogram(genesisDays, bins)[0]
    flSaveFile(pjoin(self.processPath, 'jday_genesis'),
               np.transpose([firstDays, genesisCount]),
               header, fmt='%d', delimiter=',')

    # Distribution of all days (histogram):
    allCount = np.histogram(jdays, bins)[0]
    flSaveFile(pjoin(self.processPath, 'jday_obs'),
               np.transpose([firstDays, allCount]),
               header, fmt='%d', delimiter=',')

    # All days:
    flSaveFile(pjoin(self.processPath, 'jdays'),
               np.transpose(genesisDays),
               header='Day', fmt='%d')
def _rmax(self, rmax, indicator):
    """Extract radii to maximum wind:

    Three data sets are produced: all rmax values, the values at
    records flagged by ``indicator``, and the values at unflagged
    records with entries at or above the missing-value sentinel
    (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does not
    exist) removed.

    Input:
    rmax - array of radii to maximum winds for TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data ('rmax', 'init_rmax',
           'rmax_no_init') when self.ncflag is set, otherwise
           written to file
    """
    self.logger.info("Extracting radii to maximum winds")

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    initrmax = rmax.compress(indicator)
    rmaxNoInit = rmax.compress(indicator == 0)
    rmaxNoInit = rmaxNoInit.compress(rmaxNoInit < missing)

    if self.ncflag:
        self.data['rmax'] = rmax
        self.data['init_rmax'] = initrmax
        self.data['rmax_no_init'] = rmaxNoInit
    else:
        init_rmax = pjoin(self.processPath, 'init_rmax')
        all_rmax = pjoin(self.processPath, 'all_rmax')
        rmax_no_init = pjoin(self.processPath, 'rmax_no_init')

        # extract all rmax
        self.logger.debug('Outputting data into %s', all_rmax)
        header = 'rMax (km)'
        flSaveFile(all_rmax, rmax, header, fmt='%6.2f')

        # extract initial rmax
        self.logger.debug('Outputting data into %s', init_rmax)
        header = 'initial rmax (km)'
        flSaveFile(init_rmax, initrmax, header, fmt='%6.2f')

        # extract rmax no init
        self.logger.debug('Outputting data into %s', rmax_no_init)
        header = 'rmax excluding initial ones (km)'
        flSaveFile(rmax_no_init, rmaxNoInit, header, fmt='%6.2f')
def _rmax(self, rmax, indicator):
    """Extract radii to maximum wind:

    Three data sets are produced: all rmax values, the values at
    records flagged by ``indicator``, and the values at unflagged
    records with entries at or above the missing-value sentinel
    (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does not
    exist) removed.

    Input:
    rmax - array of radii to maximum winds for TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output:
    None - data is stored in self.data ('rmax', 'init_rmax',
           'rmax_no_init') when self.ncflag is set, otherwise
           written to file
    """
    self.logger.info("Extracting radii to maximum winds")

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    initrmax = rmax.compress(indicator)
    rmaxNoInit = rmax.compress(indicator == 0)
    rmaxNoInit = rmaxNoInit.compress(rmaxNoInit < missing)

    if self.ncflag:
        self.data['rmax'] = rmax
        self.data['init_rmax'] = initrmax
        self.data['rmax_no_init'] = rmaxNoInit
    else:
        init_rmax = pjoin(self.processPath, 'init_rmax')
        all_rmax = pjoin(self.processPath, 'all_rmax')
        rmax_no_init = pjoin(self.processPath, 'rmax_no_init')

        # extract all rmax
        self.logger.debug('Outputting data into %s', all_rmax)
        header = 'rMax (km)'
        flSaveFile(all_rmax, rmax, header, fmt='%6.2f')

        # extract initial rmax
        self.logger.debug('Outputting data into %s', init_rmax)
        header = 'initial rmax (km)'
        flSaveFile(init_rmax, initrmax, header, fmt='%6.2f')

        # extract rmax no init
        self.logger.debug('Outputting data into %s', rmax_no_init)
        header = 'rmax excluding initial ones (km)'
        flSaveFile(rmax_no_init, rmaxNoInit, header, fmt='%6.2f')
def _frequency(self, years, indicator):
    """
    Generate a histogram of the annual frequency of events from the
    input data.

    The entries of ``years`` selected by ``indicator`` are counted in
    one-year bins spanning the smallest to the largest value of
    ``years``. The table of (year, count) is written with
    ``flSaveFile`` and the mean and standard deviation of the counts
    are logged. Nothing is written when all years are identical.

    Input:
    years - array of years for the observations
    indicator - array used to select the entries of ``years`` that
                are counted

    Output:
    None - data is written to file
    """
    self.logger.info('Extracting annual frequency of events')
    minYr = years.min()
    maxYr = years.max()
    genesisYears = years.compress(indicator)

    if minYr == maxYr:
        self.logger.info("First and last year of input data are the same")
        self.logger.info("Cannot generate histogram of frequency")
    else:
        frequency = pjoin(self.processPath, 'frequency')
        # One bin per year; the extra edge closes the last year's bin.
        bins = np.arange(minYr, maxYr + 2, 1)
        n, _ = np.histogram(genesisYears, bins)
        header = 'Year,count'
        flSaveFile(frequency, np.transpose([bins[:-1], n]),
                   header, fmt='%6.2f')

        # The value needs a placeholder in the message; passing it as a
        # bare extra argument makes the log record fail to format.
        self.logger.info("Mean annual frequency: %5.1f", np.mean(n))
        self.logger.info("Standard deviation: %5.1f", np.std(n))
def generateSamples(self, ns, outputFile=None):
    """
    Generate random samples of cyclone origins.

    For every sample a uniform random number is located in
    ``self.cdfX`` to pick a longitude index, and a second one is
    located in the matching row of ``self.cdfY`` to pick a latitude
    index. The results are stored in ``self.oLon`` and ``self.oLat``.

    :param int ns: Number of samples to generate.
    :param str outputFile: If given, save the samples to the file
                           (nothing is returned in that case).

    :returns: Array of shape (ns, 2) containing longitude and latitude
              of a random sample of TC origins.
    :raises ValueError: If ns <= 0.
    :raises IndexError: If an invalid index is returned when generating
                        uniform random values.
    """
    if ns <= 0:
        message = 'Invalid input on ns: number of sample cannot be zero or negative'
        self.logger.error(message)
        raise ValueError(message)

    # Generate 2 vectors of uniform random variables
    unifX = rand(ns)
    unifY = rand(ns)

    self.oLon = empty(ns, 'd')
    self.oLat = empty(ns, 'd')

    # Report progress roughly every 1% of the samples. Integer division
    # gives 0 for ns < 100, which switches the messages off instead of
    # raising ZeroDivisionError.
    step = ns // 100

    # For each random variable
    try:
        # range() rather than the Python-2-only xrange().
        for i in range(ns):
            xi = self.cdfX.searchsorted(unifX[i])
            yj = self.cdfY[xi, :].searchsorted(unifY[i])
            if step and i and i % step == 0:
                self.logger.debug("Processing %ith element" % i)
            self.oLon[i] = self.x[xi]
            self.oLat[i] = self.y[yj]
    except IndexError:
        self.logger.debug("i = %s" % str(i))
        self.logger.debug("unifX = %s" % str(unifX[i]))
        self.logger.debug("unifY = %s" % str(unifY[i]))
        self.logger.debug("cdfY[xi,:] = %s" % str(self.cdfY[xi, :]))
        raise

    if outputFile:
        flSaveFile(outputFile, transpose([self.oLon, self.oLat]),
                   fmt='%2.3f', header='Origin Lon, Origin Lat',
                   delimiter=',')
    else:
        lonLat = empty([ns, 2], 'd')
        lonLat[:, 0] = self.oLon
        lonLat[:, 1] = self.oLat
        return lonLat
def minPressureLat(self, pAllData, latData, latMin=-40., latMax=0.):
    """
    Plot the minimum central pressures as a function of latitude.

    Latitudes are rounded to whole numbers; for each one-unit step
    between ``latMin`` and ``latMax`` the smallest pressure at that
    latitude is taken after ``stats.statRemoveNum(pvals, 0)``. Steps
    with no usable value are set to 1020. The figure is saved through
    ``self.savefig`` and the table is written to
    ``min_pressure_lat.csv`` under ``self.outpath``.

    :param pAllData: Array of central pressures for each record.
    :param latData: Array of latitudes for each record.
    :param float latMin: Lower end of the latitude range.
    :param float latMax: Upper end of the latitude range.
    """
    rLat = numpy.round(latData, 0)
    lats = numpy.arange(latMin, latMax + 0.1, 1)
    minP = numpy.zeros(len(lats))

    for n, lat in enumerate(lats):
        idx = numpy.where(rLat == lat)[0]
        # Test the number of matches; the comparison used to sit
        # inside len(), which only worked by accident.
        if len(idx) > 0:
            pvals = stats.statRemoveNum(pAllData[idx], 0)
            if len(pvals) > 0:
                minP[n] = pvals.min()
            else:
                minP[n] = 1020.
        else:
            minP[n] = 1020.

    pyplot.figure(self.figurenum())
    pyplot.plot(lats, minP, 'r-', linewidth=2, label=r'Min $P_{centre}$')
    pyplot.xlim(latMin, latMax)
    pyplot.ylim(800, 1020)

    pyplot.xlabel('Latitude', fontsize=10)
    pyplot.ylabel('Minimum central pressure (hPa)', fontsize=10)
    pyplot.legend(loc=3)
    pyplot.grid(True)

    self.savefig("min_pressure_lat")

    # Two columns: latitude, minimum pressure.
    x = numpy.zeros((len(lats), 2))
    x[:, 0] = lats
    x[:, 1] = minP
    files.flSaveFile(os.path.join(self.outpath, 'min_pressure_lat.csv'),
                     x, delimiter=',', fmt='%6.2f')
def _speed(self, dist, dt, indicator, initIndex):
    """
    Extract speeds for all obs, initial obs and TC origins

    Speeds below 0, above 200, at records flagged by ``indicator`` or
    that are NaN are replaced by the missing-value sentinel
    (``sys.maxint``, or ``sys.maxsize`` where ``maxint`` does not
    exist). The "initial" speed of a track is taken from the record
    following each record flagged in ``initIndex``.

    Input:
    dist - array of distances between consecutive TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)
    initIndex - array of ones/zeros representing initial TC
                observations (excluding TCs with a single
                observation)

    Output:
    None - data is stored in self.data ('speed', 'init_speed',
           'speed_no_init') when self.ncflag is set, otherwise
           written to file
    """
    self.logger.info('Extracting speeds')

    # sys.maxint was removed in Python 3; fall back to sys.maxsize.
    missing = getattr(sys, 'maxint', sys.maxsize)

    speed = dist / dt
    # Delete speeds less than 0, greater than 200,
    # or where indicator == 1.
    np.putmask(speed, (speed < 0) | (speed > 200) | indicator, missing)
    np.putmask(speed, np.isnan(speed), missing)

    # The record after each flagged start carries the initial speed.
    initSpeedIndex = np.flatnonzero(initIndex[:-1]) + 1
    initSpeed = speed.take(initSpeedIndex)

    # Everything that is neither a start nor an initial-speed record.
    indicator_ = indicator.copy()
    indicator_.put(initSpeedIndex, 1)
    speedNoInit = speed.compress(indicator_ == 0)

    if self.ncflag:
        self.data['speed'] = speed
        self.data['init_speed'] = initSpeed
        self.data['speed_no_init'] = speedNoInit
    else:
        init_speed = pjoin(self.processPath, 'init_speed')
        all_speed = pjoin(self.processPath, 'all_speed')
        speed_no_init = pjoin(self.processPath, 'speed_no_init')

        # Extract all speeds
        self.logger.debug('Outputting data into %s', all_speed)
        header = 'all cyclone speed in km/hour'
        flSaveFile(all_speed, speed, header, fmt='%6.2f')

        # Extract initial speeds
        self.logger.debug('Outputting data into %s', init_speed)
        header = 'initial cyclone speed in km/hour'
        flSaveFile(init_speed, initSpeed, header, fmt='%f')

        # Extract speeds, excluding initial speeds
        self.logger.debug('Outputting data into %s', speed_no_init)
        header = 'cyclone speed without initial ones in km/hour'
        flSaveFile(speed_no_init, speedNoInit, header, fmt='%6.2f')
def _speed(self, dist, dt, indicator, initIndex):
    """
    Extract speeds for all obs, initial obs and TC origins.

    Speed is ``dist / dt``. Values that are negative, greater than 200,
    NaN, or located where ``indicator`` is non-zero are replaced with
    ``sys.maxint``. The results are either stored in ``self.data``
    (when ``self.ncflag`` is set) or written to the files
    ``all_speed``, ``init_speed`` and ``speed_no_init`` under
    ``self.processPath``.

    Input:
    dist - array of distances between consecutive TC observations
    dt - array of times between consecutive TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)
    initIndex - array of ones/zeros representing initial TC
                observations (excluding TCs with a single
                observation)

    Output: None - data is stored in ``self.data`` or written to file
    """
    self.logger.info('Extracting speeds')
    speed = dist / dt

    # Delete speeds less than 0, greater than 200,
    # or where indicator == 1.
    np.putmask(speed, (speed < 0) | (speed > 200) | indicator, sys.maxint)
    np.putmask(speed, np.isnan(speed), sys.maxint)

    # The initial speed of a track sits one position after the flagged
    # initial observation.
    initSpeedIndex = np.flatnonzero(initIndex[:-1]) + 1
    initSpeed = speed.take(initSpeedIndex)

    # Exclude the initial speeds too; work on a copy so the caller's
    # indicator array is not modified.
    indicator_ = indicator.copy()
    indicator_.put(initSpeedIndex, 1)
    speedNoInit = speed.compress(indicator_ == 0)

    if self.ncflag:
        self.data['speed'] = speed
        self.data['init_speed'] = initSpeed
        self.data['speed_no_init'] = speedNoInit
    else:
        init_speed = pjoin(self.processPath, 'init_speed')
        all_speed = pjoin(self.processPath, 'all_speed')
        speed_no_init = pjoin(self.processPath, 'speed_no_init')

        # NOTE(review): the headers below say m/s, but no unit
        # conversion happens in this method - confirm the units of
        # ``dist`` and ``dt`` supplied by the caller.

        # Extract all speeds
        # The path is passed as a logging argument; the message needs a
        # %s placeholder, otherwise logging reports a formatting error.
        self.logger.debug('Outputting data into %s', all_speed)
        header = 'all cyclone speed in m/s'
        flSaveFile(all_speed, speed, header, fmt='%6.2f')

        # Extract initial speeds
        self.logger.debug('Outputting data into %s', init_speed)
        header = 'initial cyclone speed in m/s'
        flSaveFile(init_speed, initSpeed, header, fmt='%f')

        # Extract speeds, excluding initial speeds
        self.logger.debug('Outputting data into %s', speed_no_init)
        header = 'cyclone speed without initial ones in m/s'
        flSaveFile(speed_no_init, speedNoInit, header, fmt='%6.2f')
def _bearing(self, bear, indicator, initIndex):
    """
    Split the bearing record into all, initial and non-initial values.

    ``bear`` is modified in place: positions where ``indicator`` is
    non-zero are overwritten with ``sys.maxint``. Initial bearings are
    the entries one position after each non-zero value of
    ``initIndex[:-1]``; the non-initial set excludes both those entries
    and the ``indicator`` positions.

    When ``self.ncflag`` is set the three arrays are stored in
    ``self.data``; otherwise they are written with ``flSaveFile`` to
    files under ``self.processPath``.

    Input:
    bear - array of bearing of TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)
    initIndex - array of ones/zeros representing initial TC
                observations (excluding TCs with a single
                observation)

    Output: None
    """
    self.logger.info('Extracting bearings')

    # All bearings, with track-start positions set to the sentinel.
    np.putmask(bear, indicator, sys.maxint)

    # Initial bearings.
    firstBearingIdx = 1 + np.flatnonzero(initIndex[:-1])
    initBearing = np.take(bear, firstBearingIdx)

    # Non-initial bearings; the mask is built on a copy of indicator.
    excluded = indicator.copy()
    np.put(excluded, firstBearingIdx, 1)
    bearingNoInit = np.compress(excluded == 0, bear)

    if self.ncflag:
        self.data['bearing'] = bear
        self.data['init_bearing'] = initBearing
        self.data['bearing_no_init'] = bearingNoInit
        return

    outputs = (
        ('all_bearing', bear,
         'all cyclone bearing in degrees'),
        ('init_bearing', initBearing,
         'initial cyclone bearing in degrees'),
        ('bearing_no_init', bearingNoInit,
         'cyclone bearings without initial ones in degrees'),
    )
    for stem, values, header in outputs:
        target = pjoin(self.processPath, stem)
        self.logger.debug('Outputting data into {0}'.format(target))
        flSaveFile(target, values, header, fmt='%6.2f')
def _bearing(self, bear, indicator, initIndex):
    """
    Extract bearings for all obs, initial obs and non-initial obs.

    Positions of ``bear`` where ``indicator`` is non-zero are replaced
    (in place) with ``sys.maxint``. The three resulting arrays are
    stored in ``self.data`` when ``self.ncflag`` is set, and otherwise
    saved with ``flSaveFile`` under ``self.processPath``.

    Input:
    bear - array of bearing of TC observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)
    initIndex - array of ones/zeros representing initial TC
                observations (excluding TCs with a single
                observation)

    Output: None
    """
    self.logger.info('Extracting bearings')

    np.putmask(bear, indicator, sys.maxint)

    # An initial bearing sits one position after a flagged initial
    # observation.
    startPositions = np.flatnonzero(initIndex[:-1]) + 1
    initBearing = bear.take(startPositions)

    skip = indicator.copy()
    skip.put(startPositions, 1)
    keep = (skip == 0)
    bearingNoInit = bear.compress(keep)

    # (self.data key, file stem, values, file header)
    products = [
        ('bearing', 'all_bearing', bear,
         'all cyclone bearing in degrees'),
        ('init_bearing', 'init_bearing', initBearing,
         'initial cyclone bearing in degrees'),
        ('bearing_no_init', 'bearing_no_init', bearingNoInit,
         'cyclone bearings without initial ones in degrees'),
    ]

    if self.ncflag:
        for key, _, values, _ in products:
            self.data[key] = values
    else:
        for _, stem, values, header in products:
            outputFile = pjoin(self.processPath, stem)
            self.logger.debug('Outputting data into %s' % outputFile)
            flSaveFile(outputFile, values, header, fmt='%6.2f')
def _pressure(self, pressure, indicator):
    """
    Split the pressure record into all, initial and non-initial values.

    Initial pressures are the entries where ``indicator`` is non-zero.
    Non-initial pressures are the remaining entries, keeping only those
    below ``sys.maxint``.

    When ``self.ncflag`` is set the arrays are stored in ``self.data``;
    otherwise they are written with ``flSaveFile`` to files under
    ``self.processPath``.

    Input:
    pressure - array of central pressure observations for TC
               observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output: None
    """
    self.logger.info('Extracting pressures')

    initPressure = np.compress(indicator, pressure)
    remainder = np.compress(indicator == 0, pressure)
    pressureNoInit = np.compress(remainder < sys.maxint, remainder)

    if self.ncflag:
        self.data['pressure'] = pressure
        self.data['init_pressure'] = initPressure
        self.data['pressure_no_init'] = pressureNoInit
        return

    outputs = (
        ('all_pressure', pressure,
         'all cyclone pressure in hPa'),
        ('init_pressure', initPressure,
         'initial cyclone pressure in hPa'),
        ('pressure_no_init', pressureNoInit,
         'cyclone pressure without initial ones in hPa'),
    )
    for stem, values, header in outputs:
        target = pjoin(self.processPath, stem)
        self.logger.debug('Outputting data into {0}'.format(target))
        flSaveFile(target, values, header, fmt='%7.2f')
def _pressure(self, pressure, indicator):
    """
    Extract pressure for all obs, initial obs and non-initial obs.

    The initial set holds entries where ``indicator`` is non-zero. The
    non-initial set holds the other entries, keeping only values below
    ``sys.maxint``. Results are stored in ``self.data`` when
    ``self.ncflag`` is set, and otherwise saved with ``flSaveFile``
    under ``self.processPath``.

    Input:
    pressure - array of central pressure observations for TC
               observations
    indicator - array of ones/zeros representing initial TC
                observations (including TCs with a single
                observation)

    Output: None
    """
    self.logger.info('Extracting pressures')

    initPressure = pressure.compress(indicator)
    laterPressure = pressure.compress(indicator == 0)
    valid = laterPressure < sys.maxint
    pressureNoInit = laterPressure.compress(valid)

    # (self.data key, file stem, values, file header)
    products = [
        ('pressure', 'all_pressure', pressure,
         'all cyclone pressure in hPa'),
        ('init_pressure', 'init_pressure', initPressure,
         'initial cyclone pressure in hPa'),
        ('pressure_no_init', 'pressure_no_init', pressureNoInit,
         'cyclone pressure without initial ones in hPa'),
    ]

    if self.ncflag:
        for key, _, values, _ in products:
            self.data[key] = values
    else:
        for _, stem, values, header in products:
            outputFile = pjoin(self.processPath, stem)
            self.logger.debug('Outputting data into %s' % outputFile)
            flSaveFile(outputFile, values, header, fmt='%7.2f')
def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                cdfParameters=None, angular=False, periodic=False,
                missingValue=sys.maxint):
    """
    Generate a PDF and CDF for a given parameter set using the method
    of kernel density estimators. Optionally return the PDF and CDF as
    an array, or write both to separate files.

    :param parameters: Parameter values. If a string is given it is
                       treated as the path of a file to load the values
                       from; otherwise it must be an array with more
                       than one member.
    :param kdeStep: Increment of the grid on which the distributions
                    are evaluated.
    :param kdeParameters: Optional path of the file to save the
                          (grid, PDF) pairs to. If ``None``, nothing is
                          saved and the arrays are returned.
    :param cdfParameters: Path of the file to save the (grid, CDF)
                          pairs to. Used only when ``kdeParameters`` is
                          given, and assumed to be set in that case.
    :param angular: If true, the grid spans 0 to 360.
    :param periodic: ``False``, or the period of the data. When given,
                     the data and grid are replicated one period either
                     side before the density is estimated and the
                     central period is then extracted.
    :param missingValue: Value passed to ``stats.statRemoveNum`` to be
                         removed from the parameters.

    :returns: If ``kdeParameters`` is ``None``, an array whose columns
              are the grid, the PDF and the CDF; otherwise ``None``.
    :raises IndexError: If ``parameters`` is an array with one member
                        or fewer.
    :raises ValueError: If the evaluation grid has fewer than two
                        points.
    """
    self.logger.debug("Running generateKDE")
    if isinstance(parameters, str):
        self.parameters = stats.statRemoveNum(
            flLoadFile(parameters, '%', ','), missingValue)
    else:
        if parameters.size <= 1:
            self.logger.error("Insufficient members in parameter list")
            raise IndexError("Insufficient members in parameter list")

        self.parameters = stats.statRemoveNum(parameters, missingValue)

    if angular:
        xmin = 0.0
        xmax = 360.0
    elif periodic:
        xmin = 0.0
        xmax = periodic
    else:
        xmin = self.parameters.min()
        xmax = self.parameters.max()

    self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                      xmin, xmax, kdeStep)

    if periodic:
        # Replicate the grid and the data one period either side so the
        # estimate wraps around the ends of the period.
        x = np.arange(1, periodic + 1, kdeStep)
        self.grid = np.concatenate([x - periodic, x, x + periodic])
        self.parameters = np.concatenate([self.parameters - periodic,
                                          self.parameters,
                                          self.parameters + periodic])
    else:
        self.grid = np.arange(xmin, xmax, kdeStep)

    if self.grid.size < 2:
        self.logger.critical("Grid for CDF generation is a single value")
        self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                             xmin, xmax, kdeStep)
        raise ValueError("Grid for CDF generation is a single value")

    bw = KPDF.UPDFOptimumBandwidth(self.parameters)
    self.pdf = self._generatePDF(self.grid, bw, self.parameters)

    if periodic:
        # Slice bounds must be integers; periodic / kdeStep is a float
        # whenever kdeStep is a float.
        idx = int(periodic / kdeStep)
        # Keep the central period; the factor 3 rescales the density
        # that was estimated over three copies of the data.
        self.pdf = 3.0 * self.pdf[idx:2 * idx]
        self.grid = self.grid[idx:2 * idx]

    self.cy = stats.cdf(self.grid, self.pdf)

    if kdeParameters is None:
        return np.transpose(np.array([self.grid, self.pdf, self.cy]))
    else:
        # Assume both kdeParameters and cdfParameters are defined as files:
        self.logger.debug("Saving KDE and CDF data to files")
        flSaveFile(kdeParameters,
                   np.transpose(np.array([self.grid, self.pdf])))
        flSaveFile(cdfParameters,
                   np.transpose(np.array([self.grid, self.cy])))
def allDistributions(self, lonLat, parameterList, parameterName=None,
                     kdeStep=0.1, angular=False, periodic=False,
                     plotParam=False):
    """
    Calculate a distribution for each individual cell and store in a
    file or return the distribution.

    :param lonLat: The longitude/latitude of all observations in
                   the model domain. If a string is given, then
                   it is the path to a file containing the
                   longitude/latitude information. If an array
                   is given, then it should be a 2-d array
                   containing the data values.
    :type  lonLat: str or :class:`numpy.ndarray`
    :param parameterList: Parameter values. If a string is given,
                          then it is the path to a file containing
                          the values. If an array is passed, then it
                          should hold the parameter values.
    :type  parameterList: str or :class:`numpy.ndarray`
    :param str parameterName: Optional. If given, then the
                              cell distributions will be saved to a
                              file with this name. If absent,
                              the distribution values are returned.
    :param kdeStep: Increment of the ordinate values at which
                    the distributions will be calculated.
    :type  kdeStep: float, default=`0.1`
    :param angular: Does the data represent an angular measure
                    (e.g. bearing).
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic
                     data (e.g. day of year). If given, it should
                     be the period of the data (e.g. for annual data,
                     ``periodic=365``).
    :type  periodic: boolean or float, default=``False``
    :param boolean plotParam: Plot the parameters. Default is
                              ``False``.

    :returns: If ``parameterName`` is ``None``, a
              :class:`numpy.ndarray` with one row per grid point and
              the columns cell number, CDF x value, CDF y value.
              Otherwise ``None``; the data are saved to a text file
              and a netCDF file instead.
    """
    if parameterName:
        self.logger.debug("Running allDistributions for %s", parameterName)
    else:
        self.logger.debug("Running allDistributions")

    if isinstance(lonLat, str):
        self.logger.debug("Loading lat/lon data from file")
        self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
    else:
        self.lonLat = lonLat

    if isinstance(parameterList, str):
        self.logger.debug("Loading parameter data from file: %s",
                          parameterList)
        self.pList = np.array(flLoadFile(parameterList))
    else:
        self.pList = parameterList

    self.pName = parameterName

    maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

    # Writing CDF dataset for all individual cell number into files
    self.logger.debug(
        "Writing CDF dataset for all individual cells into files")

    # Collect one block per cell and join them once at the end, rather
    # than re-copying the growing result array on every iteration.
    cellBlocks = []
    resultSize = 0
    for cellNum in range(0, maxCellNum + 1):
        self.logger.debug("Processing cell number %i", cellNum)

        # Generate cyclone parameter data for the cell number
        self.extractParameter(cellNum)

        # Estimate cyclone parameter data using KDE
        # The returned array contains the grid, the PDF and the CDF
        cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                            angular=angular,
                                            periodic=periodic)
        if plotParam:
            self._plotParameter(cellNum, kdeStep)
        self.logger.debug(
            'size of parameter array = %d: size of cdf array = %d',
            self.parameter.size, cdf.size)

        if cellBlocks:
            self.logger.debug('size of results array = %s', resultSize)

        # Columns: cell number, grid value (CDF x), CDF value (CDF y).
        block = np.transpose(np.array([[cellNum] * len(cdf),
                                       cdf[:, 0], cdf[:, 2]]))
        cellBlocks.append(block)
        resultSize += block.size

    results = np.concatenate(cellBlocks)

    if parameterName is None:
        self.logger.debug(
            "Returning CDF dataset for all individual cell numbers")
        return results
    else:
        cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                    self.pName + "_y"
        allCellCdfOutput = pjoin(self.outputPath, 'process',
                                 'all_cell_cdf_' + self.pName)
        args = {"filename": allCellCdfOutput, "data": results,
                "header": cdfHeader, "delimiter": ",", "fmt": "%f"}
        self.logger.debug(
            "Writing CDF dataset for all individual cell numbers into files")
        flSaveFile(**args)

        # Save to netcdf too
        filename = allCellCdfOutput + '.nc'

        ncdf = Dataset(filename, 'w')
        try:
            ncdf.createDimension('cell', len(results[:, 0]))
            cell = ncdf.createVariable('cell', 'i', ('cell',))
            cell[:] = results[:, 0]

            x = ncdf.createVariable('x', 'f', ('cell',))
            x[:] = results[:, 1]
            y = ncdf.createVariable('CDF', 'f', ('cell',))
            y[:] = results[:, 2]
        finally:
            # Close the file even if writing a variable fails.
            ncdf.close()
def processData(self, restrictToWindfieldDomain=False):
    """
    Process raw data into ASCII files that can be read by the main
    components of the system

    The method reads the configuration file, loads the input track
    data, converts pressure, wind speed and radius values to hPa, m/s
    and km (preserving ``sys.maxint`` entries as missing values), and
    then hands the derived arrays to the ``_lonLat``, ``_bearing``,
    ``_speed``, ``_pressure`` etc. methods of this class.

    :param bool restrictToWindfieldDomain: if True, only process data
        within the wind field domain, otherwise, process data from
        across the track generation domain.

    """
    config = ConfigParser()
    config.read(self.configFile)

    self.logger.info("Running %s" % flModuleName())

    # NOTE(review): if neither 'InputFile' nor 'Source' is present in
    # the 'DataProcess' section, ``inputFile`` is never assigned and the
    # os.path.dirname() call below fails with a NameError.
    if config.has_option('DataProcess', 'InputFile'):
        inputFile = config.get('DataProcess', 'InputFile')

    # A 'Source' option takes precedence: the input file is built from
    # the 'path' and 'filename' options of the section it names.
    if config.has_option('DataProcess', 'Source'):
        source = config.get('DataProcess', 'Source')
        self.logger.info('Loading %s dataset', source)
        fn = config.get(source, 'filename')
        path = config.get(source, 'path')
        inputFile = pjoin(path, fn)

    # If input file has no path information, default to tcrm input folder
    if len(os.path.dirname(inputFile)) == 0:
        inputFile = pjoin(self.tcrm_input_dir, inputFile)

    self.logger.info("Processing %s" % inputFile)

    self.source = config.get('DataProcess', 'Source')

    inputData = colReadCSV(self.configFile, inputFile, self.source)

    inputSpeedUnits = config.get(self.source, 'SpeedUnits')
    inputPressureUnits = config.get(self.source, 'PressureUnits')
    inputLengthUnits = config.get(self.source, 'LengthUnits')
    startSeason = config.getint('DataProcess', 'StartSeason')

    indicator = loadData.getInitialPositions(inputData)
    lat = np.array(inputData['lat'], 'd')
    # Wrap longitudes into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)

    if restrictToWindfieldDomain:
        # Filter the input arrays to only retain the tracks that
        # pass through the windfield domain.
        CD = CalcTrackDomain(self.configFile)
        self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
        domainIndex = self.extractTracks(indicator, lon, lat)
        inputData = inputData[domainIndex]
        indicator = indicator[domainIndex]
        lon = lon[domainIndex]
        lat = lat[domainIndex]

    if self.progressbar is not None:
        self.progressbar.update(0.125)

    # Sort date/time information
    # NOTE(review): the nesting of this try/except was reconstructed
    # from whitespace-collapsed source. The date parsing is placed
    # inside the handler because otherwise the 'age'-based ``dt`` would
    # always be overwritten. With this nesting, ``year`` and ``jdays``
    # are only assigned when the 'age' column is unavailable - later
    # uses of them would raise a NameError otherwise. Confirm.
    try:
        dt = np.empty(indicator.size, 'f')
        dt[1:] = np.diff(inputData['age'])
    except (ValueError, KeyError):

        try:
            self.logger.info("Filtering input data by season: season > %d"%startSeason)
            # Find indices that satisfy minimum season filter
            idx = np.where(inputData['season'] >= startSeason)[0]
            # Filter records:
            inputData = inputData[idx]
            indicator = indicator[idx]
            lon = lon[idx]
            lat = lat[idx]
        except (ValueError, KeyError):
            # No usable 'season' column: continue unfiltered.
            pass

        year, month, day, hour, minute, datetimes \
            = loadData.parseDates(inputData, indicator)

        # Time between observations:
        dt = loadData.getTimeDelta(year, month, day, hour, minute)

        # Calculate julian days:
        jdays = loadData.julianDays(year, month, day, hour, minute)

    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon >
    # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
    # being accidentally combined when seasons and track numbers
    # match but basins are different as occurs in the IBTrACS
    # dataset.  This problem can also be prevented if the
    # 'tcserialno' column is specified.
    # NOTE(review): the differences are signed, so only positive jumps
    # are detected here.
    indicator[np.where(delta_lon > 15)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    # Save information required for frequency auto-calculation
    try:
        origin_seasonOrYear = np.array(
            inputData['season'], 'i').compress(indicator)
        header = 'Season'
    except (ValueError, KeyError):
        origin_seasonOrYear = year.compress(indicator)
        header = 'Year'

    flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
               header, ',', fmt='%d')

    # Convert pressure to hPa, restoring the missing-value sentinel at
    # the positions that held it before conversion.
    pressure = np.array(inputData['pressure'], 'd')
    novalue_index = np.where(pressure == sys.maxint)
    pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
    pressure[novalue_index] = sys.maxint

    # Convert any non-physical central pressure values to maximum integer
    # This is required because IBTrACS has a mix of missing value codes
    # (i.e. -999, 0, 9999) in the same global dataset.
    pressure = np.where((pressure < 600) | (pressure > 1100),
                        sys.maxint, pressure)

    if self.progressbar is not None:
        self.progressbar.update(0.25)

    try:
        vmax = np.array(inputData['vmax'], 'd')
    except (ValueError, KeyError):
        self.logger.warning("No max wind speed data")
        # NOTE(review): np.empty leaves the contents uninitialised.
        vmax = np.empty(indicator.size, 'f')
    else:
        novalue_index = np.where(vmax == sys.maxint)
        vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
        vmax[novalue_index] = sys.maxint

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size
    #assert vmax.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == sys.maxint)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = sys.maxint

        self._rmax(rmax, indicator)
        self._rmaxRate(rmax, dt, indicator)
    except (ValueError, KeyError):
        self.logger.warning("No rmax data available")

    if self.ncflag:
        self.data['index'] = indicator

    # ieast : parameter used in latLon2Azi
    # FIXME: should be a config setting describing the input data.
    ieast = 1

    # Determine the index of initial cyclone observations, excluding
    # those cyclones that have only one observation. This is used
    # for calculating initial bearing and speed
    indicator2 = np.where(indicator > 0, 1, 0)
    initIndex = np.concatenate([np.where(np.diff(indicator2) == -1, 1, 0), [0]])

    # Calculate the bearing and distance (km) of every two
    # consecutive records using ll2azi
    bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
    assert bear_.size == indicator.size - 1
    assert dist_.size == indicator.size - 1
    # Pad to the observation count; element 0 is left uninitialised by
    # np.empty.
    bear = np.empty(indicator.size, 'f')
    bear[1:] = bear_
    dist = np.empty(indicator.size, 'f')
    dist[1:] = dist_

    self._lonLat(lon, lat, indicator, initIndex)
    self._bearing(bear, indicator, initIndex)
    self._bearingRate(bear, dt, indicator)
    if self.progressbar is not None:
        self.progressbar.update(0.375)
    self._speed(dist, dt, indicator, initIndex)
    self._speedRate(dist, dt, indicator)
    self._pressure(pressure, indicator)
    self._pressureRate(pressure, dt, indicator)
    self._windSpeed(vmax)

    try:
        self._frequency(year, indicator)
        self._juliandays(jdays, indicator, year)
    except (ValueError, KeyError):
        pass

    self.logger.info("Completed %s" % flModuleName())
    if self.progressbar is not None:
        self.progressbar.update(0.5)
def _lonLat(self, lon, lat, indicator, initIndex):
    """
    Extract longitudes and latitudes for all obs, initial obs, TC
    origins and determine a land/sea flag indicating if the TC
    position is over land or sea.

    The flag is 1 where ``self.landmask.sampleGrid(x, y)`` is greater
    than zero and 0 elsewhere.

    Input:
    lon - array of TC longitudes
    lat - array of TC latitudes
    indicator - array of ones/zeros representing initial TC
                observations, including TCs with a single
                observation
    initIndex - array of ones/zeros representing initial TC
                observations, excluding TCs with a single
                observation

    Output: None - data is stored in ``self.data`` when
            ``self.ncflag`` is set, otherwise written to file

    """

    self.logger.info('Extracting longitudes and latitudes')
    # Land/sea flag for every observation.
    lsflag = np.zeros(len(lon))
    for i, [x, y] in enumerate(zip(lon, lat)):
        if self.landmask.sampleGrid(x, y) > 0:
            lsflag[i] = 1

    # Values selected by ``indicator``.
    lonOne = lon.compress(indicator)
    latOne = lat.compress(indicator)
    lsflagOne = lsflag.compress(indicator)
    # Values selected by ``initIndex``.
    lonInit = lon.compress(initIndex)
    latInit = lat.compress(initIndex)
    lsflagInit = lsflag.compress(initIndex)

    origin_lon_lat = pjoin(self.processPath, 'origin_lon_lat')
    init_lon_lat = pjoin(self.processPath, 'init_lon_lat')
    all_lon_lat = pjoin(self.processPath, 'all_lon_lat')

    # Output the lon & lat of cyclone origins
    # (these messages are logged even when ncflag is set and no file is
    # written)
    self.logger.debug('Outputting data into %s' % init_lon_lat)
    self.logger.debug('Outputting data into %s' % origin_lon_lat)
    self.logger.debug('Outputting data into %s' % all_lon_lat)

    header = 'Longitude, Latitude, LSFlag'
    if self.ncflag:
        self.data['longitude'] = lon
        self.data['latitude'] = lat
        self.data['lsflag'] = lsflag
    else:
        flSaveFile(origin_lon_lat,
                   np.transpose([lonOne, latOne, lsflagOne]),
                   header, ',', fmt='%6.2f')
        flSaveFile(init_lon_lat,
                   np.transpose([lonInit, latInit, lsflagInit]),
                   header, ',', fmt='%6.2f')
        flSaveFile(all_lon_lat,
                   np.transpose([lon, lat, lsflag]),
                   header, ',', fmt='%6.2f')

        # NOTE(review): indentation was reconstructed from
        # whitespace-collapsed source; the cyclone_tracks output is
        # assumed to belong to this file-writing branch - confirm.
        # Output all cyclone positions:
        cyclone_tracks = pjoin(self.processPath, 'cyclone_tracks')
        self.logger.debug('Outputting data into %s' % cyclone_tracks)
        header = 'Cyclone Origin,Longitude,Latitude, LSflag'
        flSaveFile(cyclone_tracks,
                   np.transpose([indicator, lon, lat, lsflag]),
                   header, ',', fmt='%6.2f')
def _lonLat(self, lon, lat, indicator, initIndex):
    """
    Extract longitudes and latitudes for all obs, initial obs, TC
    origins and determine a land/sea flag indicating if the TC
    position is over land or sea.

    The flag is 1 where ``self.landmask.sampleGrid(x, y)`` is greater
    than zero and 0 elsewhere.

    Input:
    lon - array of TC longitudes
    lat - array of TC latitudes
    indicator - array of ones/zeros representing initial TC
                observations, including TCs with a single
                observation
    initIndex - array of ones/zeros representing initial TC
                observations, excluding TCs with a single
                observation

    Output: None - data is stored in ``self.data`` when
            ``self.ncflag`` is set, otherwise written to file

    """

    self.logger.info('Extracting longitudes and latitudes')
    # Land/sea flag for every observation.
    lsflag = np.zeros(len(lon))
    for i, [x, y] in enumerate(zip(lon, lat)):
        if self.landmask.sampleGrid(x, y) > 0:
            lsflag[i] = 1

    # Values selected by ``indicator``.
    lonOne = lon.compress(indicator)
    latOne = lat.compress(indicator)
    lsflagOne = lsflag.compress(indicator)
    # Values selected by ``initIndex``.
    lonInit = lon.compress(initIndex)
    latInit = lat.compress(initIndex)
    lsflagInit = lsflag.compress(initIndex)

    origin_lon_lat = pjoin(self.processPath, 'origin_lon_lat')
    init_lon_lat = pjoin(self.processPath, 'init_lon_lat')
    all_lon_lat = pjoin(self.processPath, 'all_lon_lat')

    # Output the lon & lat of cyclone origins
    # (these messages are logged even when ncflag is set and no file is
    # written)
    self.logger.debug('Outputting data into {0}'.format(init_lon_lat))
    self.logger.debug('Outputting data into {0}'.format(origin_lon_lat))
    self.logger.debug('Outputting data into {0}'.format(all_lon_lat))

    header = 'Longitude, Latitude, LSFlag'
    if self.ncflag:
        self.data['longitude'] = lon
        self.data['latitude'] = lat
        self.data['lsflag'] = lsflag
    else:
        flSaveFile(origin_lon_lat,
                   np.transpose([lonOne, latOne, lsflagOne]),
                   header, ',', fmt='%6.2f')
        flSaveFile(init_lon_lat,
                   np.transpose([lonInit, latInit, lsflagInit]),
                   header, ',', fmt='%6.2f')
        flSaveFile(all_lon_lat,
                   np.transpose([lon, lat, lsflag]),
                   header, ',', fmt='%6.2f')

        # NOTE(review): indentation was reconstructed from
        # whitespace-collapsed source; the cyclone_tracks output is
        # assumed to belong to this file-writing branch - confirm.
        # Output all cyclone positions:
        cyclone_tracks = pjoin(self.processPath, 'cyclone_tracks')
        self.logger.debug(
            'Outputting data into {0}'.format(cyclone_tracks))
        header = 'Cyclone Origin,Longitude,Latitude, LSflag'
        flSaveFile(cyclone_tracks,
                   np.transpose([indicator, lon, lat, lsflag]),
                   header, ',', fmt='%6.2f')
def processData(self, restrictToWindfieldDomain=False):
    """
    Process raw data into ASCII files that can be read by the main
    components of the system

    The method reads the configuration file, loads the input track
    data, converts pressure, wind speed and radius values to hPa, m/s
    and km (preserving ``sys.maxint`` entries as missing values), and
    then hands the derived arrays to the ``_lonLat``, ``_bearing``,
    ``_speed``, ``_pressure`` etc. methods of this class.

    :param bool restrictToWindfieldDomain: if True, only process data
        within the wind field domain, otherwise, process data from
        across the track generation domain.

    """
    config = ConfigParser()
    config.read(self.configFile)

    self.logger.info("Running {0}".format(flModuleName()))

    # NOTE(review): if neither 'InputFile' nor 'Source' is present in
    # the 'DataProcess' section, ``inputFile`` is never assigned and the
    # os.path.dirname() call below fails with a NameError.
    if config.has_option('DataProcess', 'InputFile'):
        inputFile = config.get('DataProcess', 'InputFile')

    # A 'Source' option takes precedence: the input file is built from
    # the 'path' and 'filename' options of the section it names.
    if config.has_option('DataProcess', 'Source'):
        source = config.get('DataProcess', 'Source')
        self.logger.info('Loading %s dataset', source)
        fn = config.get(source, 'filename')
        path = config.get(source, 'path')
        inputFile = pjoin(path, fn)

    # If input file has no path information, default to tcrm input folder
    if len(os.path.dirname(inputFile)) == 0:
        inputFile = pjoin(self.tcrm_input_dir, inputFile)

    self.logger.info("Processing {0}".format(inputFile))

    self.source = config.get('DataProcess', 'Source')

    inputData = colReadCSV(self.configFile, inputFile, self.source)

    inputSpeedUnits = config.get(self.source, 'SpeedUnits')
    inputPressureUnits = config.get(self.source, 'PressureUnits')
    inputLengthUnits = config.get(self.source, 'LengthUnits')
    startSeason = config.getint('DataProcess', 'StartSeason')

    indicator = loadData.getInitialPositions(inputData)
    lat = np.array(inputData['lat'], 'd')
    # Wrap longitudes into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)

    if restrictToWindfieldDomain:
        # Filter the input arrays to only retain the tracks that
        # pass through the windfield domain.
        CD = CalcTrackDomain(self.configFile)
        self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
        domainIndex = self.extractTracks(indicator, lon, lat)
        inputData = inputData[domainIndex]
        indicator = indicator[domainIndex]
        lon = lon[domainIndex]
        lat = lat[domainIndex]

    if self.progressbar is not None:
        self.progressbar.update(0.125)

    # Sort date/time information
    # NOTE(review): the nesting of this try/except was reconstructed
    # from whitespace-collapsed source. The date parsing is placed
    # inside the handler because otherwise the 'age'-based ``dt`` would
    # always be overwritten. With this nesting, ``year`` and ``jdays``
    # are only assigned when the 'age' column is unavailable - later
    # uses of them would raise a NameError otherwise. Confirm.
    try:
        dt = np.empty(indicator.size, 'f')
        dt[1:] = np.diff(inputData['age'])
    except (ValueError, KeyError):

        try:
            # NOTE(review): the two adjacent string literals are joined
            # without a space, so the message reads "season:season".
            self.logger.info(("Filtering input data by season:"
                              "season > {0}".format(startSeason)))
            # Find indices that satisfy minimum season filter
            idx = np.where(inputData['season'] >= startSeason)[0]
            # Filter records:
            inputData = inputData[idx]
            indicator = indicator[idx]
            lon = lon[idx]
            lat = lat[idx]
        except (ValueError, KeyError):
            # No usable 'season' column: continue unfiltered.
            pass

        year, month, day, hour, minute, datetimes \
            = loadData.parseDates(inputData, indicator)

        # Time between observations:
        dt = loadData.getTimeDelta(year, month, day, hour, minute)

        # Calculate julian days:
        jdays = loadData.julianDays(year, month, day, hour, minute)

    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon >
    # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
    # being accidentally combined when seasons and track numbers
    # match but basins are different as occurs in the IBTrACS
    # dataset.  This problem can also be prevented if the
    # 'tcserialno' column is specified.
    # NOTE(review): the differences are signed, so only positive jumps
    # are detected here.
    indicator[np.where(delta_lon > 15)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    # Save information required for frequency auto-calculation
    try:
        origin_seasonOrYear = np.array(inputData['season'],
                                       'i').compress(indicator)
        header = 'Season'
    except (ValueError, KeyError):
        origin_seasonOrYear = year.compress(indicator)
        header = 'Year'

    flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
               header, ',', fmt='%d')

    # Convert pressure to hPa, restoring the missing-value sentinel at
    # the positions that held it before conversion.
    pressure = np.array(inputData['pressure'], 'd')
    novalue_index = np.where(pressure == sys.maxint)
    pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
    pressure[novalue_index] = sys.maxint

    # Convert any non-physical central pressure values to maximum integer
    # This is required because IBTrACS has a mix of missing value codes
    # (i.e. -999, 0, 9999) in the same global dataset.
    pressure = np.where((pressure < 600) | (pressure > 1100),
                        sys.maxint, pressure)

    if self.progressbar is not None:
        self.progressbar.update(0.25)

    try:
        vmax = np.array(inputData['vmax'], 'd')
    except (ValueError, KeyError):
        self.logger.warning("No max wind speed data")
        # NOTE(review): np.empty leaves the contents uninitialised.
        vmax = np.empty(indicator.size, 'f')
    else:
        novalue_index = np.where(vmax == sys.maxint)
        vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
        vmax[novalue_index] = sys.maxint

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size
    #assert vmax.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == sys.maxint)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = sys.maxint

        self._rmax(rmax, indicator)
        self._rmaxRate(rmax, dt, indicator)
    except (ValueError, KeyError):
        self.logger.warning("No rmax data available")

    if self.ncflag:
        self.data['index'] = indicator

    # ieast : parameter used in latLon2Azi
    # FIXME: should be a config setting describing the input data.
    ieast = 1

    # Determine the index of initial cyclone observations, excluding
    # those cyclones that have only one observation. This is used
    # for calculating initial bearing and speed
    indicator2 = np.where(indicator > 0, 1, 0)
    initIndex = np.concatenate(
        [np.where(np.diff(indicator2) == -1, 1, 0), [0]])

    # Calculate the bearing and distance (km) of every two
    # consecutive records using ll2azi
    bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
    assert bear_.size == indicator.size - 1
    assert dist_.size == indicator.size - 1
    # Pad to the observation count; element 0 is left uninitialised by
    # np.empty.
    bear = np.empty(indicator.size, 'f')
    bear[1:] = bear_
    dist = np.empty(indicator.size, 'f')
    dist[1:] = dist_

    self._lonLat(lon, lat, indicator, initIndex)
    self._bearing(bear, indicator, initIndex)
    self._bearingRate(bear, dt, indicator)
    if self.progressbar is not None:
        self.progressbar.update(0.375)
    self._speed(dist, dt, indicator, initIndex)
    self._speedRate(dist, dt, indicator)
    self._pressure(pressure, indicator)
    self._pressureRate(pressure, dt, indicator)
    self._windSpeed(vmax)

    try:
        self._frequency(year, indicator)
        self._juliandays(jdays, indicator, year)
    except (ValueError, KeyError):
        pass

    self.logger.info("Completed {0}".format(flModuleName()))
    if self.progressbar is not None:
        self.progressbar.update(0.5)
def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                cdfParameters=None, angular=False, periodic=False,
                missingValue=sys.maxsize):
    """
    Generate a PDF and CDF for a given parameter set using the
    method of kernel density estimators. Optionally return the PDF
    and CDF as an array, or write both to separate files.

    The cleaned parameter values, the evaluation grid, the PDF and
    the CDF are also stored on the instance as ``self.parameters``,
    ``self.grid``, ``self.pdf`` and ``self.cy``.

    :param parameters: Parameter values. If a string is given,
                       then it is the path to a file containing
                       the values. If an array is passed, then it
                       should hold the parameter values.
    :param float kdeStep: Increment of the ordinate values at which
                          the distributions will be calculated.
    :param str kdeParameters: Optional. If given, the grid and PDF
                              are saved to a file with this name
                              and nothing is returned. If absent,
                              the distribution values are returned.
    :param str cdfParameters: Optional. File name for the grid and
                              CDF. Only used (and then required)
                              when ``kdeParameters`` is given.
    :param angular: Does the data represent an angular measure
                    (e.g. bearing). The grid then spans 0-360.
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic
                     data (e.g. day of year). If given, it should
                     be the period of the data (e.g. for annual
                     data, ``periodic=365``).
    :type  periodic: boolean or int, default=``False``
    :param missingValue: Missing values have this value (default
                         :attr:`sys.maxsize`).

    :returns: If ``kdeParameters`` is given, returns ``None``
              (data are saved to file), otherwise a
              :class:`numpy.ndarray` whose columns are the
              parameter grid, the PDF and the CDF.

    :raises IndexError: If ``parameters`` is an array with one or
                        fewer members.
    :raises ValueError: If the evaluation grid has fewer than two
                        points.
    """
    LOG.debug("Running generateKDE")
    if isinstance(parameters, str):
        self.parameters = stats.statRemoveNum(
            flLoadFile(parameters, '%', ','), missingValue)
    else:
        if parameters.size <= 1:
            LOG.error("Insufficient members in parameter list")
            raise IndexError("Insufficient members in parameter list")

        self.parameters = stats.statRemoveNum(parameters, missingValue)

    if angular:
        xmin, xmax = 0.0, 360.0
    elif periodic:
        xmin, xmax = 0.0, periodic
    else:
        xmin = self.parameters.min()
        xmax = self.parameters.max()

    LOG.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
              xmin, xmax, kdeStep)

    if periodic:
        # Replicate the grid and the data one period either side so
        # that the estimate near the ends of the period sees the
        # values from the other end.
        x = np.arange(1, periodic + 1, kdeStep)
        self.grid = np.concatenate([x - periodic, x, x + periodic])
        self.parameters = np.concatenate([self.parameters - periodic,
                                          self.parameters,
                                          self.parameters + periodic])
    else:
        self.grid = np.arange(xmin, xmax, kdeStep)

    if self.grid.size < 2:
        LOG.critical("Grid for CDF generation is a single value")
        LOG.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                     xmin, xmax, kdeStep)
        raise ValueError("Grid for CDF generation is a single value")

    bw = stats.bandwidth(self.parameters)
    self.pdf = self._generatePDF(self.grid, bw, self.parameters)

    if periodic:
        # Keep only the middle copy of the tripled grid. Its bounds
        # come from the actual length of the base grid: the value
        # int(periodic / kdeStep) differs from that length whenever
        # the period is not an exact multiple of the step (or the
        # division rounds down), which would shift the slice.
        nBase = x.size
        # The PDF was estimated from three copies of the data, so it
        # is scaled by 3 (presumably to restore unit area over a
        # single period).
        self.pdf = 3.0 * self.pdf[nBase:2 * nBase]
        self.grid = self.grid[nBase:2 * nBase]

    self.cy = stats.cdf(self.grid, self.pdf)

    if kdeParameters is None:
        return np.transpose(np.array([self.grid, self.pdf, self.cy]))

    # Assume both kdeParameters and cdfParameters are defined as files:
    LOG.debug("Saving KDE and CDF data to files")
    flSaveFile(kdeParameters,
               np.transpose(np.array([self.grid, self.pdf])))
    flSaveFile(cdfParameters,
               np.transpose(np.array([self.grid, self.cy])))
def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                cdfParameters=None, angular=False, periodic=False,
                missingValue=getattr(sys, 'maxint', sys.maxsize)):
    """
    Generate a PDF and CDF for a given parameter set using the
    method of kernel density estimators. Optionally return the PDF
    and CDF as an array, or write both to separate files.

    The cleaned parameter values, the evaluation grid, the PDF and
    the CDF are also stored on the instance as ``self.parameters``,
    ``self.grid``, ``self.pdf`` and ``self.cy``.

    :param parameters: Parameter values. If a string is given,
                       then it is the path to a file containing
                       the values. If an array is passed, then it
                       should hold the parameter values.
    :param float kdeStep: Increment of the ordinate values at which
                          the distributions will be calculated.
    :param str kdeParameters: Optional. If given, the grid and PDF
                              are saved to a file with this name
                              and nothing is returned. If absent,
                              the distribution values are returned.
    :param str cdfParameters: Optional. File name for the grid and
                              CDF. Only used (and then required)
                              when ``kdeParameters`` is given.
    :param angular: Does the data represent an angular measure
                    (e.g. bearing). The grid then spans 0-360.
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic
                     data (e.g. day of year). If given, it should
                     be the period of the data (e.g. for annual
                     data, ``periodic=365``).
    :type  periodic: boolean or float, default=``False``
    :param missingValue: Missing values have this value (default
                         :attr:`sys.maxint`, or :attr:`sys.maxsize`
                         on interpreters without ``sys.maxint``).

    :returns: If ``kdeParameters`` is given, returns ``None``
              (data are saved to file), otherwise a
              :class:`numpy.ndarray` whose columns are the
              parameter grid, the PDF and the CDF.

    :raises IndexError: If ``parameters`` is an array with one or
                        fewer members.
    :raises ValueError: If the evaluation grid has fewer than two
                        points.
    """
    self.logger.debug("Running generateKDE")
    if isinstance(parameters, str):
        self.parameters = stats.statRemoveNum(
            flLoadFile(parameters, '%', ','), missingValue)
    else:
        if parameters.size <= 1:
            self.logger.error("Insufficient members in parameter list")
            # Call form of raise: valid on both Python 2 and 3.
            raise IndexError("Insufficient members in parameter list")

        self.parameters = stats.statRemoveNum(parameters, missingValue)

    if angular:
        xmin, xmax = 0.0, 360.0
    elif periodic:
        xmin, xmax = 0.0, periodic
    else:
        xmin = self.parameters.min()
        xmax = self.parameters.max()

    self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                      xmin, xmax, kdeStep)

    if periodic:
        # Replicate the grid and the data one period either side so
        # that the estimate near the ends of the period sees the
        # values from the other end.
        x = np.arange(1, periodic + 1, kdeStep)
        self.grid = np.concatenate([x - periodic, x, x + periodic])
        self.parameters = np.concatenate([self.parameters - periodic,
                                          self.parameters,
                                          self.parameters + periodic])
    else:
        self.grid = np.arange(xmin, xmax, kdeStep)

    if self.grid.size < 2:
        self.logger.critical("Grid for CDF generation is a single value")
        self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                             xmin, xmax, kdeStep)
        raise ValueError("Grid for CDF generation is a single value")

    bw = KPDF.UPDFOptimumBandwidth(self.parameters)
    self.pdf = self._generatePDF(self.grid, bw, self.parameters)

    if periodic:
        # Keep only the middle copy of the tripled grid. The bounds
        # must be integers (periodic / kdeStep is a float for a
        # fractional step) and must match the real length of the
        # base grid, which periodic / kdeStep does not when the
        # period is not an exact multiple of the step.
        nBase = x.size
        # The PDF was estimated from three copies of the data, so it
        # is scaled by 3 (presumably to restore unit area over a
        # single period).
        self.pdf = 3.0 * self.pdf[nBase:2 * nBase]
        self.grid = self.grid[nBase:2 * nBase]

    self.cy = stats.cdf(self.grid, self.pdf)

    if kdeParameters is None:
        return np.transpose(np.array([self.grid, self.pdf, self.cy]))

    # Assume both kdeParameters and cdfParameters are defined as files:
    self.logger.debug("Saving KDE and CDF data to files")
    flSaveFile(kdeParameters,
               np.transpose(np.array([self.grid, self.pdf])))
    flSaveFile(cdfParameters,
               np.transpose(np.array([self.grid, self.cy])))
def allDistributions(self, lonLat, parameterList, parameterName=None,
                     kdeStep=0.1, angular=False, periodic=False,
                     plotParam=False):
    """
    Calculate a distribution for each individual cell and store in
    a file or return the distribution.

    For every cell number from 0 up to the maximum cell number of
    the grid, the parameter values for that cell are extracted, a
    KDE is generated for them, and the resulting grid and CDF are
    stacked into one array tagged with the cell number.

    :param lonLat: Longitude/latitude data. If a string is given,
                   it is the path to a comma-delimited file that is
                   loaded; otherwise the value is used as given.
    :type  lonLat: str or :class:`numpy.ndarray`
    :param parameterList: Parameter values. If a string is given,
                          it is the path to a file that is loaded;
                          otherwise the value is used as given.
    :type  parameterList: str or :class:`numpy.ndarray`
    :param str parameterName: Optional. If given, the cell
                              distributions are written to
                              ``all_cell_cdf_<parameterName>`` (and
                              a matching ``.nc`` file) under the
                              ``process`` folder of the output
                              path. If ``None``, the distribution
                              values are returned instead.
    :param kdeStep: Increment of the ordinate values at which the
                    distributions will be calculated.
    :type  kdeStep: float, default=`0.1`
    :param angular: Passed through to the KDE generation.
    :type  angular: boolean, default=``False``
    :param periodic: Passed through to the KDE generation.
    :type  periodic: boolean or float, default=``False``
    :param boolean plotParam: Plot the parameters for each cell.
                              Default is ``False``.

    :returns: If ``parameterName`` is ``None``, a
              :class:`numpy.ndarray` with three columns (cell
              number, CDF grid value, CDF value); otherwise
              ``None`` (data are saved to file).
    """
    if parameterName:
        self.logger.debug("Running allDistributions for %s",
                          parameterName)
    else:
        self.logger.debug("Running allDistributions")

    if isinstance(lonLat, str):
        self.logger.debug("Loading lat/lon data from file")
        self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
    else:
        self.lonLat = lonLat

    if isinstance(parameterList, str):
        self.logger.debug("Loading parameter data from file: %s",
                          parameterList)
        self.pList = np.array(flLoadFile(parameterList))
    else:
        self.pList = parameterList

    self.pName = parameterName

    maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

    # Writing CDF dataset for all individual cell number into files
    self.logger.debug(
        "Writing CDF dataset for all individual cells into files")

    # Per-cell blocks are collected and joined once after the loop;
    # concatenating inside the loop re-copies every earlier row on
    # each iteration.
    cellBlocks = []
    nValues = 0
    for cellNum in range(0, maxCellNum + 1):
        self.logger.debug("Processing cell number %i", cellNum)

        # Generate cyclone parameter data for the cell number
        self.extractParameter(cellNum)

        # Estimate cyclone parameter data using KDE
        # The returned array contains the grid, the PDF and the CDF
        cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                            angular=angular,
                                            periodic=periodic)
        if plotParam:
            self._plotParameter(cellNum, kdeStep)
        self.logger.debug(
            'size of parameter array = %d: size of cdf array = %d',
            self.parameter.size, cdf.size)

        if cellNum != 0:
            self.logger.debug('size of results array = %s', str(nValues))

        # One row per grid point: cell number, grid value, CDF value.
        block = np.transpose(np.array([[cellNum] * len(cdf),
                                       cdf[:, 0], cdf[:, 2]]))
        cellBlocks.append(block)
        nValues += block.size

    if cellBlocks:
        results = np.concatenate(cellBlocks)
    else:
        results = np.empty((0, 3))

    if parameterName is None:
        self.logger.debug(
            "Returning CDF dataset for all individual cell numbers")
        return results

    cdfHeader = ("Cell_Number, CDF_" + self.pName + "_x, CDF_" +
                 self.pName + "_y")
    allCellCdfOutput = pjoin(self.outputPath, 'process',
                             'all_cell_cdf_' + self.pName)

    self.logger.debug(
        "Writing CDF dataset for all individual cell numbers into files")
    flSaveFile(filename=allCellCdfOutput, data=results,
               header=cdfHeader, delimiter=",", fmt="%f")

    # Save to netcdf too
    filename = allCellCdfOutput + '.nc'
    ncdf = Dataset(filename, 'w')
    try:
        ncdf.createDimension('cell', len(results[:, 0]))
        cell = ncdf.createVariable('cell', 'i', ('cell',))
        cell[:] = results[:, 0]

        x = ncdf.createVariable('x', 'f', ('cell',))
        x[:] = results[:, 1]

        y = ncdf.createVariable('CDF', 'f', ('cell',))
        y[:] = results[:, 2]
    finally:
        # Always release the file handle, even if a write fails.
        ncdf.close()
def allDistributions(self, lonLat, parameterList, parameterName=None,
                     kdeStep=0.1, angular=False, periodic=False,
                     plotParam=False):
    """
    Calculate a distribution for each individual cell and store in
    a file or return the distribution.

    :param lonLat: The longitude/latitude of all observations in
                   the model domain. If a string is given, then
                   it is the path to a file containing the
                   longitude/latitude information. If an array
                   is given, then it should be a 2-d array
                   containing the data values.
    :type  lonLat: str or :class:`numpy.ndarray`
    :param parameterList: Parameter values. If a string is given,
                          then it is the path to a file containing
                          the values. If an array is passed, then
                          it should hold the parameter values.
    :type  parameterList: str or :class:`numpy.ndarray`
    :param str parameterName: Optional. If given, then the
                              cell distributions will be saved to
                              ``all_cell_cdf_<parameterName>`` (and
                              a matching ``.nc`` file) under the
                              ``process`` folder of the output
                              path. If absent, the distribution
                              values are returned.
    :param kdeStep: Increment of the ordinate values at which
                    the distributions will be calculated.
    :type  kdeStep: float, default=`0.1`
    :param angular: Does the data represent an angular measure
                    (e.g. bearing).
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic
                     data (e.g. day of year). If given, it should
                     be the period of the data (e.g. for annual
                     data, ``periodic=365``).
    :type  periodic: boolean or float, default=``False``
    :param boolean plotParam: Plot the parameters. Default is
                              ``False``.

    :returns: If no ``parameterName`` is given, a
              :class:`numpy.ndarray` with three columns (cell
              number, CDF grid value, CDF value); otherwise
              ``None`` (data are saved to file).

    :raises IndexError: If the parameter data and the lon/lat data
                        are not the same length.
    """
    if parameterName:
        self.logger.debug("Running allDistributions for %s",
                          parameterName)
    else:
        self.logger.debug("Running allDistributions")

    if isinstance(lonLat, str):
        self.logger.debug("Loading lat/lon data from file")
        self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
    else:
        self.lonLat = lonLat

    if isinstance(parameterList, str):
        self.logger.debug("Loading parameter data from file: %s",
                          parameterList)
        self.pList = np.array(flLoadFile(parameterList))
    else:
        self.pList = parameterList

    self.pName = parameterName

    if len(self.pList) != len(self.lonLat):
        errmsg = ("Parameter data and "
                  "Lon/Lat data are not the same length "
                  "for {}.".format(parameterName))
        self.logger.critical(errmsg)
        raise IndexError(errmsg)

    maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

    # Writing CDF dataset for all individual cell number into files
    self.logger.debug(("Writing CDF dataset for all individual "
                       "cells into files"))

    # Per-cell blocks are collected and joined once after the loop;
    # concatenating inside the loop re-copies every earlier row on
    # each iteration.
    cellBlocks = []
    nValues = 0
    for cellNum in range(0, maxCellNum + 1):
        self.logger.debug("Processing cell number %i", cellNum)

        # Generate cyclone parameter data for the cell number
        self.extractParameter(cellNum)

        # Estimate cyclone parameter data using KDE
        # The returned array contains the grid, the PDF and the CDF
        cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                            angular=angular,
                                            periodic=periodic)
        if plotParam:
            self._plotParameter(cellNum, kdeStep)
        self.logger.debug(('size of parameter array = %d: '
                           'size of cdf array = %d'),
                          self.parameter.size, cdf.size)

        if cellNum != 0:
            self.logger.debug('size of results = %s', str(nValues))

        # One row per grid point: cell number, grid value, CDF value.
        block = np.transpose(np.array([[cellNum] * len(cdf),
                                       cdf[:, 0], cdf[:, 2]]))
        cellBlocks.append(block)
        nValues += block.size

    if cellBlocks:
        results = np.concatenate(cellBlocks)
    else:
        results = np.empty((0, 3))

    if parameterName is None:
        self.logger.debug(("Returning CDF dataset for all "
                           "individual cell numbers"))
        return results

    cdfHeader = ("Cell_Number, CDF_" + self.pName + "_x, CDF_" +
                 self.pName + "_y")
    allCellCdfOutput = pjoin(self.outputPath, 'process',
                             'all_cell_cdf_' + self.pName)

    self.logger.debug(("Writing CDF dataset for all individual "
                       "cell numbers into files"))
    flSaveFile(filename=allCellCdfOutput, data=results,
               header=cdfHeader, delimiter=",", fmt="%f")

    # Save to netcdf too
    filename = allCellCdfOutput + '.nc'
    ncdf = Dataset(filename, 'w')
    try:
        ncdf.createDimension('cell', len(results[:, 0]))
        cell = ncdf.createVariable('cell', 'i', ('cell',))
        cell[:] = results[:, 0]

        x = ncdf.createVariable('x', 'f', ('cell',))
        x[:] = results[:, 1]

        y = ncdf.createVariable('CDF', 'f', ('cell',))
        y[:] = results[:, 2]
    finally:
        # Always release the file handle, even if a write fails.
        ncdf.close()