def loadTracks(trackfile):
    """
    Parse a track .csv file and return its contents as a list of
    :class:`Track` objects.

    The actual parsing is delegated to `readMultipleTrackData`; this
    function only wraps each returned dataset in a :class:`Track` and
    tags it with its source file and a ``(index, count)`` identifier.

    :type  trackfile: str
    :param trackfile: the track data filename.
    """
    datasets = readMultipleTrackData(trackfile)
    total = len(datasets)
    tracks = []
    for idx, dataset in enumerate(datasets):
        trk = Track(dataset)
        trk.trackfile = trackfile
        trk.trackId = (idx, total)
        tracks.append(trk)
    return tracks
def loadTrackFile(configFile, trackFile, source, missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.

    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :param str source: Name of the source format of the TC data. There
                       *must* be a section in ``configFile`` matching this
                       string, containing the details of the format of the
                       data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`

    :returns: A collection of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS')

    """
    logger.info("Loading %s", trackFile)
    inputData = colReadCSV(configFile, trackFile, source)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    if config.getboolean('DataProcess', 'FilterSeasons'):
        # Discard all records from seasons before the configured start:
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(inputData,
                                                             indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(
            inputData, indicator, inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day,
                                     hour, minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10
    # degrees or delta_lat > 5 degrees). This avoids two tracks being
    # accidentally combined when seasons and track numbers match but
    # basins are different as occurs in the IBTrACS dataset. This problem
    # can also be prevented if the 'tcserialno' column is specified.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        # FIX: `sys.maxint` was removed in Python 3; use `sys.maxsize`
        # (consistent with the sentinel used elsewhere in this module).
        novalue_index = np.where(windspeed == sys.maxsize)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        windspeed[novalue_index] = missingValue
    except (ValueError, KeyError):
        logger.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = missingValue
    except (ValueError, KeyError):
        logger.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        logger.debug("No ambient MSLP data in this input file")
        logger.debug("Sampling data from MSLP data defined in "
                     "configuration file")
        # Warning: using sampled data will likely lead to some odd
        # behaviour near the boundary of the MSLP grid boundaries -
        # higher resolution MSLP data will decrease this unusual
        # behaviour.
        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except Exception:
            # FIX: narrowed from a bare `except:`; still logs and
            # re-raises so the failure is not masked.
            logger.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    speed, bearing = getSpeedBearing(indicator, lon, lat, dt,
                                     missingValue=missingValue)

    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, penv)

    # Cumulative sum of the track-start indicator assigns a 1-based id
    # to every observation of each track:
    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={'names': trackFields, 'formats': trackTypes})
    for key, value in zip(trackFields,
                          [indicator, TCID, year, month, day, hour, minute,
                           timeElapsed, datetimes, lon, lat, speed, bearing,
                           pressure, windspeed, rmax, penv]):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
def loadTrackFile(configFile, trackFile, source, missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.

    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :param str source: Name of the source format of the TC data. There
                       *must* be a section in ``configFile`` matching this
                       string, containing the details of the format of the
                       data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`

    :returns: A collection of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS')

    """
    LOG.info("Loading %s", trackFile)
    inputData = colReadCSV(configFile, trackFile, source)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    if config.getboolean('DataProcess', 'FilterSeasons'):
        # Discard all records from seasons before the configured start:
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(
            inputData, indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(
            inputData, indicator, inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month,
                                     day, hour, minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10
    # degrees or delta_lat > 5 degrees). This avoids two tracks being
    # accidentally combined when seasons and track numbers match but
    # basins are different as occurs in the IBTrACS dataset. This problem
    # can also be prevented if the 'tcserialno' column is specified.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        # FIX: `sys.maxint` was removed in Python 3; use `sys.maxsize`
        # (consistent with the sentinel used elsewhere in this module).
        novalue_index = np.where(windspeed == sys.maxsize)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        windspeed[novalue_index] = missingValue
    except (ValueError, KeyError):
        LOG.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = missingValue
    except (ValueError, KeyError):
        LOG.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        LOG.debug("No ambient MSLP data in this input file")
        LOG.debug("Sampling data from MSLP data defined in "
                  "configuration file")
        # Warning: using sampled data will likely lead to some odd
        # behaviour near the boundary of the MSLP grid boundaries -
        # higher resolution MSLP data will decrease this unusual
        # behaviour.
        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except Exception:
            # FIX: narrowed from a bare `except:`; still logs and
            # re-raises so the failure is not masked.
            LOG.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    if 'poci' in inputData.dtype.names:
        poci = np.array(inputData['poci'], 'd')
    else:
        LOG.debug("Determining poci")
        eps = np.random.normal(0, scale=2.5717)
        poci = getPoci(penv, pressure, lat, jdays, eps)

    speed, bearing = getSpeedBearing(indicator, lon, lat, dt,
                                     missingValue=missingValue)

    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, poci)

    # Cumulative sum of the track-start indicator assigns a 1-based id
    # to every observation of each track:
    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={'names': trackFields, 'formats': trackTypes})
    for key, value in zip(trackFields,
                          [indicator, TCID, year, month, day, hour, minute,
                           timeElapsed, datetimes, lon, lat, speed, bearing,
                           pressure, windspeed, rmax, poci]):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
def interpolate(track, delta, interpolation_type=None):
    """
    Interpolate the records in time to have a uniform time difference between
    records. Each of the input arrays represent the values for a single TC
    event.

    :param track: :class:`Track` object containing all data for the track.
    :param delta: `float` time difference to interpolate the dataset to. Must
                  be positive.
    :param interpolation_type: Optional ['linear', 'akima'], specify the type
                               of interpolation used for the locations (i.e.
                               longitude and latitude) of the records.

    :returns: A new :class:`Track` object with records at a uniform
              `delta`-hour spacing, carrying the same trackId/trackfile
              as the input track.

    # FIXME: Need to address masking values - scipy.interpolate.interp1d
    handles numpy.ma masked arrays.
    """
    LOG.debug("Performing interpolation of TC track")
    # Build a list of datetime objects for each record, either from the
    # precomputed Datetime attribute or from the Y/M/D/H/M fields:
    if not hasattr(track, 'Datetime'):
        day_ = [datetime(*x) for x in zip(track.Year, track.Month,
                                          track.Day, track.Hour,
                                          track.Minute)]
    else:
        day_ = track.Datetime

    # NOTE(review): this timedelta value is never used -- `timestep` is
    # reassigned to an hours-since-start array further down. Looks like a
    # dead assignment; confirm before removing.
    timestep = timedelta(delta / 24.)
    # Fractional ordinal days (days since epoch, with hour/minute as the
    # fractional part):
    try:
        time_ = np.array([d.toordinal() + (d.hour + d.minute / 60.) / 24.0
                          for d in day_], dtype=float)
    except AttributeError:
        # cftime datetimes lack toordinal(); convert Julian-calendar
        # values to real datetimes first, otherwise re-raise.
        import cftime
        if isinstance(day_[0], cftime.DatetimeJulian):
            day__ = [d._to_real_datetime() for d in day_]
            time_ = np.array([d.toordinal() + (d.hour + d.minute / 60.) / 24.
                              for d in day__], dtype=float)
        else:
            raise
    # Hours between consecutive observations (dt[0] stays 0):
    dt_ = 24.0 * np.diff(time_)
    dt = np.zeros(len(track.data), dtype=float)
    dt[1:] = dt_

    # Convert all times to a time after initial observation:
    timestep = 24.0 * (time_ - time_[0])

    # New uniform time grid in hours; the +.01 guards against the final
    # point being dropped by floating-point rounding, and the last value
    # is pinned to the original end time:
    newtime = np.arange(timestep[0], timestep[-1] + .01, delta)
    newtime[-1] = timestep[-1]
    _newtime = (newtime / 24.) + time_[0]
    newdates = num2date(_newtime)
    newdates = np.array([n.replace(tzinfo=None) for n in newdates])

    # NOTE(review): the guard tests for 'Speed' but the attribute set is
    # 'WindSpeed' -- if the two names are not aliases on Track, this
    # recomputes on every call; verify against the Track class.
    if not hasattr(track, 'Speed'):
        idx = np.zeros(len(track.data))
        idx[0] = 1
        # TODO: Possibly could change `np.mean(dt)` to `dt`?
        track.WindSpeed = maxWindSpeed(idx, np.mean(dt), track.Longitude,
                                       track.Latitude, track.CentralPressure,
                                       track.EnvPressure)
    # Find the indices of valid pressure observations (sys.maxsize is the
    # missing-value sentinel for pressure):
    validIdx = np.where(track.CentralPressure < sys.maxsize)[0]

    # FIXME: Need to address the issue when the time between obs is less
    # than delta (e.g. only two obs 5 hrs apart, but delta = 6 hrs).
    if len(track.data) <= 3:
        # Use linear interpolation only (only a start and end point given):
        nLon = interp1d(timestep, track.Longitude, kind='linear')(newtime)
        nLat = interp1d(timestep, track.Latitude, kind='linear')(newtime)
        if len(validIdx) >= 2:
            npCentre = interp1d(timestep,
                                track.CentralPressure,
                                kind='linear')(newtime)
            nwSpd = interp1d(timestep, track.WindSpeed, kind='linear')(newtime)
        elif len(validIdx) == 1:
            # If one valid observation, assume no change and apply value
            # to all times
            npCentre = np.ones(len(newtime)) * track.CentralPressure[validIdx]
            nwSpd = np.ones(len(newtime)) * track.WindSpeed[validIdx]
        else:
            # No valid pressure observations at all:
            npCentre = np.zeros(len(newtime))
            nwSpd = np.zeros(len(newtime))
        npEnv = interp1d(timestep, track.EnvPressure, kind='linear')(newtime)
        nrMax = interp1d(timestep, track.rMax, kind='linear')(newtime)
    else:
        # Longer tracks: choose location interpolation scheme. Default
        # (and fallback) is a cubic smoothing spline via splrep/splev.
        if interpolation_type == 'akima':
            # Use the Akima interpolation method:
            try:
                import akima
            except ImportError:
                LOG.exception(("Akima interpolation module unavailable "
                               " - default to scipy.interpolate"))
                nLon = splev(newtime, splrep(timestep, track.Longitude,
                                             s=0), der=0)
                nLat = splev(newtime, splrep(timestep, track.Latitude, s=0),
                             der=0)
            else:
                nLon = akima.interpolate(timestep, track.Longitude, newtime)
                nLat = akima.interpolate(timestep, track.Latitude, newtime)
        elif interpolation_type == 'linear':
            nLon = interp1d(timestep, track.Longitude, kind='linear')(newtime)
            nLat = interp1d(timestep, track.Latitude, kind='linear')(newtime)
        else:
            nLon = splev(newtime, splrep(timestep, track.Longitude, s=0),
                         der=0)
            nLat = splev(newtime, splrep(timestep, track.Latitude, s=0),
                         der=0)

        if len(validIdx) >= 2:
            # Restrict pressure/wind interpolation to the window spanned
            # by valid observations -- outside it would require
            # extrapolation:
            firsttime = np.where(newtime >= timestep[validIdx[0]])[0][0]
            lasttime = np.where(newtime <= timestep[validIdx[-1]])[0][-1]
            if firsttime == lasttime:
                # only one valid observation:
                npCentre = np.zeros(len(newtime))
                nwSpd = np.zeros(len(newtime))
                npCentre[firsttime] = track.CentralPressure[validIdx[0]]
                nwSpd[firsttime] = track.WindSpeed[validIdx[0]]
            else:
                npCentre = np.zeros(len(newtime))
                nwSpd = np.zeros(len(newtime))
                _npCentre = interp1d(timestep[validIdx],
                                     track.CentralPressure[validIdx],
                                     kind='linear')(newtime[firsttime:lasttime])
                # NOTE(review): this branch interpolates track.Speed while
                # the short-track branch above uses track.WindSpeed --
                # confirm whether these are the same quantity.
                _nwSpd = interp1d(timestep[validIdx],
                                  track.Speed[validIdx],
                                  kind='linear')(newtime[firsttime:lasttime])
                npCentre[firsttime:lasttime] = _npCentre
                nwSpd[firsttime:lasttime] = _nwSpd
                # Fill the endpoint (excluded by the half-open slice):
                npCentre[lasttime] = _npCentre[-1]
                nwSpd[lasttime] = _nwSpd[-1]
        elif len(validIdx) == 1:
            # One valid observation: hold its value across all new times.
            npCentre = np.ones(len(newtime)) * track.CentralPressure[validIdx]
            nwSpd = np.ones(len(newtime)) * track.WindSpeed[validIdx]
        else:
            npCentre = np.zeros(len(newtime))
            nwSpd = np.zeros(len(newtime))

        npEnv = interp1d(timestep, track.EnvPressure, kind='linear')(newtime)
        nrMax = interp1d(timestep, track.rMax, kind='linear')(newtime)

    if len(nLat) >= 2:
        # Recompute bearing and forward speed from the interpolated
        # positions; the final element repeats the last segment's value
        # so the arrays match newtime's length:
        bear_, dist_ = latLon2Azi(nLat, nLon, 1, azimuth=0)
        nthetaFm = np.zeros(newtime.size, dtype=float)
        nthetaFm[:-1] = bear_
        nthetaFm[-1] = bear_[-1]
        dist = np.zeros(newtime.size, dtype=float)
        dist[:-1] = dist_
        dist[-1] = dist_[-1]
        nvFm = dist / delta
    else:
        nvFm = track.Speed[-1]
        nthetaFm = track.Bearing[-1]

    nYear = [date.year for date in newdates]
    nMonth = [date.month for date in newdates]
    nDay = [date.day for date in newdates]
    nHour = [date.hour for date in newdates]
    nMin = [date.minute for date in newdates]
    # Mask physically implausible interpolated pressures (spline
    # overshoot) back to the missing-value sentinel:
    np.putmask(npCentre, npCentre > 10e+6, sys.maxsize)
    np.putmask(npCentre, npCentre < 700, sys.maxsize)

    # The interpolated series is a single track: one start indicator and
    # a constant track id taken from the input track:
    newindex = np.zeros(len(newtime))
    newindex[0] = 1
    newTCID = np.ones(len(newtime)) * track.trackId[0]

    newdata = np.empty(len(newtime),
                       dtype={'names': TRACKFILE_COLS,
                              'formats': TRACKFILE_FMTS})
    for key, val in zip(TRACKFILE_COLS,
                        [newindex, newTCID, nYear, nMonth, nDay, nHour, nMin,
                         newtime, newdates, nLon, nLat, nvFm, nthetaFm,
                         npCentre, nwSpd, nrMax, npEnv]):
        newdata[key] = val

    newtrack = Track(newdata)
    newtrack.trackId = track.trackId
    newtrack.trackfile = track.trackfile

    return newtrack