import datetime

import metpy.calc as mpcalc
from metpy.units import units
import numpy as np
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS


def get_obs(ts, mybb):
    # Copied from the browser URL box
    metar_cat_url = ('http://thredds.ucar.edu/thredds/catalog/nws/metar/'
                     'ncdecoded/catalog.xml?dataset=nws/metar/ncdecoded/'
                     'Metar_Station_Data_fc.cdmr')
    # Parse the XML
    metar_cat = TDSCatalog(metar_cat_url)
    # What datasets are here? Only one "dataset" in this catalog
    dataset = list(metar_cat.datasets.values())[0]
    ncss_url = dataset.access_urls['NetcdfSubset']
    ncss = NCSS(ncss_url)

    query = ncss.query().accept('csv').time(ts - datetime.timedelta(minutes=1))
    query.lonlat_box(**mybb)
    query.variables('air_temperature', 'dew_point_temperature', 'inches_ALTIM',
                    'wind_speed', 'wind_from_direction', 'cloud_area_fraction',
                    'weather')
    try:
        data = ncss.get_data(query)
        lats = data['latitude'][:]
        lons = data['longitude'][:]
        tair = data['air_temperature'][:]
        dewp = data['dew_point_temperature'][:]
        slp = (data['inches_ALTIM'][:] * units('inHg')).to('mbar')

        # Convert wind to components
        u, v = mpcalc.get_wind_components(data['wind_speed'] * units.knot,
                                          data['wind_from_direction'] * units.deg)

        # Need to handle missing (NaN) and convert to proper code
        cloud_cover = 8 * data['cloud_area_fraction']
        cloud_cover[np.isnan(cloud_cover)] = 9
        cloud_cover = cloud_cover.astype(int)

        # For some reason these come back as bytes instead of strings
        stid = [s.decode() for s in data['station']]

        # Convert the text weather observations to WMO codes we can map to
        # symbols; to_code is a helper defined elsewhere in this notebook
        if data['weather'].dtype != bool:
            wx_text = [s.decode('ascii') for s in data['weather']]
            wx_codes = np.array(list(to_code(wx_text)))
        else:
            wx_codes = np.array([0] * len(data['weather']))

        sfc_data = {'latitude': lats, 'longitude': lons,
                    'air_temperature': tair, 'dew_point_temperature': dewp,
                    'eastward_wind': u, 'northward_wind': v,
                    'cloud_coverage': cloud_cover,
                    'air_pressure_at_sea_level': slp,
                    'present_weather': wx_codes}
        have_obs = True
    except Exception:
        have_obs = False
        sfc_data = {}

    return sfc_data, have_obs
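# Example call (a sketch): the keys of the bounding-box dict mirror the
# keyword names accepted by ncss.query().lonlat_box(); the box below is an
# arbitrary illustration.
bb = {'north': 45., 'south': 35., 'east': -100., 'west': -111.}
sfc_data, have_obs = get_obs(datetime.datetime.utcnow(), bb)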
from datetime import datetime

from metpy.calc import dewpoint_rh
from metpy.units import units
import numpy as np
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS


def get_sounding(source, lat, long):
    # source is unused for now because we are testing only against NCSS
    source_place_holder = source
    # print(source_place_holder)

    best_gfs = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                          'NCEP/GFS/Global_0p5deg/catalog.xml?'
                          'dataset=grib/NCEP/GFS/Global_0p5deg/Best')
    best_ds = list(best_gfs.datasets.values())[0]
    ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

    query = ncss.query()
    query.lonlat_point(long, lat).time(datetime.utcnow())
    query.accept('netcdf4')
    query.variables('Temperature_isobaric', 'Relative_humidity_isobaric',
                    'u-component_of_wind_isobaric',
                    'v-component_of_wind_isobaric')
    data = ncss.get_data(query)

    temp = data.variables['Temperature_isobaric']
    temp_vals = temp[:].squeeze() * units.kelvin
    relh = data.variables['Relative_humidity_isobaric']
    relh_values = relh[:] / 100
    td = dewpoint_rh(temp_vals, relh_values)
    td_vals = td[:].squeeze()
    press = data.variables['isobaric3']  # name of the pressure coordinate can vary
    press_vals = press[:].squeeze()
    u_wind = data.variables['u-component_of_wind_isobaric']
    u_wind_vals = u_wind[:].squeeze()
    v_wind = data.variables['v-component_of_wind_isobaric']
    v_wind_vals = v_wind[:].squeeze()

    # Put temp, dewpoint, pressure, u/v winds into numpy arrays and reorder
    t = np.array(temp_vals)[::-1]
    td = np.array(td_vals)[::-1]
    p = np.array(press_vals)[::-1]
    u = np.array(u_wind_vals)[::-1]
    v = np.array(v_wind_vals)[::-1]

    # Change units for a proper skew-T
    p = (p * units.pascals).to('mbar')
    t = (t * units.kelvin).to('degC')
    td = td * units.degC
    u = (u * units('m/s')).to('knot')
    v = (v * units('m/s')).to('knot')
    # spd = spd * units.knot
    # direc = direc * units.deg
    # u, v = get_wind_components(spd, direc)

    return t, td, p, u, v, lat, long, str(datetime.utcnow())[:-7]
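# Plotting the returned profile (a sketch using MetPy's SkewT class; the
# point and model choice here are arbitrary illustrations).
import matplotlib.pyplot as plt
from metpy.plots import SkewT

t, td, p, u, v, lat, lon, valid = get_sounding('GFS', 40.0, -105.0)
fig = plt.figure(figsize=(9, 9))
skew = SkewT(fig)
skew.plot(p, t, 'r')
skew.plot(p, td, 'g')
skew.plot_barbs(p, u, v)
plt.title('Sounding valid ' + valid)
plt.show()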
import numpy as np
from netCDF4 import num2date
from siphon.catalog import get_latest_access_url
from siphon.ncss import NCSS


def get_data(lon_w, lon_e, lat_s, lat_n, variable):
    """TODO: add reset, change colors of map, variable selection,
    model selection, and a lat/lon validator.
    """
    cat_url = ('http://thredds-jumbo.unidata.ucar.edu/thredds/catalog/grib/'
               'NCEP/GFS/Global_0p25deg/catalog.xml')
    latest_gfs = get_latest_access_url(cat_url, 'NetcdfSubset')
    ncss = NCSS(latest_gfs)

    query = ncss.query()
    query.lonlat_box(west=lon_w, east=lon_e, south=lat_s, north=lat_n).all_times()
    query.accept('netcdf4')
    # variable_dict maps a friendly name to the model's variable name
    # (defined elsewhere)
    query.variables(variable_dict(variable))
    data = ncss.get_data(query)

    var1 = data.variables[variable_dict(variable)]

    # Only works if the dimension has a name like 'time...' or the variable
    # only has one time dimension
    time_name = None
    for dim in var1.dimensions:
        if 'time' in dim:
            time_name = dim
    if time_name is None:
        raise ValueError("Couldn't find a time dimension for " + var1.name)
    time_1d = data.variables[time_name]
    lat_1d = data.variables['lat']
    lon_1d = data.variables['lon']

    # Reduce the dimensions of the data
    lat_1d = lat_1d[:].squeeze()
    lon_1d = lon_1d[:].squeeze()

    # Convert the number of hours since the reference time to an actual date
    time_val = num2date(time_1d[:].squeeze(), time_1d.units)

    # Combine latitudes and longitudes
    lon_2d, lat_2d = np.meshgrid(lon_1d, lat_1d)

    # flatten() combines all the arrays from meshgrid into one list
    full_lat_1d = lat_2d.flatten()
    full_lon_1d = lon_2d.flatten()

    # Create one list that pairs lons and lats
    lonlat_list = list(zip(full_lon_1d, full_lat_1d))

    return lon_2d, lat_2d, var1, time_val, lonlat_list
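# Example call (a sketch; 'Temperature' stands in for whatever friendly name
# the external variable_dict helper knows how to translate).
lon_2d, lat_2d, var1, time_val, lonlat_list = get_data(
    -115, -80, 30, 50, 'Temperature')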
import netCDF4
import numpy as np
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS


def get_closest_gfs(time, level, field):
    """Retrieve the current best 0.25 deg GFS model for a given field, level, and time.

    time : datetime object
    level : vertical level to request (in the dataset's units, Pa for GFS
            isobaric data)
    field : CF field to retrieve
    """
    # Get the catalog and best GFS entry
    catalog = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                         'NCEP/GFS/Global_0p25deg/catalog.xml')
    best_gfs = list(catalog.datasets.values())[1]

    # Using NCSS, build a query and fetch the data
    ncss = NCSS(best_gfs.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_box(north=90, south=10, east=360, west=160)
    query.vertical_level(level)
    query.time(time)
    query.accept('netcdf4')
    query.variables(field)
    data = ncss.get_data(query)

    # Pull out the variables we will use
    lat_var = data.variables['lat']
    lon_var = data.variables['lon']
    data_var = data.variables[field]

    # Find the correct time dimension name
    for coord in data_var.coordinates.split():
        if 'time' in coord:
            time_var = data.variables[coord]
            break

    # Convert number of hours since the reference time into an actual date
    time_vals = netCDF4.num2date(time_var[:].squeeze(), time_var.units)

    # Combine 1D latitudes and longitudes into a 2D grid of locations
    lon_2d, lat_2d = np.meshgrid(lon_var[:], lat_var[:])

    # Filter the data to smooth it out a bit
    data_var = ndimage.gaussian_filter(data_var[:][0][0], sigma=1.5, order=0)

    return time_vals, lat_2d, lon_2d, data_var
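# Example request and quick-look plot (a sketch): 500-hPa heights (50000 Pa)
# valid at the current time.
from datetime import datetime
import matplotlib.pyplot as plt

time_vals, lat_2d, lon_2d, heights = get_closest_gfs(
    datetime.utcnow(), 50000, 'Geopotential_height_isobaric')
plt.contour(lon_2d, lat_2d, heights)
plt.show()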
from netCDF4 import num2date
import pandas as pd
from pint import UnitRegistry
from siphon.ncss import NCSS


def retrieve_point_forecast(ds, lat, lon, var, ensemble):
    '''Retrieve the forecast variable for the given coordinates.

    ds: a siphon dataset object
    lat: latitude of the point
    lon: longitude of the point
    var: model variable name to extract
    ensemble: True/False indicator of whether the ds object contains
        ensemble data

    If the object is from an ensemble, the variable has an additional
    (member) dimension.
    '''
    ncss = NCSS(ds.access_urls['NetcdfSubset'])
    query = ncss.query()
    query.lonlat_point(lon, lat)
    query.all_times()
    query.variables(var).accept('netcdf')
    data = ncss.get_data(query)

    temps = data.variables[var]
    time = data.variables['time']
    time_vals = num2date(time[:].squeeze(), time.units)

    ureg = UnitRegistry()
    if ensemble:
        ensemble_temp_series = []
        num_ens = temps.shape[2]
        for i in range(num_ens):
            temp_vals = ((temps[:, :, i, :].squeeze() * ureg.kelvin)
                         .to(ureg.degF))
            temp_series = pd.Series(temp_vals, index=time_vals)
            ensemble_temp_series.append(temp_series)
        return ensemble_temp_series
    else:
        temp_vals = (temps[:, :, 0].squeeze() * ureg.kelvin).to(ureg.degF)
        temp_series = pd.Series(temp_vals, index=time_vals)
        return temp_series
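# Example (a sketch): the GEFS members catalog URL and variable name below
# are illustrative assumptions, not part of the function above.
from siphon.catalog import TDSCatalog

cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/NCEP/GEFS/'
                 'Global_1p0deg_Ensemble/members/catalog.xml')
ds = list(cat.datasets.values())[0]
member_series = retrieve_point_forecast(
    ds, 40.0, -105.0, 'Temperature_height_above_ground', ensemble=True)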
# Imports this excerpt relies on (paths per the pvlib/siphon of this vintage)
import datetime
from xml.etree.ElementTree import ParseError

import numpy as np
import pandas as pd
from netCDF4 import num2date
from requests.exceptions import HTTPError
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS

from pvlib.irradiance import liujordan
from pvlib.location import Location
from pvlib.solarposition import get_solarposition
from pvlib.tools import localize_to_utc


class ForecastModel(object):
    '''
    An object for holding forecast model information for use within the
    pvlib library.

    Simplifies use of siphon library on a THREDDS server.

    Parameters
    ----------
    model_type: string
        UNIDATA category in which the model is located.
    model_name: string
        Name of the UNIDATA forecast model.
    set_type: string
        Model dataset type.

    Attributes
    ----------
    access_url: string
        URL specifying the dataset from which data will be retrieved.
    base_tds_url : string
        The top level server address.
    catalog_url : string
        The url path of the catalog to parse.
    columns: list
        List of headers used to create the data DataFrame.
    data: pd.DataFrame
        Data returned from the query.
    data_format: string
        Format of the forecast data being requested from UNIDATA.
    dataset: Dataset
        Object containing information used to access forecast data.
    dataframe_variables: list
        Model variables that are present in the data.
    datasets_list: list
        List of all available datasets.
    fm_models: Dataset
        Object containing all available forecast models.
    fm_models_list: list
        List of all available forecast models from UNIDATA.
    latitude: list
        A list of floats containing latitude values.
    location: Location
        A pvlib Location object containing geographic quantities.
    longitude: list
        A list of floats containing longitude values.
    lbox: boolean
        Indicates the use of a location bounding box.
    ncss: NCSS object
        NCSS
    model_name: string
        Name of the UNIDATA forecast model.
    model: Dataset
        A dictionary of Dataset objects, keyed by dataset name.
    model_url: string
        The url path of the dataset to parse.
    modelvariables: list
        Common variable names that correspond to queryvariables.
    query: NCSS query object
        NCSS object used to complete the forecast data retrieval.
    queryvariables: list
        Variables that are used to query the THREDDS Data Server.
    rad_type: dictionary
        Dictionary labeling the method used for calculating radiation values.
    time: datetime
        Time range specified for the NCSS query.
    utctime: DatetimeIndex
        Time range in UTC.
    var_stdnames: dictionary
        Dictionary containing the standard names of the variables in the
        query, where the keys are the common names.
    var_units: dictionary
        Dictionary containing the units of the variables in the query,
        where the keys are the common names.
    variables: dictionary
        Dictionary that translates model specific variables to
        common named variables.
    vert_level: float or integer
        Vertical altitude for query data.
    wind_type: string
        Quantity that was used to calculate wind_speed.
    zenith: numpy.array
        Solar zenith angles for the given time range.
    '''

    access_url_key = 'NetcdfSubset'
    catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml'
    base_tds_url = catalog_url.split('/thredds/')[0]
    data_format = 'netcdf'
    vert_level = 100000

    columns = np.array(['temperature', 'wind_speed', 'total_clouds',
                        'low_clouds', 'mid_clouds', 'high_clouds',
                        'dni', 'dhi', 'ghi'])

    def __init__(self, model_type, model_name, set_type):
        self.model_type = model_type
        self.model_name = model_name
        self.set_type = set_type
        self.catalog = TDSCatalog(self.catalog_url)
        self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href)
        self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys()))

        try:
            model_url = self.fm_models.catalog_refs[model_name].href
        except ParseError:
            raise ParseError(self.model_name + ' model may be unavailable.')

        try:
            self.model = TDSCatalog(model_url)
        except HTTPError:
            raise HTTPError(self.model_name + ' model may be unavailable.')

        self.datasets_list = list(self.model.datasets.keys())
        self.set_dataset()

    def set_dataset(self):
        '''
        Retrieves the designated dataset, creates NCSS object, and
        initiates a NCSS query.
        '''
        keys = list(self.model.datasets.keys())
        labels = [item.split()[0].lower() for item in keys]
        if self.set_type == 'best':
            self.dataset = self.model.datasets[keys[labels.index('best')]]
        elif self.set_type == 'latest':
            self.dataset = self.model.datasets[keys[labels.index('latest')]]
        elif self.set_type == 'full':
            self.dataset = self.model.datasets[keys[labels.index('full')]]

        self.access_url = self.dataset.access_urls[self.access_url_key]
        self.ncss = NCSS(self.access_url)
        self.query = self.ncss.query()

    def set_query_latlon(self):
        '''
        Sets the NCSS query location latitude and longitude.
        '''
        if isinstance(self.longitude, list):
            self.lbox = True
            # west, east, south, north
            self.query.lonlat_box(self.longitude[0], self.longitude[1],
                                  self.latitude[0], self.latitude[1])
        else:
            self.lbox = False
            self.query.lonlat_point(self.longitude, self.latitude)

    def set_query_time(self):
        '''
        Sets the NCSS query time range, as a single time or a range.
        '''
        if len(self.utctime) == 1:
            self.query.time(pd.to_datetime(self.utctime)[0])
        else:
            self.query.time_range(pd.to_datetime(self.utctime)[0],
                                  pd.to_datetime(self.utctime)[-1])

    def set_location(self, time):
        '''
        Sets the location for the query.

        Parameters
        ----------
        time: datetime or DatetimeIndex
            Time range of the query.
        '''
        if isinstance(time, datetime.datetime):
            tzinfo = time.tzinfo
        else:
            tzinfo = time.tz

        if tzinfo is None:
            self.location = Location(self.latitude, self.longitude)
        else:
            self.location = Location(self.latitude, self.longitude, tz=tzinfo)

    def get_query_data(self, latitude, longitude, time, vert_level=None,
                       variables=None):
        '''
        Submits a query to the UNIDATA servers using siphon NCSS and
        converts the netcdf data to a pandas DataFrame.

        Parameters
        ----------
        latitude: list
            A list of floats containing latitude values.
        longitude: list
            A list of floats containing longitude values.
        time: pd.DatetimeIndex
            Time range of interest.
        vert_level: float or integer
            Vertical altitude of interest.
        variables: dictionary
            Variables and common names being queried.

        Returns
        -------
        pd.DataFrame
        '''
        if vert_level is not None:
            self.vert_level = vert_level
        if variables is not None:
            self.variables = variables
            self.modelvariables = list(self.variables.keys())
            self.queryvariables = [self.variables[key]
                                   for key in self.modelvariables]
            self.columns = self.modelvariables
            self.dataframe_variables = self.modelvariables

        self.latitude = latitude
        self.longitude = longitude
        self.set_query_latlon()
        self.set_location(time)

        self.utctime = localize_to_utc(time, self.location)
        self.set_query_time()

        self.query.vertical_level(self.vert_level)
        self.query.variables(*self.queryvariables)
        self.query.accept(self.data_format)

        netcdf_data = self.ncss.get_data(self.query)

        try:
            time_var = 'time'
            self.set_time(netcdf_data.variables[time_var])
        except KeyError:
            time_var = 'time1'
            self.set_time(netcdf_data.variables[time_var])

        self.data = self.netcdf2pandas(netcdf_data)

        self.set_variable_units(netcdf_data)
        self.set_variable_stdnames(netcdf_data)
        if self.__class__.__name__ == 'HRRR':
            self.calc_temperature(netcdf_data)
        self.convert_temperature()
        self.calc_wind(netcdf_data)
        self.calc_radiation(netcdf_data)

        self.data = self.data.tz_convert(self.location.tz)

        netcdf_data.close()

        return self.data

    def netcdf2pandas(self, data):
        '''
        Transforms data from netcdf to pandas DataFrame.

        Currently only supports one-dimensional netcdf data.

        Parameters
        ----------
        data: netcdf
            Data returned from UNIDATA NCSS query.

        Returns
        -------
        pd.DataFrame
        '''
        if not self.lbox:
            # one-dimensional data
            data_dict = {}
            for var in self.dataframe_variables:
                data_dict[var] = pd.Series(
                    data[self.variables[var]][:].squeeze(),
                    index=self.utctime)
            return pd.DataFrame(data_dict, columns=self.columns)
        else:
            return pd.DataFrame(columns=self.columns, index=self.utctime)

    def set_time(self, time):
        '''
        Converts time data into a pandas date object.

        Parameters
        ----------
        time: netcdf
            Contains time information.

        Returns
        -------
        pandas.DatetimeIndex
        '''
        times = num2date(time[:].squeeze(), time.units)
        self.time = pd.DatetimeIndex(pd.Series(times), tz='UTC')
        self.time = self.time.tz_convert(self.location.tz)
        self.utctime = localize_to_utc(self.time, self.location.tz)

    def set_variable_units(self, data):
        '''
        Extracts variable unit information from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.
        '''
        self.var_units = {}
        for var in self.variables:
            self.var_units[var] = data[self.variables[var]].units

    def set_variable_stdnames(self, data):
        '''
        Extracts standard names from netcdf data.

        Parameters
        ----------
        data: netcdf
            Contains queried variable information.
        '''
        self.var_stdnames = {}
        for var in self.variables:
            try:
                self.var_stdnames[var] = \
                    data[self.variables[var]].standard_name
            except AttributeError:
                self.var_stdnames[var] = var

    def calc_radiation(self, data, cloud_type='total_clouds'):
        '''
        Determines shortwave radiation values if they are missing from
        the model data.

        Parameters
        ----------
        data: netcdf
            Query data formatted in netcdf format.
        cloud_type: string
            Type of cloud cover to use for calculating radiation values.
        '''
        self.rad_type = {}
        if not self.lbox and cloud_type in self.modelvariables:
            cloud_prct = self.data[cloud_type]
            solpos = get_solarposition(self.time, self.location)
            self.zenith = np.array(solpos.zenith.tz_convert('UTC'))
            for rad in ['dni', 'dhi', 'ghi']:
                if self.model_name == 'HRRR_ESRL':
                    # HRRR_ESRL is the only model with the
                    # correct equation of time.
                    if rad in self.modelvariables:
                        self.data[rad] = pd.Series(
                            data[self.variables[rad]][:].squeeze(),
                            index=self.time)
                        self.rad_type[rad] = 'forecast'
                        self.data[rad].fillna(0, inplace=True)
                else:
                    for rad in ['dni', 'dhi', 'ghi']:
                        self.rad_type[rad] = 'liujordan'
                        self.data[rad] = liujordan(self.zenith,
                                                   cloud_prct)[rad]
                        self.data[rad].fillna(0, inplace=True)

        for var in ['dni', 'dhi', 'ghi']:
            self.data[var].fillna(0, inplace=True)
            self.var_units[var] = '$W m^{-2}$'

    def convert_temperature(self):
        '''
        Converts Kelvin to Celsius.
        '''
        if 'Temperature_surface' in self.queryvariables or \
           'Temperature_isobaric' in self.queryvariables:
            self.data['temperature'] -= 273.15
            self.var_units['temperature'] = 'C'

    def calc_temperature(self, data):
        '''
        Calculates temperature (in degrees C) from isobaric temperature.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        P = data['Pressure_surface'][:].squeeze() / 100.0
        Tiso = data['Temperature_isobaric'][:].squeeze()
        Td = data['Dewpoint_temperature_isobaric'][:].squeeze() - 273.15
        # saturation water vapor pressure
        e = 6.11 * 10**((7.5 * Td) / (Td + 273.3))
        # saturation water vapor mixing ratio
        w = 0.622 * (e / (P - e))

        T = Tiso - ((2.501 * 10.**6) / 1005.7) * w

        self.data['temperature'] = T

    def calc_wind(self, data):
        '''
        Computes wind speed.

        In some cases only gust wind speed is available. The wind_type
        attribute will indicate the type of wind speed that is present.

        Parameters
        ----------
        data: netcdf
            Query data in netcdf format.
        '''
        if not self.lbox:
            if 'u-component_of_wind_isobaric' in self.queryvariables and \
               'v-component_of_wind_isobaric' in self.queryvariables:
                wind_data = np.sqrt(
                    data['u-component_of_wind_isobaric'][:].squeeze()**2 +
                    data['v-component_of_wind_isobaric'][:].squeeze()**2)
                self.wind_type = 'component'
            elif 'Wind_speed_gust_surface' in self.queryvariables:
                wind_data = data['Wind_speed_gust_surface'][:].squeeze()
                self.wind_type = 'gust'

            if 'wind_speed' in self.data:
                self.data['wind_speed'] = pd.Series(wind_data,
                                                    index=self.time)
                self.var_units['wind_speed'] = 'm/s'
class TestNCSS(object):
    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_bad_query_no_vars(self):
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        assert xml_data['lat'][0] == 40
        assert xml_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        assert csv_data['lat'][0] == 40
        assert csv_data['lon'][0] == -105

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)
        temp = csv_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'
        relh = csv_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)
        temp = xml_data['Temperature_isobaric']
        assert len(temp) == 2
        assert temp[1] == 'K'
        relh = xml_data['Relative_humidity_isobaric']
        assert len(relh) == 2
        assert relh[1] == '%'

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        assert nc.variables['latitude'][0] == 40
        assert nc.variables['longitude'][0] == -105

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)
        assert str(csv_data['Temperature_isobaric'])[:6] == '263.39'

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)
        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
class TestNCSS(object): """Test NCSS queries and response parsing.""" server = 'http://thredds.ucar.edu/thredds/ncss/' urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2' @recorder.use_cassette('ncss_test_metadata') def setup(self): """Set up for tests with a default valid query.""" dt = datetime(2015, 6, 12, 15, 0, 0) self.ncss = NCSS(self.server + self.urlPath) self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt) self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric') def test_good_query(self): """Test that a good query is properly validated.""" assert self.ncss.validate_query(self.nq) def test_bad_query(self): """Test that a query with an unknown variable is invalid.""" self.nq.variables('foo') assert not self.ncss.validate_query(self.nq) def test_empty_query(self): """Test that an empty query is invalid.""" query = self.ncss.query() res = self.ncss.validate_query(query) assert not res assert not isinstance(res, set) def test_bad_query_no_vars(self): """Test that a query without variables is invalid.""" self.nq.var.clear() assert not self.ncss.validate_query(self.nq) @recorder.use_cassette('ncss_gfs_xml_point') def test_xml_point(self): """Test parsing XML point returns.""" self.nq.accept('xml') xml_data = self.ncss.get_data(self.nq) assert 'Temperature_isobaric' in xml_data assert 'Relative_humidity_isobaric' in xml_data assert xml_data['lat'][0] == 40 assert xml_data['lon'][0] == -105 @recorder.use_cassette('ncss_gfs_csv_point') def test_csv_point(self): """Test parsing CSV point returns.""" self.nq.accept('csv') csv_data = self.ncss.get_data(self.nq) assert 'Temperature_isobaric' in csv_data assert 'Relative_humidity_isobaric' in csv_data assert csv_data['lat'][0] == 40 assert csv_data['lon'][0] == -105 @recorder.use_cassette('ncss_gfs_csv_point') def test_unit_handler_csv(self): """Test unit-handling from CSV returns.""" self.nq.accept('csv') self.ncss.unit_handler = tuple_unit_handler csv_data = self.ncss.get_data(self.nq) temp = csv_data['Temperature_isobaric'] assert len(temp) == 2 assert temp[1] == 'K' relh = csv_data['Relative_humidity_isobaric'] assert len(relh) == 2 assert relh[1] == '%' @recorder.use_cassette('ncss_gfs_xml_point') def test_unit_handler_xml(self): """Test unit-handling from XML returns.""" self.nq.accept('xml') self.ncss.unit_handler = tuple_unit_handler xml_data = self.ncss.get_data(self.nq) temp = xml_data['Temperature_isobaric'] assert len(temp) == 2 assert temp[1] == 'K' relh = xml_data['Relative_humidity_isobaric'] assert len(relh) == 2 assert relh[1] == '%' @recorder.use_cassette('ncss_gfs_netcdf_point') def test_netcdf_point(self): """Test handling of netCDF point returns.""" self.nq.accept('netcdf') nc = self.ncss.get_data(self.nq) assert 'Temperature_isobaric' in nc.variables assert 'Relative_humidity_isobaric' in nc.variables assert nc.variables['latitude'][0] == 40 assert nc.variables['longitude'][0] == -105 @recorder.use_cassette('ncss_gfs_netcdf4_point') def test_netcdf4_point(self): """Test handling of netCDF4 point returns.""" self.nq.accept('netcdf4') nc = self.ncss.get_data(self.nq) assert 'Temperature_isobaric' in nc.variables assert 'Relative_humidity_isobaric' in nc.variables assert nc.variables['latitude'][0] == 40 assert nc.variables['longitude'][0] == -105 @recorder.use_cassette('ncss_gfs_vertical_level') def test_vertical_level(self): """Test data return from a single vertical level is correct.""" self.nq.accept('csv').vertical_level(50000) csv_data = self.ncss.get_data(self.nq) 
np.testing.assert_almost_equal(csv_data['Temperature_isobaric'], np.array([263.40]), 2) @recorder.use_cassette('ncss_gfs_csv_point') def test_raw_csv(self): """Test CSV point return from a GFS request.""" self.nq.accept('csv') csv_data = self.ncss.get_data_raw(self.nq) assert csv_data.startswith(b'date,lat') @recorder.use_cassette('ncss_gfs_csv_point') def test_unknown_mime(self): """Test handling of unknown mimetypes.""" self.nq.accept('csv') with response_context(): csv_data = self.ncss.get_data(self.nq) assert csv_data.startswith(b'date,lat')
class ForecastModel(object): """ An object for querying and holding forecast model information for use within the pvlib library. Simplifies use of siphon library on a THREDDS server. Parameters ---------- model_type: string UNIDATA category in which the model is located. model_name: string Name of the UNIDATA forecast model. set_type: string Model dataset type. Attributes ---------- access_url: string URL specifying the dataset from data will be retrieved. base_tds_url : string The top level server address catalog_url : string The url path of the catalog to parse. data: pd.DataFrame Data returned from the query. data_format: string Format of the forecast data being requested from UNIDATA. dataset: Dataset Object containing information used to access forecast data. dataframe_variables: list Model variables that are present in the data. datasets_list: list List of all available datasets. fm_models: Dataset TDSCatalog object containing all available forecast models from UNIDATA. fm_models_list: list List of all available forecast models from UNIDATA. latitude: list A list of floats containing latitude values. location: Location A pvlib Location object containing geographic quantities. longitude: list A list of floats containing longitude values. lbox: boolean Indicates the use of a location bounding box. ncss: NCSS object NCSS model_name: string Name of the UNIDATA forecast model. model: Dataset A dictionary of Dataset object, whose keys are the name of the dataset's name. model_url: string The url path of the dataset to parse. modelvariables: list Common variable names that correspond to queryvariables. query: NCSS query object NCSS object used to complete the forecast data retrival. queryvariables: list Variables that are used to query the THREDDS Data Server. time: DatetimeIndex Time range. variables: dict Defines the variables to obtain from the weather model and how they should be renamed to common variable names. units: dict Dictionary containing the units of the standard variables and the model specific variables. vert_level: float or integer Vertical altitude for query data. """ access_url_key = 'NetcdfSubset' catalog_url = 'http://thredds.ucar.edu/thredds/catalog.xml' base_tds_url = catalog_url.split('/thredds/')[0] data_format = 'netcdf' vert_level = 100000 units = { 'temp_air': 'C', 'wind_speed': 'm/s', 'ghi': 'W/m^2', 'ghi_raw': 'W/m^2', 'dni': 'W/m^2', 'dhi': 'W/m^2', 'total_clouds': '%', 'low_clouds': '%', 'mid_clouds': '%', 'high_clouds': '%'} def __init__(self, model_type, model_name, set_type): self.model_type = model_type self.model_name = model_name self.set_type = set_type self.catalog = TDSCatalog(self.catalog_url) self.fm_models = TDSCatalog(self.catalog.catalog_refs[model_type].href) self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys())) try: model_url = self.fm_models.catalog_refs[model_name].href except ParseError: raise ParseError(self.model_name + ' model may be unavailable.') try: self.model = TDSCatalog(model_url) except HTTPError: try: self.model = TDSCatalog(model_url) except HTTPError: raise HTTPError(self.model_name + ' model may be unavailable.') self.datasets_list = list(self.model.datasets.keys()) self.set_dataset() def __repr__(self): return '{}, {}'.format(self.model_name, self.set_type) def set_dataset(self): ''' Retrieves the designated dataset, creates NCSS object, and creates a NCSS query object. 
''' keys = list(self.model.datasets.keys()) labels = [item.split()[0].lower() for item in keys] if self.set_type == 'best': self.dataset = self.model.datasets[keys[labels.index('best')]] elif self.set_type == 'latest': self.dataset = self.model.datasets[keys[labels.index('latest')]] elif self.set_type == 'full': self.dataset = self.model.datasets[keys[labels.index('full')]] self.access_url = self.dataset.access_urls[self.access_url_key] self.ncss = NCSS(self.access_url) self.query = self.ncss.query() def set_query_latlon(self): ''' Sets the NCSS query location latitude and longitude. ''' if (isinstance(self.longitude, list) and isinstance(self.latitude, list)): self.lbox = True # west, east, south, north self.query.lonlat_box(self.latitude[0], self.latitude[1], self.longitude[0], self.longitude[1]) else: self.lbox = False self.query.lonlat_point(self.longitude, self.latitude) def set_location(self, time, latitude, longitude): ''' Sets the location for the query. Parameters ---------- time: datetime or DatetimeIndex Time range of the query. ''' if isinstance(time, datetime.datetime): tzinfo = time.tzinfo else: tzinfo = time.tz if tzinfo is None: self.location = Location(latitude, longitude) else: self.location = Location(latitude, longitude, tz=tzinfo) def get_data(self, latitude, longitude, start, end, vert_level=None, query_variables=None, close_netcdf_data=True): """ Submits a query to the UNIDATA servers using Siphon NCSS and converts the netcdf data to a pandas DataFrame. Parameters ---------- latitude: float The latitude value. longitude: float The longitude value. start: datetime or timestamp The start time. end: datetime or timestamp The end time. vert_level: None, float or integer Vertical altitude of interest. variables: None or list If None, uses self.variables. close_netcdf_data: bool Controls if the temporary netcdf data file should be closed. Set to False to access the raw data. Returns ------- forecast_data : DataFrame column names are the weather model's variable names. """ if vert_level is not None: self.vert_level = vert_level if query_variables is None: self.query_variables = list(self.variables.values()) else: self.query_variables = query_variables self.latitude = latitude self.longitude = longitude self.set_query_latlon() # modifies self.query self.set_location(start, latitude, longitude) self.start = start self.end = end self.query.time_range(self.start, self.end) self.query.vertical_level(self.vert_level) self.query.variables(*self.query_variables) self.query.accept(self.data_format) self.netcdf_data = self.ncss.get_data(self.query) # might be better to go to xarray here so that we can handle # higher dimensional data for more advanced applications self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables) if close_netcdf_data: self.netcdf_data.close() return self.data def process_data(self, data, **kwargs): """ Defines the steps needed to convert raw forecast data into processed forecast data. Most forecast models implement their own version of this method which also call this one. Parameters ---------- data: DataFrame Raw forecast data Returns ------- data: DataFrame Processed forecast data. """ data = self.rename(data) return data def get_processed_data(self, *args, **kwargs): """ Get and process forecast data. 
Parameters ---------- *args: positional arguments Passed to get_data **kwargs: keyword arguments Passed to get_data and process_data Returns ------- data: DataFrame Processed forecast data """ return self.process_data(self.get_data(*args, **kwargs), **kwargs) def rename(self, data, variables=None): """ Renames the columns according the variable mapping. Parameters ---------- data: DataFrame variables: None or dict If None, uses self.variables Returns ------- data: DataFrame Renamed data. """ if variables is None: variables = self.variables return data.rename(columns={y: x for x, y in variables.items()}) def _netcdf2pandas(self, netcdf_data, query_variables): """ Transforms data from netcdf to pandas DataFrame. Parameters ---------- data: netcdf Data returned from UNIDATA NCSS query. query_variables: list The variables requested. Returns ------- pd.DataFrame """ # set self.time try: time_var = 'time' self.set_time(netcdf_data.variables[time_var]) except KeyError: # which model does this dumb thing? time_var = 'time1' self.set_time(netcdf_data.variables[time_var]) data_dict = {key: data[:].squeeze() for key, data in netcdf_data.variables.items() if key in query_variables} return pd.DataFrame(data_dict, index=self.time) def set_time(self, time): ''' Converts time data into a pandas date object. Parameters ---------- time: netcdf Contains time information. Returns ------- pandas.DatetimeIndex ''' times = num2date(time[:].squeeze(), time.units) self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz) def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35, **kwargs): """ Convert cloud cover to GHI using a linear relationship. 0% cloud cover returns ghi_clear. 100% cloud cover returns offset*ghi_clear. Parameters ---------- cloud_cover: numeric Cloud cover in %. ghi_clear: numeric GHI under clear sky conditions. offset: numeric Determines the minimum GHI. kwargs Not used. Returns ------- ghi: numeric Estimated GHI. References ---------- Larson et. al. "Day-ahead forecasting of solar power output from photovoltaic plants in the American Southwest" Renewable Energy 91, 11-20 (2016). """ offset = offset / 100. cloud_cover = cloud_cover / 100. ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear return ghi def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover, method='linear', **kwargs): """ Estimates irradiance from cloud cover in the following steps: 1. Determine clear sky GHI using Ineichen model and climatological turbidity. 2. Estimate cloudy sky GHI using a function of cloud_cover e.g. :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear` 3. Estimate cloudy sky DNI using the DISC model. 4. Calculate DHI from DNI and DHI. Parameters ---------- cloud_cover : Series Cloud cover in %. method : str Method for converting cloud cover to GHI. 'linear' is currently the only option. **kwargs Passed to the method that does the conversion Returns ------- irrads : DataFrame Estimated GHI, DNI, and DHI. 
""" solpos = self.location.get_solarposition(cloud_cover.index) cs = self.location.get_clearsky(cloud_cover.index, model='ineichen', solar_position=solpos) method = method.lower() if method == 'linear': ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'], **kwargs) else: raise ValueError('invalid method argument') dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni'] dhi = ghi - dni * np.cos(np.radians(solpos['zenith'])) irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0) return irrads def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75, **kwargs): """ Convert cloud cover to atmospheric transmittance using a linear model. 0% cloud cover returns offset. 100% cloud cover returns 0. Parameters ---------- cloud_cover : numeric Cloud cover in %. offset : numeric Determines the maximum transmittance. kwargs Not used. Returns ------- ghi : numeric Estimated GHI. """ transmittance = ((100.0 - cloud_cover) / 100.0) * 0.75 return transmittance def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs): """ Estimates irradiance from cloud cover in the following steps: 1. Determine transmittance using a function of cloud cover e.g. :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear` 2. Calculate GHI, DNI, DHI using the :py:func:`pvlib.irradiance.liujordan` model Parameters ---------- cloud_cover : Series Returns ------- irradiance : DataFrame Columns include ghi, dni, dhi """ # in principle, get_solarposition could use the forecast # pressure, temp, etc., but the cloud cover forecast is not # accurate enough to justify using these minor corrections solar_position = self.location.get_solarposition(cloud_cover.index) dni_extra = extraradiation(cloud_cover.index) airmass = self.location.get_airmass(cloud_cover.index) transmittance = self.cloud_cover_to_transmittance_linear(cloud_cover, **kwargs) irrads = liujordan(solar_position['apparent_zenith'], transmittance, airmass['airmass_absolute'], dni_extra=dni_extra) irrads = irrads.fillna(0) return irrads def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling', **kwargs): """ Convert cloud cover to irradiance. A wrapper method. Parameters ---------- cloud_cover : Series how : str Selects the method for conversion. Can be one of clearsky_scaling or liujordan. **kwargs Passed to the selected method. Returns ------- irradiance : DataFrame Columns include ghi, dni, dhi """ how = how.lower() if how == 'clearsky_scaling': irrads = self.cloud_cover_to_irradiance_clearsky_scaling( cloud_cover, **kwargs) elif how == 'liujordan': irrads = self.cloud_cover_to_irradiance_liujordan( cloud_cover, **kwargs) else: raise ValueError('invalid how argument') return irrads def kelvin_to_celsius(self, temperature): """ Converts Kelvin to celsius. Parameters ---------- temperature: numeric Returns ------- temperature: numeric """ return temperature - 273.15 def isobaric_to_ambient_temperature(self, data): """ Calculates temperature from isobaric temperature. Parameters ---------- data: DataFrame Must contain columns pressure, temperature_iso, temperature_dew_iso. Input temperature in K. 
Returns ------- temperature : Series Temperature in K """ P = data['pressure'] / 100.0 Tiso = data['temperature_iso'] Td = data['temperature_dew_iso'] - 273.15 # saturation water vapor pressure e = 6.11 * 10**((7.5 * Td) / (Td + 273.3)) # saturation water vapor mixing ratio w = 0.622 * (e / (P - e)) T = Tiso - ((2.501 * 10.**6) / 1005.7) * w return T def uv_to_speed(self, data): """ Computes wind speed from wind components. Parameters ---------- data : DataFrame Must contain the columns 'wind_speed_u' and 'wind_speed_v'. Returns ------- wind_speed : Series """ wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2) return wind_speed def gust_to_speed(self, data, scaling=1/1.4): """ Computes standard wind speed from gust. Very approximate and location dependent. Parameters ---------- data : DataFrame Must contain the column 'wind_speed_gust'. Returns ------- wind_speed : Series """ wind_speed = data['wind_speed_gust'] * scaling return wind_speed
class ForecastModel(object): """ An object for querying and holding forecast model information for use within the pvlib library. Simplifies use of siphon library on a THREDDS server. Parameters ---------- model_type: string UNIDATA category in which the model is located. model_name: string Name of the UNIDATA forecast model. set_type: string Model dataset type. Attributes ---------- access_url: string URL specifying the dataset from data will be retrieved. base_tds_url : string The top level server address catalog_url : string The url path of the catalog to parse. data: pd.DataFrame Data returned from the query. data_format: string Format of the forecast data being requested from UNIDATA. dataset: Dataset Object containing information used to access forecast data. dataframe_variables: list Model variables that are present in the data. datasets_list: list List of all available datasets. fm_models: Dataset TDSCatalog object containing all available forecast models from UNIDATA. fm_models_list: list List of all available forecast models from UNIDATA. latitude: list A list of floats containing latitude values. location: Location A pvlib Location object containing geographic quantities. longitude: list A list of floats containing longitude values. lbox: boolean Indicates the use of a location bounding box. ncss: NCSS object NCSS model_name: string Name of the UNIDATA forecast model. model: Dataset A dictionary of Dataset object, whose keys are the name of the dataset's name. model_url: string The url path of the dataset to parse. modelvariables: list Common variable names that correspond to queryvariables. query: NCSS query object NCSS object used to complete the forecast data retrival. queryvariables: list Variables that are used to query the THREDDS Data Server. time: DatetimeIndex Time range. variables: dict Defines the variables to obtain from the weather model and how they should be renamed to common variable names. units: dict Dictionary containing the units of the standard variables and the model specific variables. vert_level: float or integer Vertical altitude for query data. """ access_url_key = 'NetcdfSubset' catalog_url = 'https://thredds.ucar.edu/thredds/catalog.xml' base_tds_url = catalog_url.split('/thredds/')[0] data_format = 'netcdf' units = { 'temp_air': 'C', 'wind_speed': 'm/s', 'ghi': 'W/m^2', 'ghi_raw': 'W/m^2', 'dni': 'W/m^2', 'dhi': 'W/m^2', 'total_clouds': '%', 'low_clouds': '%', 'mid_clouds': '%', 'high_clouds': '%' } def __init__(self, model_type, model_name, set_type, vert_level=None): self.model_type = model_type self.model_name = model_name self.set_type = set_type self.connected = False self.vert_level = vert_level def connect_to_catalog(self): self.catalog = TDSCatalog(self.catalog_url) self.fm_models = TDSCatalog( self.catalog.catalog_refs[self.model_type].href) self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys())) try: model_url = self.fm_models.catalog_refs[self.model_name].href except ParseError: raise ParseError(self.model_name + ' model may be unavailable.') try: self.model = TDSCatalog(model_url) except HTTPError: try: self.model = TDSCatalog(model_url) except HTTPError: raise HTTPError(self.model_name + ' model may be unavailable.') self.datasets_list = list(self.model.datasets.keys()) self.set_dataset() self.connected = True def __repr__(self): return '{}, {}'.format(self.model_name, self.set_type) def set_dataset(self): ''' Retrieves the designated dataset, creates NCSS object, and creates a NCSS query object. 
''' keys = list(self.model.datasets.keys()) labels = [item.split()[0].lower() for item in keys] if self.set_type == 'best': self.dataset = self.model.datasets[keys[labels.index('best')]] elif self.set_type == 'latest': self.dataset = self.model.datasets[keys[labels.index('latest')]] elif self.set_type == 'full': self.dataset = self.model.datasets[keys[labels.index('full')]] self.access_url = self.dataset.access_urls[self.access_url_key] self.ncss = NCSS(self.access_url) self.query = self.ncss.query() def set_query_time_range(self, start, end): """ Parameters ---------- start : datetime.datetime, pandas.Timestamp Must be tz-localized. end : datetime.datetime, pandas.Timestamp Must be tz-localized. Notes ----- Assigns ``self.start``, ``self.end``. Modifies ``self.query`` """ self.start = pd.Timestamp(start) self.end = pd.Timestamp(end) if self.start.tz is None or self.end.tz is None: raise TypeError('start and end must be tz-localized') self.query.time_range(self.start, self.end) def set_query_latlon(self): ''' Sets the NCSS query location latitude and longitude. ''' if (isinstance(self.longitude, list) and isinstance(self.latitude, list)): self.lbox = True # west, east, south, north self.query.lonlat_box(self.longitude[0], self.longitude[1], self.latitude[0], self.latitude[1]) else: self.lbox = False self.query.lonlat_point(self.longitude, self.latitude) def set_location(self, tz, latitude, longitude): ''' Sets the location for the query. Parameters ---------- tz: tzinfo Timezone of the query latitude: float Latitude of the query longitude: float Longitude of the query Notes ----- Assigns ``self.location``. ''' self.location = Location(latitude, longitude, tz=tz) def get_data(self, latitude, longitude, start, end, vert_level=None, query_variables=None, close_netcdf_data=True, **kwargs): """ Submits a query to the UNIDATA servers using Siphon NCSS and converts the netcdf data to a pandas DataFrame. Parameters ---------- latitude: float The latitude value. longitude: float The longitude value. start: datetime or timestamp The start time. end: datetime or timestamp The end time. vert_level: None, float or integer, default None Vertical altitude of interest. query_variables: None or list, default None If None, uses self.variables. close_netcdf_data: bool, default True Controls if the temporary netcdf data file should be closed. Set to False to access the raw data. **kwargs: Additional keyword arguments are silently ignored. Returns ------- forecast_data : DataFrame column names are the weather model's variable names. 
""" if not self.connected: self.connect_to_catalog() if vert_level is not None: self.vert_level = vert_level if query_variables is None: self.query_variables = list(self.variables.values()) else: self.query_variables = query_variables self.set_query_time_range(start, end) self.latitude = latitude self.longitude = longitude self.set_query_latlon() # modifies self.query self.set_location(self.start.tz, latitude, longitude) if self.vert_level is not None: self.query.vertical_level(self.vert_level) self.query.variables(*self.query_variables) self.query.accept(self.data_format) self.netcdf_data = self.ncss.get_data(self.query) # might be better to go to xarray here so that we can handle # higher dimensional data for more advanced applications self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables, self.start, self.end) if close_netcdf_data: self.netcdf_data.close() return self.data def process_data(self, data, **kwargs): """ Defines the steps needed to convert raw forecast data into processed forecast data. Most forecast models implement their own version of this method which also call this one. Parameters ---------- data: DataFrame Raw forecast data Returns ------- data: DataFrame Processed forecast data. """ data = self.rename(data) return data def get_processed_data(self, *args, **kwargs): """ Get and process forecast data. Parameters ---------- *args: positional arguments Passed to get_data **kwargs: keyword arguments Passed to get_data and process_data Returns ------- data: DataFrame Processed forecast data """ return self.process_data(self.get_data(*args, **kwargs), **kwargs) def rename(self, data, variables=None): """ Renames the columns according the variable mapping. Parameters ---------- data: DataFrame variables: None or dict, default None If None, uses self.variables Returns ------- data: DataFrame Renamed data. """ if variables is None: variables = self.variables return data.rename(columns={y: x for x, y in variables.items()}) def _netcdf2pandas(self, netcdf_data, query_variables, start, end): """ Transforms data from netcdf to pandas DataFrame. Parameters ---------- data: netcdf Data returned from UNIDATA NCSS query. query_variables: list The variables requested. start: Timestamp The start time end: Timestamp The end time Returns ------- pd.DataFrame """ # set self.time try: time_var = 'time' self.set_time(netcdf_data.variables[time_var]) except KeyError: # which model does this dumb thing? time_var = 'time1' self.set_time(netcdf_data.variables[time_var]) data_dict = {} for key, data in netcdf_data.variables.items(): # if accounts for possibility of extra variable returned if key not in query_variables: continue squeezed = data[:].squeeze() # If the data is big endian, swap the byte order to make it # little endian if squeezed.dtype.byteorder == '>': squeezed = squeezed.byteswap().newbyteorder() if squeezed.ndim == 1: data_dict[key] = squeezed elif squeezed.ndim == 2: for num, data_level in enumerate(squeezed.T): data_dict[key + '_' + str(num)] = data_level else: raise ValueError('cannot parse ndim > 2') data = pd.DataFrame(data_dict, index=self.time) # sometimes data is returned as hours since T0 # where T0 is before start. Then the hours between # T0 and start are added *after* end. So sort and slice # to remove the garbage data = data.sort_index().loc[start:end] return data def set_time(self, time): ''' Converts time data into a pandas date object. Parameters ---------- time: netcdf Contains time information. 
Returns ------- pandas.DatetimeIndex ''' times = num2date(time[:].squeeze(), time.units, only_use_cftime_datetimes=False, only_use_python_datetimes=True) self.time = pd.DatetimeIndex(pd.Series(times), tz=self.location.tz) def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35, **kwargs): """ Convert cloud cover to GHI using a linear relationship. 0% cloud cover returns ghi_clear. 100% cloud cover returns offset*ghi_clear. Parameters ---------- cloud_cover: numeric Cloud cover in %. ghi_clear: numeric GHI under clear sky conditions. offset: numeric, default 35 Determines the minimum GHI. kwargs Not used. Returns ------- ghi: numeric Estimated GHI. References ---------- Larson et. al. "Day-ahead forecasting of solar power output from photovoltaic plants in the American Southwest" Renewable Energy 91, 11-20 (2016). """ offset = offset / 100. cloud_cover = cloud_cover / 100. ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear return ghi def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover, method='linear', **kwargs): """ Estimates irradiance from cloud cover in the following steps: 1. Determine clear sky GHI using Ineichen model and climatological turbidity. 2. Estimate cloudy sky GHI using a function of cloud_cover e.g. :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear` 3. Estimate cloudy sky DNI using the DISC model. 4. Calculate DHI from DNI and GHI. Parameters ---------- cloud_cover : Series Cloud cover in %. method : str, default 'linear' Method for converting cloud cover to GHI. 'linear' is currently the only option. **kwargs Passed to the method that does the conversion Returns ------- irrads : DataFrame Estimated GHI, DNI, and DHI. """ solpos = self.location.get_solarposition(cloud_cover.index) cs = self.location.get_clearsky(cloud_cover.index, model='ineichen', solar_position=solpos) method = method.lower() if method == 'linear': ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'], **kwargs) else: raise ValueError('invalid method argument') dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni'] dhi = ghi - dni * np.cos(np.radians(solpos['zenith'])) irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0) return irrads def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75, **kwargs): """ Convert cloud cover to atmospheric transmittance using a linear model. 0% cloud cover returns offset. 100% cloud cover returns 0. Parameters ---------- cloud_cover : numeric Cloud cover in %. offset : numeric, default 0.75 Determines the maximum transmittance. kwargs Not used. Returns ------- ghi : numeric Estimated GHI. """ transmittance = ((100.0 - cloud_cover) / 100.0) * offset return transmittance def cloud_cover_to_irradiance_liujordan(self, cloud_cover, **kwargs): """ Estimates irradiance from cloud cover in the following steps: 1. Determine transmittance using a function of cloud cover e.g. :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear` 2. 
Calculate GHI, DNI, DHI using the :py:func:`pvlib.irradiance.liujordan` model Parameters ---------- cloud_cover : Series Returns ------- irradiance : DataFrame Columns include ghi, dni, dhi """ # in principle, get_solarposition could use the forecast # pressure, temp, etc., but the cloud cover forecast is not # accurate enough to justify using these minor corrections solar_position = self.location.get_solarposition(cloud_cover.index) dni_extra = get_extra_radiation(cloud_cover.index) airmass = self.location.get_airmass(cloud_cover.index) transmittance = self.cloud_cover_to_transmittance_linear( cloud_cover, **kwargs) irrads = liujordan(solar_position['apparent_zenith'], transmittance, airmass['airmass_absolute'], dni_extra=dni_extra) irrads = irrads.fillna(0) return irrads def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling', **kwargs): """ Convert cloud cover to irradiance. A wrapper method. Parameters ---------- cloud_cover : Series how : str, default 'clearsky_scaling' Selects the method for conversion. Can be one of clearsky_scaling or liujordan. **kwargs Passed to the selected method. Returns ------- irradiance : DataFrame Columns include ghi, dni, dhi """ how = how.lower() if how == 'clearsky_scaling': irrads = self.cloud_cover_to_irradiance_clearsky_scaling( cloud_cover, **kwargs) elif how == 'liujordan': irrads = self.cloud_cover_to_irradiance_liujordan( cloud_cover, **kwargs) else: raise ValueError('invalid how argument') return irrads def kelvin_to_celsius(self, temperature): """ Converts Kelvin to celsius. Parameters ---------- temperature: numeric Returns ------- temperature: numeric """ return temperature - 273.15 def isobaric_to_ambient_temperature(self, data): """ Calculates temperature from isobaric temperature. Parameters ---------- data: DataFrame Must contain columns pressure, temperature_iso, temperature_dew_iso. Input temperature in K. Returns ------- temperature : Series Temperature in K """ P = data['pressure'] / 100.0 # noqa: N806 Tiso = data['temperature_iso'] # noqa: N806 Td = data['temperature_dew_iso'] - 273.15 # noqa: N806 # saturation water vapor pressure e = 6.11 * 10**((7.5 * Td) / (Td + 273.3)) # saturation water vapor mixing ratio w = 0.622 * (e / (P - e)) temperature = Tiso - ((2.501 * 10.**6) / 1005.7) * w return temperature def uv_to_speed(self, data): """ Computes wind speed from wind components. Parameters ---------- data : DataFrame Must contain the columns 'wind_speed_u' and 'wind_speed_v'. Returns ------- wind_speed : Series """ wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2) return wind_speed def gust_to_speed(self, data, scaling=1 / 1.4): """ Computes standard wind speed from gust. Very approximate and location dependent. Parameters ---------- data : DataFrame Must contain the column 'wind_speed_gust'. Returns ------- wind_speed : Series """ wind_speed = data['wind_speed_gust'] * scaling return wind_speed
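# Minimal usage sketch for the class above. In pvlib, concrete subclasses
# such as GFS supply the `variables` mapping; the import path below assumes
# the pvlib.forecast module this class comes from.
import pandas as pd
from pvlib.forecast import GFS

fm = GFS()
start = pd.Timestamp.now(tz='America/Denver')
end = start + pd.Timedelta(days=3)
forecast = fm.get_processed_data(40.0, -105.0, start, end)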
from datetime import datetime

from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

########################################
# Set up access to the data

# Create NCSS object to access the NetcdfSubset service
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/gfs-g4-anl-files/'
dt = datetime(2016, 8, 22, 18)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/gfsanl_4_{dt:%Y%m%d}_'
            '{dt:%H}00_003.grb2'.format(base_url, dt=dt))

# Create lat/lon box for location you want to get data for
query = ncss.query()
query.lonlat_box(north=50, south=30, east=-80, west=-115)
query.time(datetime(2016, 8, 22, 21))

# Request data for geopotential height and winds
query.variables('Geopotential_height_isobaric',
                'u-component_of_wind_isobaric',
                'v-component_of_wind_isobaric')
query.vertical_level(100000)
data = ncss.get_data(query)

# Pull out variables you want to use
height_var = data.variables['Geopotential_height_isobaric']
u_wind_var = data.variables['u-component_of_wind_isobaric']
v_wind_var = data.variables['v-component_of_wind_isobaric']

# Find the name of the time coordinate
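# A minimal sketch of the step announced above, assuming the variable carries
# a CF-style 'coordinates' attribute as in the GFS examples elsewhere in this
# collection.
time_name = None
for coord in height_var.coordinates.split():
    if 'time' in coord:
        time_name = coord
        break
time_var = data.variables[time_name]
time_vals = num2date(time_var[:].squeeze(), time_var.units)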
from datetime import datetime

from scipy.ndimage import gaussian_filter
from siphon.ncss import NCSS

###########################
# **Get the data**
#
# This example will use data from the North American Mesoscale Model Analysis
# (https://nomads.ncdc.gov/) for 18 UTC 27 April 2011.
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
dt = datetime(2011, 4, 27)
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/namanl_218_{dt:%Y%m%d}_'
            '1800_000.grb'.format(base_url, dt=dt))

# Query for required variables
gfsdata = ncss.query().all_times()
gfsdata.variables('Geopotential_height_isobaric',
                  'u-component_of_wind_isobaric',
                  'v-component_of_wind_isobaric',
                  'Temperature_isobaric',
                  'Relative_humidity_isobaric',
                  'Best_4_layer_lifted_index_layer_between_two_pressure_'
                  'difference_from_ground_layer',
                  'Absolute_vorticity_isobaric',
                  'Pressure_reduced_to_MSL_msl',
                  'Dew_point_temperature_height_above_ground'
                  ).add_lonlat()

# Set the lat/lon box for the data to pull in.
gfsdata.lonlat_box(-135, -60, 15, 65)
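# Executing the query (a sketch of the obvious next step; everything the
# request needs was set above).
data = ncss.get_data(gfsdata)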
class TestNCSS(object):
    server = 'http://thredds.ucar.edu/thredds/ncss/'
    urlPath = 'grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150612_1200.grib2'

    @recorder.use_cassette('ncss_test_metadata')
    def setup(self):
        dt = datetime(2015, 6, 12, 15, 0, 0)
        self.ncss = NCSS(self.server + self.urlPath)
        self.nq = self.ncss.query().lonlat_point(-105, 40).time(dt)
        self.nq.variables('Temperature_isobaric', 'Relative_humidity_isobaric')

    def test_good_query(self):
        assert self.ncss.validate_query(self.nq)

    def test_bad_query(self):
        self.nq.variables('foo')
        assert not self.ncss.validate_query(self.nq)

    def test_bad_query_no_vars(self):
        self.nq.var.clear()
        assert not self.ncss.validate_query(self.nq)

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_xml_point(self):
        self.nq.accept('xml')
        xml_data = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in xml_data
        assert 'Relative_humidity_isobaric' in xml_data
        eq_(xml_data['lat'][0], 40)
        eq_(xml_data['lon'][0], -105)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_csv_point(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in csv_data
        assert 'Relative_humidity_isobaric' in csv_data
        eq_(csv_data['lat'][0], 40)
        eq_(csv_data['lon'][0], -105)

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unit_handler_csv(self):
        self.nq.accept('csv')
        self.ncss.unit_handler = tuple_unit_handler
        csv_data = self.ncss.get_data(self.nq)
        temp = csv_data['Temperature_isobaric']
        eq_(len(temp), 2)
        eq_(temp[1], 'K')
        relh = csv_data['Relative_humidity_isobaric']
        eq_(len(relh), 2)
        eq_(relh[1], '%')

    @recorder.use_cassette('ncss_gfs_xml_point')
    def test_unit_handler_xml(self):
        self.nq.accept('xml')
        self.ncss.unit_handler = tuple_unit_handler
        xml_data = self.ncss.get_data(self.nq)
        temp = xml_data['Temperature_isobaric']
        eq_(len(temp), 2)
        eq_(temp[1], 'K')
        relh = xml_data['Relative_humidity_isobaric']
        eq_(len(relh), 2)
        eq_(relh[1], '%')

    @recorder.use_cassette('ncss_gfs_netcdf_point')
    def test_netcdf_point(self):
        self.nq.accept('netcdf')
        nc = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        eq_(nc.variables['latitude'][0], 40)
        eq_(nc.variables['longitude'][0], -105)

    @recorder.use_cassette('ncss_gfs_netcdf4_point')
    def test_netcdf4_point(self):
        self.nq.accept('netcdf4')
        nc = self.ncss.get_data(self.nq)
        assert 'Temperature_isobaric' in nc.variables
        assert 'Relative_humidity_isobaric' in nc.variables
        eq_(nc.variables['latitude'][0], 40)
        eq_(nc.variables['longitude'][0], -105)

    @recorder.use_cassette('ncss_gfs_vertical_level')
    def test_vertical_level(self):
        self.nq.accept('csv').vertical_level(50000)
        csv_data = self.ncss.get_data(self.nq)
        eq_(str(csv_data['Temperature_isobaric'])[:6], '263.39')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_raw_csv(self):
        self.nq.accept('csv')
        csv_data = self.ncss.get_data_raw(self.nq)
        assert csv_data.startswith(b'date,lat')

    @recorder.use_cassette('ncss_gfs_csv_point')
    def test_unknown_mime(self):
        self.nq.accept('csv')
        with response_context():
            csv_data = self.ncss.get_data(self.nq)
            assert csv_data.startswith(b'date,lat')
from datetime import datetime

import numpy as np
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
from siphon.ncss import NCSS

# Latest GFS Dataset
cat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                 'NCEP/GFS/Global_0p5deg/latest.xml')
best_ds = list(cat.datasets.values())[0]
ncss = NCSS(best_ds.access_urls['NetcdfSubset'])

# Set the time to current
now = datetime.utcnow()

# Query for Latest GFS Run: heights
gfsdata_hght = ncss.query().time(now).accept('netcdf4')
gfsdata_hght.variables('Geopotential_height_isobaric').add_lonlat()

# Set the lat/lon box for the data you want to pull in:
# lonlat_box(west_lon, east_lon, south_lat, north_lat)
gfsdata_hght.lonlat_box(0, 360, 0, 90)

# Set desired level: 25000 Pa = 250 hPa
gfsdata_hght.vertical_level(25000)

# Actually getting the data
data_hght = ncss.get_data(gfsdata_hght)

# Query for Latest GFS Run: winds
gfsdata_wind = ncss.query().time(now).accept('netcdf4')
gfsdata_wind.variables('u-component_of_wind_isobaric',
                       'v-component_of_wind_isobaric').add_lonlat()
# Extract HRRR data using Unidata's Siphon package

# Resolve the latest HRRR dataset
from siphon.catalog import TDSCatalog
latest_hrrr = TDSCatalog('http://thredds-jumbo.unidata.ucar.edu/thredds/'
                         'catalog/grib/HRRR/CONUS_3km/surface/latest.xml')
hrrr_ds = list(latest_hrrr.datasets.values())[0]

# Set up access via NCSS
from siphon.ncss import NCSS
ncss = NCSS(hrrr_ds.access_urls['NetcdfSubset'])

# Create a query to ask for all times in netcdf4 format for
# the wind component variables, with a bounding box
query = ncss.query()

# The OPeNDAP endpoint is also available if direct access is preferred
dap_url = hrrr_ds.access_urls['OPENDAP']

query.all_times().accept('netcdf4').variables(
    'u-component_of_wind_height_above_ground',
    'v-component_of_wind_height_above_ground')
query.lonlat_box(north=45, south=41., east=-63, west=-71.5)

# Get the raw bytes and write to a file.
data = ncss.get_data_raw(query)
with open('test_uv.nc', 'wb') as outf:
    outf.write(data)
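# A quick check of the file we just wrote (a sketch; any netCDF reader works)
from netCDF4 import Dataset
with Dataset('test_uv.nc') as nc:
    print(list(nc.variables))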
from datetime import datetime

import metpy.calc as mpcalc
from metpy.units import units
from netCDF4 import num2date
import numpy as np
import scipy.ndimage as ndimage
from siphon.ncss import NCSS

##################################
# Set up netCDF Subset Service link
dt = datetime(2016, 4, 16, 18)
base_url = 'https://www.ncei.noaa.gov/thredds/ncss/grid/namanl/'
ncss = NCSS('{}{dt:%Y%m}/{dt:%Y%m%d}/namanl_218_{dt:%Y%m%d}_'
            '{dt:%H}00_000.grb'.format(base_url, dt=dt))

# Data Query
hgt = ncss.query().time(dt)
hgt.variables('Geopotential_height_isobaric',
              'u-component_of_wind_isobaric',
              'v-component_of_wind_isobaric').add_lonlat()

# Actually getting the data
data = ncss.get_data(hgt)

##################################
# Pull apart the data

# Get dimension names to pull appropriate variables
dtime = data.variables['Geopotential_height_isobaric'].dimensions[0]
dlev = data.variables['Geopotential_height_isobaric'].dimensions[1]
dlat = data.variables['Geopotential_height_isobaric'].dimensions[2]
dlon = data.variables['Geopotential_height_isobaric'].dimensions[3]
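# With the dimension names in hand, grab the matching coordinate variables
# (a sketch; this mirrors the convention used in the other NAM/GFS examples
# in this collection).
times = data.variables[dtime]
vtimes = num2date(times[:].squeeze(), times.units)
lev = data.variables[dlev][:]
lat = data.variables[dlat][:]
lon = data.variables[dlon][:]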