def test_urlopen_requests_kwargs():
    """Verify that urlopen forwards extra keyword arguments to requests."""
    target = "http://erddap.sensors.ioos.us/erddap/tabledap/"
    limit = 1  # seconds before the request must give up
    # Ask the delay service to stall one second longer than the timeout,
    # guaranteeing a ReadTimeout if the kwarg was forwarded.
    delay_ms = (limit + 1) * 1000
    slow_url = (
        "http://slowwly.robertomurray.co.uk/delay/"
        + str(delay_ms)
        + "/url/"
        + target
    )
    with pytest.raises(ReadTimeout):
        urlopen(slow_url, timeout=limit)
def get_var_by_attr(self, dataset_id=None, **kwargs):
    """Similar to netCDF4-python `get_variables_by_attributes` for an ERDDAP
    `dataset_id`.

    The `get_var_by_attr` method will create an info `csv` return,
    for the `dataset_id`, and the variables attribute dictionary.

    Each keyword is an attribute name; each value is either a literal to
    match exactly or a callable applied to the attribute value (or None).

    Examples:
        >>> e = ERDDAP(server_url='https://data.ioos.us/gliders/erddap')
        >>> dataset_id = 'whoi_406-20160902T1700'

        Get variables with x-axis attribute.

        >>> e.get_var_by_attr(dataset_id, axis='X')
        ['longitude']

        Get variables with matching "standard_name" attribute

        >>> e.get_var_by_attr(dataset_id, standard_name='northward_sea_water_velocity')
        ['v']

        Get Axis variables

        >>> e.get_var_by_attr(dataset_id, axis=lambda v: v in ['X', 'Y', 'Z', 'T'])
        ['latitude', 'longitude', 'time', 'depth']

    """
    # Fall back to the instance-level dataset_id when none is given.
    if not dataset_id:
        dataset_id = self.dataset_id
    url = info_url(self.server, dataset_id=dataset_id, response='csv')
    # Creates the variables dictionary for the `get_var_by_attr` lookup.
    variables = {}
    _df = pd.read_csv(
        urlopen(url, params=self.params, **self.requests_kwargs))
    # NOTE(review): caches ``self.dataset_id`` rather than the (possibly
    # different) local ``dataset_id`` argument — confirm this is intended.
    self._dataset_id = self.dataset_id
    for variable in set(_df['Variable Name']):
        # Per-variable {attribute name: value} mapping from the info CSV.
        attributes = _df.loc[_df['Variable Name'] == variable,
                             ['Attribute Name', 'Value']].set_index(
            'Attribute Name').to_dict()['Value']
        variables.update({variable: attributes})
    self._variables = variables
    # Virtually the same code as the netCDF4 counterpart.
    vs = []
    has_value_flag = False
    for vname in self._variables:
        var = self._variables[vname]
        for k, v in kwargs.items():
            if callable(v):
                # Callable criterion receives the attribute value, or None
                # when the variable lacks the attribute.
                has_value_flag = v(var.get(k, None))
                if has_value_flag is False:
                    break
            elif var.get(k) and var.get(k) == v:
                has_value_flag = True
            else:
                # Literal mismatch (or missing attribute): reject variable.
                has_value_flag = False
                break
        if has_value_flag is True:
            vs.append(vname)
    return vs
def get_data(dataset=None, standard_name=None, constraints=None):
    """Download time + the variable matching ``standard_name`` from
    ``dataset`` as a pandas.DataFrame indexed by time.

    Returns a ``(DataFrame, variable_name)`` tuple.
    """
    # BUG FIX: the debug print referenced ``dataset_id``, which is not a
    # parameter of this function (it relied on an ambient global); print
    # the ``dataset`` argument that is actually used below.
    print(dataset, standard_name, constraints)
    # Case-insensitive standard_name lookup; take the first match.
    var = e.get_var_by_attr(
        dataset_id=dataset,
        standard_name=lambda v: str(v).lower() == standard_name.lower())[0]
    download_url = e.get_download_url(
        dataset_id=dataset,
        constraints=constraints,
        variables=['time', var],
        response='csv')
    # skiprows=[1] drops ERDDAP's units row.
    df = pd.read_csv(urlopen(download_url), index_col='time',
                     parse_dates=True, skiprows=[1])
    return df, var
def to_pandas(self, **kw):
    """Save a data request to a pandas.DataFrame.

    Accepts any `pandas.read_csv` keyword arguments.
    """
    csv_url = self.get_download_url(response='csv')
    payload = urlopen(csv_url, params=self.params, **self.requests_kwargs)
    return pd.read_csv(payload, **kw)
def test__tempnc():
    url = "https://data.ioos.us/gliders/erddap/tabledap/cp_336-20170116T1254.nc"
    payload = urlopen(url).read()
    with _tempnc(payload) as tmp:
        # While the context is open the temporary file exists on disk...
        assert os.path.exists(tmp)
        # ...and carries a netCDF extension.
        assert tmp.endswith("nc")
    # Leaving the context removes the file.
    assert not os.path.exists(tmp)
def to_xarray(self, **kw):
    """Load the data request into a xarray.Dataset.

    Accepts any `xr.open_dataset` keyword arguments.
    """
    import xarray as xr

    nc_url = self.get_download_url(response="nc")
    payload = urlopen(nc_url, params=self.params, **self.requests_kwargs).read()
    # Stage the bytes in a temporary netCDF file so xarray can open them.
    with _tempnc(payload) as tmp:
        return xr.open_dataset(tmp.name, **kw)
def to_iris(self, **kw):
    """Load the data request into an iris.CubeList.

    Accepts any `iris.load_raw` keyword arguments.
    """
    import iris

    nc_url = self.get_download_url(response="nc")
    payload = urlopen(nc_url, params=self.params, **self.requests_kwargs).read()
    with _tempnc(payload) as tmp:
        cube_list = iris.load_raw(tmp.name, **kw)
        # Force lazy data to load before the temporary file disappears.
        cube_list.realise_data()
        return cube_list
def to_xarray(self, **kw):
    """Save a data request to a xarray.Dataset.

    Accepts any `xr.open_dataset` keyword arguments.
    """
    import xarray as xr
    from tempfile import NamedTemporaryFile

    nc_url = self.get_download_url(response='nc')
    payload = urlopen(nc_url, params=self.params, **self.requests_kwargs).read()
    with NamedTemporaryFile(suffix='.nc', prefix='erddapy_') as tmp:
        tmp.write(payload)
        # Make sure the bytes hit disk before xarray reads the path.
        tmp.flush()
        return xr.open_dataset(tmp.name, **kw)
def adv_search(e, standard_name, cdm_data_type, min_time, max_time):
    """Run an ERDDAP advanced search and return the result as a DataFrame.

    On failure returns an empty list and updates the global ``figure`` title
    to tell the user nothing was found.
    """
    try:
        search_url = e.get_search_url(response='csv',
                                      cdm_data_type=cdm_data_type.lower(),
                                      items_per_page=100000,
                                      standard_name=standard_name,
                                      min_time=min_time,
                                      max_time=max_time)
        df = pd.read_csv(urlopen(search_url))
    except Exception:  # narrowed from a bare ``except:`` that also hid SystemExit
        df = []
        # BUG FIX: the original referenced an undefined name ``var`` here,
        # raising NameError instead of reporting the failure; truncate the
        # requested standard_name instead.
        if len(standard_name) > 14:
            v = '{}...'.format(standard_name[:15])
        else:
            v = standard_name
        figure.title = 'No {} found in this time range. Pick another variable.'.format(v)
        figure.marks[0].y = 0.0 * figure.marks[0].y
    return df
def to_pandas(self, **kw):
    """Save a data request to a pandas.DataFrame.

    Accepts any `pandas.read_csv` keyword arguments.

    This method uses the .csvp [1] response as the default for simplicity,
    please check ERDDAP's documentation for the other csv options available.

    [1] Download a ISO-8859-1 .csv file with line 1: name (units). Times are
    ISO 8601 strings.
    """
    # Default to csvp unless the caller overrides the response format.
    response = kw.pop("response", "csvp")
    url = self.get_download_url(response=response, **kw)
    payload = urlopen(url, auth=self.auth)
    return pd.read_csv(payload, **kw)
def get_data(dataset=None, standard_name=None, constraints=None):
    """Download time + the variable matching ``standard_name`` from
    ``dataset`` as a pandas.DataFrame indexed by time.

    Returns a ``(DataFrame, variable_name)`` tuple.
    """
    # BUG FIX: the debug print referenced ``dataset_id``, which is not a
    # parameter of this function (it relied on an ambient global); print
    # the ``dataset`` argument that is actually used below.
    print(dataset, standard_name, constraints)
    # Case-insensitive standard_name lookup; take the first match.
    var = e.get_var_by_attr(
        dataset_id=dataset,
        standard_name=lambda v: str(v).lower() == standard_name.lower())[0]
    download_url = e.get_download_url(dataset_id=dataset,
                                      constraints=constraints,
                                      variables=['time', var],
                                      response='csv')
    # skiprows=[1] drops ERDDAP's units row.
    df = pd.read_csv(urlopen(download_url), index_col='time',
                     parse_dates=True, skiprows=[1])
    return df, var
def to_iris(self, **kw):
    """Load the data request into an iris.CubeList.

    Accepts any `iris.load_raw` keyword arguments.
    """
    import iris

    url = self.get_download_url(response="ncCF", **kw)
    data = urlopen(url, auth=self.auth).read()
    with _tempnc(data) as tmp:
        cubes = iris.load_raw(tmp, **kw)
        try:
            # Load lazy data before the temporary file is removed.
            cubes.realise_data()
        except ValueError:
            # Fallback for older iris: accessing ``cube.data`` forces each
            # cube to realise. The CubeList built here is discarded —
            # presumably only that side effect is wanted.
            # NOTE(review): confirm the discard is intentional.
            iris.cube.CubeList([cube.data for cube in cubes])
        return cubes
def adv_search(e, standard_name, cdm_data_type, min_time, max_time):
    """Run an ERDDAP advanced search and return the result as a DataFrame.

    On failure returns an empty list and updates the global ``figure`` title
    to tell the user nothing was found.
    """
    try:
        search_url = e.get_search_url(response='csv',
                                      cdm_data_type=cdm_data_type.lower(),
                                      items_per_page=100000,
                                      standard_name=standard_name,
                                      min_time=min_time,
                                      max_time=max_time)
        df = pd.read_csv(urlopen(search_url))
    except Exception:  # narrowed from a bare ``except:`` that also hid SystemExit
        df = []
        # BUG FIX: the original referenced an undefined name ``var`` here,
        # raising NameError instead of reporting the failure; truncate the
        # requested standard_name instead.
        if len(standard_name) > 14:
            v = '{}...'.format(standard_name[:15])
        else:
            v = standard_name
        figure.title = 'No {} found in this time range. Pick another variable.'.format(
            v)
        figure.marks[0].y = 0.0 * figure.marks[0].y
    return df
def _get_variables(self, dataset_id: OptionalStr = None) -> Dict:
    """Build a ``{variable: {attribute: value}}`` mapping from the
    dataset's info CSV response.

    Falls back to ``self.dataset_id`` when no id is passed; raises
    ValueError when neither is set.
    """
    dataset_id = dataset_id or self.dataset_id
    if dataset_id is None:
        raise ValueError(
            f"You must specify a valid dataset_id, got {dataset_id}")
    url = self.get_info_url(dataset_id=dataset_id, response="csv")
    info = pd.read_csv(urlopen(url, auth=self.auth))
    self._dataset_id = dataset_id
    variables = {}
    for name in set(info["Variable Name"]):
        rows = info.loc[info["Variable Name"] == name,
                        ["Attribute Name", "Value"]]
        variables[name] = rows.set_index("Attribute Name").to_dict()["Value"]
    return variables
search_min_time = now.subtract(weeks=2) # In[11]: e = ERDDAP(server=server, protocol='tabledap') # Find all the `standard_name` attributes that exist on this ERDDAP endpoint, using [ERDDAP's "categorize" service](http://www.neracoos.org/erddap/categorize/index.html) # In[12]: url='{}/categorize/standard_name/index.csv'.format(server) df = pd.read_csv(urlopen(url), skiprows=[1, 2]) standard_names = df['Category'].values # Create a dropdown menu widget with all the `standard_name` values found # In[13]: widget_std_names = ipyw.Dropdown(options=standard_names, value=standard_name) # Create a text widget to enter the search minimum time # In[14]:
def alllonlat(e, cdm_data_type, min_time, max_time):
    """Return datasetID/minLongitude/minLatitude for every dataset of the
    given ``cdm_data_type`` whose time coverage overlaps the window."""
    query = '{}/tabledap/allDatasets.csv?datasetID%2CminLongitude%2CminLatitude&cdm_data_type=%22{}%22&minTime%3C={}&maxTime%3E={}'.format(
        e.server,
        cdm_data_type,
        max_time.to_datetime_string(),
        min_time.to_datetime_string(),
    )
    # skiprows=[1] drops ERDDAP's units row.
    return pd.read_csv(urlopen(query), skiprows=[1])
# Map zoom level for the display.
zoom = 6
# Default search window ends at "now" (UTC) and starts two weeks earlier.
now = pendulum.now(tz='utc')
search_max_time = now
search_min_time = now.subtract(weeks=2)

# In[11]:

e = ERDDAP(server=server, protocol='tabledap')

# Find all the `standard_name` attributes that exist on this ERDDAP endpoint,
# using [ERDDAP's "categorize" service](http://www.neracoos.org/erddap/categorize/index.html)

# In[12]:

url = '{}/categorize/standard_name/index.csv'.format(server)
# skiprows=[1, 2] drops ERDDAP's extra header rows before the data.
df = pd.read_csv(urlopen(url), skiprows=[1, 2])
standard_names = df['Category'].values

# Create a dropdown menu widget with all the `standard_name` values found

# In[13]:

widget_std_names = ipyw.Dropdown(options=standard_names, value=standard_name)

# Create a text widget to enter the search minimum time

# In[14]:

widget_search_min_time = ipyw.Text(value=search_min_time.to_datetime_string(),
                                   description='Search Min',
                                   disabled=False)
def alllonlat(e, cdm_data_type, min_time, max_time):
    """Return datasetID/minLongitude/minLatitude for every dataset of the
    given ``cdm_data_type`` whose time coverage overlaps the window."""
    query_url = '{}/tabledap/allDatasets.csv?datasetID%2CminLongitude%2CminLatitude&cdm_data_type=%22{}%22&minTime%3C={}&maxTime%3E={}'.format(
        e.server,
        cdm_data_type,
        max_time.to_datetime_string(),
        min_time.to_datetime_string(),
    )
    # skiprows=[1] drops ERDDAP's units row.
    return pd.read_csv(urlopen(query_url), skiprows=[1])
def test_urlopen_raise():
    """urlopen must propagate HTTPError for a URL that returns 404."""
    bad_url = "https://developer.mozilla.org/en-US/404"
    with pytest.raises(HTTPError):
        urlopen(bad_url)
def test_urlopen():
    """Assure that urlopen is always a BytesIO object."""
    url = "http://erddap.sensors.ioos.us/erddap/tabledap/"
    ret = urlopen(url)
    # BUG FIX: the bare ``isinstance(...)`` call discarded its result, so
    # this test could never fail; assert on the check instead.
    assert isinstance(ret, io.BytesIO)