def test_search_url_bad_request(e):
    """Test if a bad request returns HTTPError."""
    kw = {
        "min_time": "1700-01-01T12:00:00Z",
        "max_time": "1750-01-01T12:00:00Z",
    }
    with pytest.raises(HTTPError):
        _check_url_response(e.get_search_url(**kw))
def test__check_url_response():
    """Test if a bad request returns HTTPError."""
    bad_request = (
        'http://erddap.sensors.ioos.us/erddap/tabledap/'
        'gov_usgs_waterdata_340800117235901.htmlTable?'
        'time,'
        '&time>=2017-08-29T00:00:00Z'
        '&time<=2015-09-05T19:00:00Z'
    )
    with pytest.raises(HTTPError):
        _check_url_response(bad_request)
def __init__(self, server_url):
    if server_url in servers.keys():
        server_url = servers[server_url].url
    _check_url_response(server_url)
    self.server_url = server_url
    self.search_options = {}
    self.download_options = {}
    # Caching the last `dataset_id` request for quicker multiple accesses,
    # will be overridden when requesting a new `dataset_id`.
    self._dataset_id = None
    self._variables = {}
def get_opendap_url(self, dataset_id, protocol='tabledap'):
    """Compose the OPeNDAP URL for the `server_url` endpoint.

    Args:
        dataset_id (str): a dataset unique id.

    Returns:
        opendap_url (str): the OPeNDAP URL for the dataset.

    """
    base = '{server_url}/{protocol}/{dataset_id}'.format
    opendap_url = base(
        server_url=self.server_url,
        protocol=protocol,
        dataset_id=dataset_id,
    )
    _check_url_response(opendap_url)
    return opendap_url
def info_url(server, dataset_id, response='html'):
    """The info URL for the `server` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        response (str): default is HTML.

    Returns:
        url (str): the info URL for the `response` chosen.

    """
    url = f'{server}/info/{dataset_id}/index.{response}'
    return _check_url_response(url)
def download_url(server, dataset_id, protocol, variables, response='html', constraints=None):
    """The download URL for the `server` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        protocol (str): tabledap or griddap.
        variables (list/tuple): a list of the variables to download.
        response (str): default is HTML.
        constraints (dict): download constraints, default None (opendap-like url)
            example: constraints = {'latitude<=': 41.0,
                                    'latitude>=': 38.0,
                                    'longitude<=': -69.0,
                                    'longitude>=': -72.0,
                                    'time<=': '2017-02-10T00:00:00+00:00',
                                    'time>=': '2016-07-10T00:00:00+00:00',
                                    }

    Returns:
        url (str): the download URL for the `response` chosen.

    """
    base = '{server_url}/{protocol}/{dataset_id}'
    if not constraints:
        url = base.format(
            server_url=server,
            protocol=protocol,
            dataset_id=dataset_id,
        )
    else:
        base += '.{response}?{variables}{constraints}'
        _constraints = copy.copy(constraints)
        for k, v in _constraints.items():
            if k.startswith('time'):
                _constraints.update({k: parse_dates(v)})
        _constraints = quote_string_constraints(_constraints)
        _constraints = ''.join(
            ['&{}{}'.format(k, v) for k, v in _constraints.items()]
        )
        variables = ','.join(variables)
        url = base.format(
            server_url=server,
            protocol=protocol,
            dataset_id=dataset_id,
            response=response,
            variables=variables,
            constraints=_constraints,
        )
    return _check_url_response(url)
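# Usage sketch (not from the original source): builds a constrained tabledap
# download URL with the module-level `download_url` helper defined above.
# The server address, dataset id, and variable names are hypothetical
# placeholders; `_check_url_response` performs a real HTTP check, so the call
# only succeeds against a reachable ERDDAP server hosting a matching dataset.
example_url = download_url(
    server='https://coastwatch.pfeg.noaa.gov/erddap',  # assumed server
    dataset_id='example_dataset_id',                   # hypothetical dataset id
    protocol='tabledap',
    variables=['time', 'temperature'],                 # hypothetical variables
    response='csv',
    constraints={
        'time>=': '2016-07-10T00:00:00+00:00',
        'time<=': '2017-02-10T00:00:00+00:00',
    },
)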
def get_download_url(self, dataset_id, variables, response='csv', protocol='tabledap', **kwargs):
    """Compose the download URL for the `server_url` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        variables (list/tuple): a list of the variables to download.
        response (str): default is a Comma Separated Value ('csv').
            See ERDDAP docs for all the options,
            - tabledap: http://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
            - griddap: http://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html

    Returns:
        download_url (str): the download URL for the `response` chosen.

    """
    self.download_options.update(kwargs)
    variables = ','.join(variables)
    base = (
        '{server_url}/{protocol}/{dataset_id}.{response}'
        '?{variables}'
        '{kwargs}'
    ).format
    kwargs = ''.join(
        ['&{}{}'.format(k, v) for k, v in self.download_options.items()]
    )
    download_url = base(
        server_url=self.server_url,
        protocol=protocol,
        dataset_id=dataset_id,
        response=response,
        variables=variables,
        kwargs=kwargs,
    )
    _check_url_response(download_url)
    return download_url
def get_info_url(self, dataset_id, response='csv'):
    """Compose the info URL for the `server_url` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        response (str): default is a Comma Separated Value ('csv').
            See ERDDAP docs for all the options,
            - tabledap: http://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
            - griddap: http://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html

    Returns:
        info_url (str): the info URL for the `response` chosen.

    """
    response = _clean_response(response)
    base = '{server_url}/info/{dataset_id}/index.{response}'.format
    info_options = {
        'server_url': self.server_url,
        'dataset_id': dataset_id,
        'response': response,
    }
    info_url = base(**info_options)
    _check_url_response(info_url)
    return info_url
def __init__(self, server, protocol=None, response="html"):
    if server in servers.keys():
        server = servers[server].url
    self.server = _check_url_response(server)
    self.protocol = protocol

    # Initialized only via properties.
    self.constraints = None
    self.dataset_id = None
    self.params = None
    self.requests_kwargs = {}
    self.response = response
    self.variables = ""

    # Caching the last `dataset_id` and `variables` list request for quicker
    # multiple accesses, will be overridden when requesting a new `dataset_id`.
    self._dataset_id = None
    self._variables = {}
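# Instantiation sketch (an assumption, not part of the original source): the
# `__init__` above matches the ERDDAP client class; it is assumed here to be
# importable as `ERDDAP` from the erddapy package. The server value can also
# be an alias registered in the `servers` mapping used above.
from erddapy import ERDDAP  # assumed import path

e = ERDDAP(
    server='https://coastwatch.pfeg.noaa.gov/erddap',  # assumed server URL
    protocol='tabledap',
    response='csv',
)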
def get_categorize_url(self, categorize_by, value=None, response=None):
    """The categorize URL for the `server` endpoint.

    Args:
        categorize_by (str): a valid attribute, e.g.: ioos_category or standard_name.
        value (str): an attribute value.
        response (str): default is HTML.

    Returns:
        url (str): the categorized URL for the `response` chosen.

    """
    response = response if response else self.response
    if value:
        url = f"{self.server}/categorize/{categorize_by}/{value}/index.{response}"
    else:
        url = f"{self.server}/categorize/{categorize_by}/index.{response}"
    return _check_url_response(url)
def get_info_url(self, dataset_id=None, response=None):
    """The info URL for the `server` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        response (str): default is HTML.

    Returns:
        url (str): the info URL for the `response` chosen.

    """
    dataset_id = dataset_id if dataset_id else self.dataset_id
    response = response if response else self.response
    if not dataset_id:
        raise ValueError(f"You must specify a valid dataset_id, got {dataset_id}")
    url = f"{self.server}/info/{dataset_id}/index.{response}"
    return _check_url_response(url)
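# Usage sketch for `get_info_url` (assumes a client instance `e` like the one
# in the earlier sketch; the dataset id below is hypothetical). Because
# `dataset_id` falls back to `self.dataset_id`, it can be set on the instance
# or passed explicitly per call.
e.dataset_id = 'example_dataset_id'        # hypothetical dataset id
info_csv_url = e.get_info_url(response='csv')
# equivalent explicit form:
info_csv_url = e.get_info_url(dataset_id='example_dataset_id', response='csv')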
def __init__(self, server, dataset_id=None, protocol=None, variables='',
             response='html', constraints=None, params=None, requests_kwargs=None):
    if server in servers.keys():
        server = servers[server].url
    self.server = _check_url_response(server)
    self.dataset_id = dataset_id
    self.protocol = protocol
    self.variables = variables
    self.response = response
    self.constraints = constraints
    self.params = params
    self.requests_kwargs = requests_kwargs if requests_kwargs else {}

    # Caching the last `dataset_id` request for quicker multiple accesses,
    # will be overridden when requesting a new `dataset_id`.
    self._dataset_id = None
    self._variables = {}
def get_download_url(
    self,
    dataset_id=None,
    protocol=None,
    variables=None,
    response=None,
    constraints=None,
):
    """The download URL for the `server` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        protocol (str): tabledap or griddap.
        variables (list/tuple): a list of the variables to download.
        response (str): default is HTML.
        constraints (dict): download constraints, default None (opendap-like url)
            example: constraints = {'latitude<=': 41.0,
                                    'latitude>=': 38.0,
                                    'longitude<=': -69.0,
                                    'longitude>=': -72.0,
                                    'time<=': '2017-02-10T00:00:00+00:00',
                                    'time>=': '2016-07-10T00:00:00+00:00',}

    Returns:
        url (str): the download URL for the `response` chosen.

    """
    dataset_id = dataset_id if dataset_id else self.dataset_id
    protocol = protocol if protocol else self.protocol
    variables = variables if variables else self.variables
    response = response if response else self.response
    constraints = constraints if constraints else self.constraints

    if not dataset_id:
        raise ValueError(f"Please specify a valid `dataset_id`, got {dataset_id}")

    if not protocol:
        raise ValueError(f"Please specify a valid `protocol`, got {protocol}")

    # This is an unconstrained OPeNDAP response b/c
    # the integer based constrained version is just not worth supporting ;-p
    if response == "opendap":
        return f"{self.server}/{protocol}/{dataset_id}"
    else:
        url = f"{self.server}/{protocol}/{dataset_id}.{response}?"

    if variables:
        variables = ",".join(variables)
        url += f"{variables}"

    if constraints:
        _constraints = copy.copy(constraints)
        for k, v in _constraints.items():
            if k.startswith("time"):
                _constraints.update({k: parse_dates(v)})
        _constraints = quote_string_constraints(_constraints)
        _constraints = "".join([f"&{k}{v}" for k, v in _constraints.items()])
        url += f"{_constraints}"
    return _check_url_response(url)
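# Usage sketch for `get_download_url` (assumes the instance `e` from the
# earlier sketch; dataset id, variables, and constraint values are
# hypothetical). Note that response="opendap" returns the bare dataset URL
# before any HTTP check, while every other response goes through
# `_check_url_response` and therefore needs a live server.
csv_url = e.get_download_url(
    dataset_id='example_dataset_id',
    protocol='tabledap',
    variables=['time', 'temperature'],
    response='csv',
    constraints={
        'time>=': '2016-07-10T00:00:00+00:00',
        'time<=': '2017-02-10T00:00:00+00:00',
    },
)
opendap_url = e.get_download_url(
    dataset_id='example_dataset_id',
    protocol='tabledap',
    response='opendap',   # returned without an HTTP check
)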
def get_search_url(
    self,
    response=None,
    search_for=None,
    items_per_page=1000,
    page=1,
    **kwargs,
):
    """The search URL for the `server` endpoint provided.

    Args:
        search_for (str): "Google-like" search of the datasets' metadata.

            - Type the words you want to search for, with spaces between the words.
              ERDDAP will search for the words separately, not as a phrase.
            - To search for a phrase, put double quotes around the phrase
              (for example, `"wind speed"`).
            - To exclude datasets with a specific word, use `-excludedWord`.
            - To exclude datasets with a specific phrase, use `-"excluded phrase"`.
            - Searches are not case-sensitive.
            - To find just grid or just table datasets, include `protocol=griddap`
              or `protocol=tabledap` in your search.
            - You can search for any part of a word. For example, searching for
              `spee` will find datasets with `speed` and datasets with `WindSpeed`.
            - The last word in a phrase may be a partial word. For example,
              to find datasets from a specific website (usually the start of the
              datasetID), include (for example) `"datasetID=erd"` in your search.

        response (str): default is HTML.
        items_per_page (int): how many items per page in the return, default is 1000.
        page (int): which page to display, default is the first page (1).
        kwargs (dict): extra search constraints based on metadata and/or
            coordinates key/value pairs.
            metadata: `cdm_data_type`, `institution`, `ioos_category`,
            `keywords`, `long_name`, `standard_name`, and `variableName`.
            coordinates: `minLon`, `maxLon`, `minLat`, `maxLat`, `minTime`, and `maxTime`.

    Returns:
        url (str): the search URL.

    """
    base = (
        "{server}/search/advanced.{response}"
        "?page={page}"
        "&itemsPerPage={itemsPerPage}"
        "&protocol={protocol}"
        "&cdm_data_type={cdm_data_type}"
        "&institution={institution}"
        "&ioos_category={ioos_category}"
        "&keywords={keywords}"
        "&long_name={long_name}"
        "&standard_name={standard_name}"
        "&variableName={variableName}"
        "&minLon={minLon}"
        "&maxLon={maxLon}"
        "&minLat={minLat}"
        "&maxLat={maxLat}"
        "&minTime={minTime}"
        "&maxTime={maxTime}"
    )
    if search_for:
        search_for = quote_plus(search_for)
        base += "&searchFor={searchFor}"

    # Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
    min_time = kwargs.pop("min_time", None)
    max_time = kwargs.pop("max_time", None)
    if min_time:
        kwargs.update({"min_time": parse_dates(min_time)})
    if max_time:
        kwargs.update({"max_time": parse_dates(max_time)})

    default = "(ANY)"
    response = response if response else self.response
    url = base.format(
        server=self.server,
        response=response,
        page=page,
        itemsPerPage=items_per_page,
        protocol=kwargs.get("protocol", default),
        cdm_data_type=kwargs.get("cdm_data_type", default),
        institution=kwargs.get("institution", default),
        ioos_category=kwargs.get("ioos_category", default),
        keywords=kwargs.get("keywords", default),
        long_name=kwargs.get("long_name", default),
        standard_name=kwargs.get("standard_name", default),
        variableName=kwargs.get("variableName", default),
        minLon=kwargs.get("min_lon", default),
        maxLon=kwargs.get("max_lon", default),
        minLat=kwargs.get("min_lat", default),
        maxLat=kwargs.get("max_lat", default),
        minTime=kwargs.get("min_time", default),
        maxTime=kwargs.get("max_time", default),
        searchFor=search_for,
    )
    return _check_url_response(url)
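# Usage sketch for `get_search_url` (assumes the instance `e` from the earlier
# sketch; the search term and bounding box are illustrative). `min_time` and
# `max_time` accept parseable date strings and are converted to seconds since
# 1970-01-01T00:00:00Z; any constraint left out falls back to "(ANY)".
search_url = e.get_search_url(
    search_for='wind speed',
    response='csv',
    min_lon=-72.0,
    max_lon=-69.0,
    min_lat=38.0,
    max_lat=41.0,
    min_time='2016-07-10T00:00:00Z',
    max_time='2017-02-10T00:00:00Z',
)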
def test_servers():
    for server in servers.values():
        # Should raise HTTPError if broken, otherwise returns the URL.
        assert _check_url_response(server.url) == server.url
def get_search_url(self, response='csv', search_for=None, items_per_page=1000, page=1, **kwargs):
    """Compose the search URL for the `server_url` endpoint provided.

    Args:
        search_for (str): "Google-like" search of the datasets' metadata.

            - Type the words you want to search for, with spaces between the words.
              ERDDAP will search for the words separately, not as a phrase.
            - To search for a phrase, put double quotes around the phrase
              (for example, `"wind speed"`).
            - To exclude datasets with a specific word, use `-excludedWord`.
            - To exclude datasets with a specific phrase, use `-"excluded phrase"`.
            - Searches are not case-sensitive.
            - To find just grid or just table datasets, include `protocol=griddap`
              or `protocol=tabledap` in your search.
            - You can search for any part of a word. For example, searching for
              `spee` will find datasets with `speed` and datasets with `WindSpeed`.
            - The last word in a phrase may be a partial word. For example,
              to find datasets from a specific website (usually the start of the
              datasetID), include (for example) `"datasetID=erd"` in your search.

        response (str): default is a Comma Separated Value ('csv').
            See ERDDAP docs for all the options,
            - tabledap: http://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
            - griddap: http://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html
        items_per_page (int): how many items per page in the return, default is 1000.
        page (int): which page to display, default is the first page (1).
        kwargs (dict): extra search constraints based on metadata and/or
            coordinates key/value pairs.
            metadata: `cdm_data_type`, `institution`, `ioos_category`,
            `keywords`, `long_name`, `standard_name`, and `variableName`.
            coordinates: `minLon`, `maxLon`, `minLat`, `maxLat`, `minTime`, and `maxTime`.

    Returns:
        search_url (str): the search URL for the `response` chosen.

    """
    base = (
        '{server_url}/search/advanced.{response}'
        '?page={page}'
        '&itemsPerPage={itemsPerPage}'
        '&protocol={protocol}'
        '&cdm_data_type={cdm_data_type}'
        '&institution={institution}'
        '&ioos_category={ioos_category}'
        '&keywords={keywords}'
        '&long_name={long_name}'
        '&standard_name={standard_name}'
        '&variableName={variableName}'
        '&minLon={minLon}'
        '&maxLon={maxLon}'
        '&minLat={minLat}'
        '&maxLat={maxLat}'
        '&minTime={minTime}'
        '&maxTime={maxTime}'
    )
    if search_for:
        search_for = quote_plus(search_for)
        base += '&searchFor={searchFor}'

    # Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
    min_time = kwargs.pop('min_time', None)
    max_time = kwargs.pop('max_time', None)
    if min_time:
        kwargs.update({'min_time': parse_dates(min_time)})
    if max_time:
        kwargs.update({'max_time': parse_dates(max_time)})

    default = '(ANY)'
    response = _clean_response(response)
    search_options = {
        'server_url': self.server_url,
        'response': response,
        'page': page,
        'itemsPerPage': items_per_page,
        'protocol': kwargs.get('protocol', default),
        'cdm_data_type': kwargs.get('cdm_data_type', default),
        'institution': kwargs.get('institution', default),
        'ioos_category': kwargs.get('ioos_category', default),
        'keywords': kwargs.get('keywords', default),
        'long_name': kwargs.get('long_name', default),
        'standard_name': kwargs.get('standard_name', default),
        'variableName': kwargs.get('variableName', default),
        'minLon': kwargs.get('min_lon', default),
        'maxLon': kwargs.get('max_lon', default),
        'minLat': kwargs.get('min_lat', default),
        'maxLat': kwargs.get('max_lat', default),
        'minTime': kwargs.get('min_time', default),
        'maxTime': kwargs.get('max_time', default),
        'searchFor': search_for,
    }
    self.search_options.update(search_options)
    search_url = base.format(**search_options)
    _check_url_response(search_url)
    return search_url