def any_case_match_filters(self, propertyname, word_list):
    '''
    Helper function to return and/or filter from a word list for partial case insensitivity
    Work-around resolves issues where "matchCase=False" is ignored and keywords with
    whitespace characters are not found

    For every word, the unchanged, upper-case, lower-case and title-case spellings are
    searched. Whitespace characters in each spelling are replaced by "_" before the
    literal is handed to fes.PropertyIsLike.

    @param propertyname: String denoting CSW property to search
    @param word_list: List of strings for (partially) case-insensitive "and" search

    @return: List of FES filters expressing "and" query (one entry per word; an entry is
             a single PropertyIsLike when all spellings coincide, otherwise an "or" of them)
    '''
    filter_list = []
    for word in word_list:
        # Distinct spellings only; sorted so the generated filter is deterministic.
        word_variants = sorted({word, word.upper(), word.lower(), word.title()})

        # The whitespace substitution is applied to every spelling, including the case
        # where all spellings coincide (e.g. purely numeric keywords containing spaces).
        variant_filters = [
            fes.PropertyIsLike(propertyname=propertyname,
                               literal=re.sub(r'\s', '_', word_variant),
                               matchCase=False)
            for word_variant in word_variants
        ]

        if len(variant_filters) == 1:
            # Use single filter if no "or" required
            filter_list.append(variant_filters[0])
        else:
            filter_list.append(fes.Or(variant_filters))

    return filter_list
def make_filter(config, ):
    """Build the constraint list for a catalogue search from ``config``.

    Reads ``config["cf_names"]``, ``config["date"]["start"]``,
    ``config["date"]["stop"]``, ``config["region"]["bbox"]`` and
    ``config["region"]["crs"]``.

    Returns a one-element list holding a ``fes.And`` of: the bounding box,
    the two date filters from ``fes_date_filter``, the ``apiso:Subject``
    name filter and a ``fes.Not`` on ``*cdip*`` in ``apiso:AnyText``.
    """

    def _like(propertyname, literal):
        # Every PropertyIsLike in this function shares the same wildcard setup.
        return fes.PropertyIsLike(
            literal=literal,
            wildCard="*",
            escapeChar="\\",
            singleChar="?",
            propertyname=propertyname,
        )

    cf_names = config["cf_names"]
    if len(cf_names) > 1:
        or_filt = fes.Or(
            [_like("apiso:Subject", ("*%s*" % val)) for val in cf_names]
        )
    else:
        # A single name needs no "or" wrapper.
        or_filt = _like("apiso:Subject", ("*%s*" % cf_names[0]))

    not_filt = fes.Not([_like("apiso:AnyText", "*cdip*")])

    begin, end = fes_date_filter(config["date"]["start"], config["date"]["stop"])
    bbox_crs = fes.BBox(config["region"]["bbox"], crs=config["region"]["crs"])

    return [fes.And([bbox_crs, begin, end, or_filt, not_filt])]
def make_fes_filter(self):
    '''
    Generates the filter for querying the IOOS database

    Reads self.start, self.stop, self.target, self.models and self.bbox_crs,
    and stores the result in self.filter_list as a one-element list holding
    a fes.And of all constraints. Nothing is returned.
    '''
    begin, end = fes_date_filter(self.start, self.stop)

    like_kw = {
        'wildCard': '*',
        'escapeChar': '\\',
        'singleChar': '?',
        'propertyname': 'apiso:AnyText',
    }

    targets = self.target
    if len(targets) > 1:
        prop_filt = fes.Or([
            fes.PropertyIsLike(literal=('*%s*' % val), **like_kw)
            for val in targets
        ])
    else:
        # NOTE(review): a single target is matched without surrounding
        # wildcards, unlike the multi-target branch - confirm this is intended.
        prop_filt = fes.PropertyIsLike(literal=(targets[0]), **like_kw)

    # Equality (not truthiness) is kept on purpose so that only values equal
    # to True enable the forecast constraint.
    forecast_only = self.models == True

    clauses = [self.bbox_crs, begin, end, prop_filt]
    if forecast_only:
        clauses.append(fes.PropertyIsLike(literal='*forecast*', **like_kw))

    # NOTE(review): '*cdip' has no trailing wildcard while '*grib*' does -
    # confirm against the catalogue before changing.
    clauses.append(fes.Not([fes.PropertyIsLike(literal='*cdip', **like_kw)]))
    clauses.append(fes.Not([fes.PropertyIsLike(literal='*grib*', **like_kw)]))

    self.filter_list = [fes.And(clauses)]
def csw_query(endpoint, bbox=None, start=None, stop=None, kw_names=None,
              crs="urn:ogc:def:crs:OGC:1.3:CRS84", max_attempts=5):
    """Query a CSW endpoint and print the title, ID and references of each record.

    Parameters
    ----------
    endpoint : URL handed to ``CatalogueServiceWeb``.
    bbox : optional bounding box passed to ``fes.BBox``.
    start, stop : optional temporal bounds; the date constraint is only added
        when both are not None.
    kw_names : optional iterable of search terms, OR-ed together as
        ``*term*`` matches on ``apiso:AnyText``.
    crs : accepted for interface compatibility; see the note below.
    max_attempts : number of times the connection is attempted before the
        last error is re-raised.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    Exception
        Whatever ``CatalogueServiceWeb`` raised on the final failed attempt.

    Returns None; results are printed.
    """
    # NOTE(review): the crs argument is unconditionally replaced here,
    # presumably as a workaround for the linked issue - confirm before removing.
    crs = "urn:ogc:def:crs:::EPSG:4326"  # https://github.com/qgis/QGIS/issues/40778

    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # Retry the connection a bounded number of times. Only Exception is caught
    # so that KeyboardInterrupt/SystemExit still stop the program.
    csw = None
    for attempt in range(1, max_attempts + 1):
        try:
            csw = CatalogueServiceWeb(endpoint, timeout=60)
            break
        except Exception:
            if attempt == max_attempts:
                raise

    constraints = []

    if kw_names:
        kw = dict(wildCard="*", escapeChar="\\", singleChar="?",
                  propertyname="apiso:AnyText")
        constraints.append(fes.Or([
            fes.PropertyIsLike(literal=("*%s*" % val), **kw)
            for val in kw_names
        ]))

    if start is not None and stop is not None:
        begin, end = fes_date_filter(start, stop)
        constraints.append(begin)
        constraints.append(end)

    if bbox:
        constraints.append(fes.BBox(bbox, crs=crs))

    # A single constraint (or none) is passed through unwrapped.
    if len(constraints) >= 2:
        filter_list = [fes.And(constraints)]
    else:
        filter_list = constraints

    # get_csw_records is expected to populate csw.records.
    get_csw_records(csw, filter_list, pagesize=10, maxrecords=10)

    print("Found {} records.\n".format(len(csw.records)))
    msg = "geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n".format
    for key, value in csw.records.items():
        print(u"Title: [{}]\nID: {}\n".format(value.title, key))
        for ref in value.references:
            print(msg(geolink=sniff_link(ref["url"]), **ref))
        print("#########################################################", '\n')
def make_filter(config):
    """Assemble the constraint list for a catalogue search from ``config``.

    Reads ``config["cf_names"]``, ``config["date"]["start"]``,
    ``config["date"]["stop"]``, ``config["region"]["bbox"]`` and
    ``config["region"]["crs"]``.

    Returns a one-element list holding a ``fes.And`` of: the bounding box,
    the two date filters, an OR over ``*name*`` matches on ``apiso:AnyText``
    and a ``fes.Not`` on the literal ``GRIB-2``.
    """
    from owslib import fes
    from ioos_tools.ioos import fes_date_filter

    like_kw = {
        "wildCard": "*",
        "escapeChar": "\\",
        "singleChar": "?",
        "propertyname": "apiso:AnyText",
    }

    name_filters = []
    for val in config["cf_names"]:
        name_filters.append(fes.PropertyIsLike(literal=("*%s*" % val), **like_kw))
    or_filt = fes.Or(name_filters)

    # The exclusion literal carries no wildcards.
    not_filt = fes.Not([fes.PropertyIsLike(literal="GRIB-2", **like_kw)])

    begin, end = fes_date_filter(config["date"]["start"], config["date"]["stop"])
    bbox_crs = fes.BBox(config["region"]["bbox"], crs=config["region"]["crs"])

    return [fes.And([bbox_crs, begin, end, or_filt, not_filt])]
def retrieve(self, variable=None, dataframe=False, count=False, bbox=None,
             or_filter=False, *searchArgs, **kwargs):
    '''
    Retrieve data from WFS service

    variable: feature type name; when falsy, the first entry of
        self._client.items() is used.
    dataframe: when true, return a geopandas GeoDataFrame instead of a list
        of OGR features.
    count: when true, 'resulttype' = 'hits' is added to the request.
    bbox: optional bounding box added to the request.
    or_filter: combine the property filters with "or" instead of "and".
    searchArgs: accepted but not used by this method.
    kwargs: property name -> value pairs, each turned into a PropertyIsLike
        filter.

    If the filtered request raises, the request is repeated without the
    filter and the features are filtered locally via self.filter_features.

    Raises Exception(self._status) when the client is not valid.
    '''
    if not self._valid:
        raise Exception(self._status)

    if not variable:
        variable = self._client.items()[0][0]

    query_params = dict(typename=variable, propertyname=None)
    if count:
        query_params['resulttype'] = 'hits'
    if bbox:
        query_params['bbox'] = bbox

    # construct query filter from kwargs
    if kwargs:
        import owslib.fes as fes
        from owslib.etree import etree

        filter_components = [
            fes.PropertyIsLike(propertyname=k, literal=str(v))
            for k, v in kwargs.items()
        ]
        if len(filter_components) == 1:
            query_filter = filter_components[0]
        elif or_filter:
            query_filter = fes.Or(filter_components)
        else:
            query_filter = fes.And(filter_components)

        filterxml = etree.tostring(query_filter.toXML()).decode('utf-8')
        query_params['filter'] = '<Filter>%s</Filter>' % filterxml

    filter_result = False
    try:
        results = self._client.getfeature(**query_params)
    except Exception:
        # Without a filter there is nothing to fall back to.
        if 'filter' not in query_params:
            raise
        # Retry unfiltered and apply the filter locally afterwards.
        query_params.pop('filter')
        filter_result = True
        results = self._client.getfeature(**query_params)

    feature_text = results.read()
    if not isinstance(feature_text, str):
        feature_text = feature_text.decode('utf-8')

    # The file must be fully written and closed before OGR opens it.
    # NOTE: tmp.gml is left on disk; the original carried a "remove tmp file"
    # remark here but never removed the file.
    tmp_fn = 'tmp.gml'
    with open(tmp_fn, 'w') as tmp_file:
        tmp_file.write(feature_text)

    from osgeo import ogr
    ds = ogr.Open(tmp_fn)
    layer = ds.GetLayer()
    features = [
        layer.GetNextFeature() for i in range(layer.GetFeatureCount())
    ]

    if filter_result:
        features = self.filter_features(features, or_filter, kwargs)

    from shapely.wkt import loads
    for f in features:
        wkt = f.geometry().ExportToWkt()
        try:
            f.geom = loads(wkt)
        except Exception:
            # Second attempt with MULTISURFACE rewritten as MULTIPOLYGON.
            f.geom = loads(wkt.replace('MULTISURFACE', 'MULTIPOLYGON'))

    if dataframe:
        from geopandas import GeoDataFrame
        feature_dicts = []
        for f in features:
            # 'geometry' goes first so a same-named field would override it,
            # as in the original pair-list construction.
            row = {'geometry': f.geom}
            row.update((p, f[p]) for p in f.keys())
            feature_dicts.append(row)
        return GeoDataFrame(feature_dicts)

    return features
# <markdowncell> # ##### Setup BCSW Filters to find models in the area of the Important Bird Polygon # <codecell> from owslib import fes # Polygon filters polygon_filters = [] for s in shapes: f = fes.BBox(bbox=list(reversed(s.bounds))) polygon_filters.append(f) # If we have more than one polygon filter, OR them together if len(polygon_filters) > 1: polygon_filters = fes.Or(polygon_filters) elif len(polygon_filters) == 1: polygon_filters = polygon_filters[0] # Name filters name_filters = [] model_strings = [ 'roms', 'selfe', 'adcirc', 'ncom', 'hycom', 'fvcom', 'wrf', 'wrams' ] for model in model_strings: title_filter = fes.PropertyIsLike(propertyname='apiso:Title', literal='*%s*' % model, wildCard='*') name_filters.append(title_filter) subject_filter = fes.PropertyIsLike(propertyname='apiso:Subject', literal='*%s*' % model,
# In[ ]:

# trying to do this search:
# ('roms' OR 'selfe' OR 'adcirc' OR 'ncom' OR 'hycom' OR 'fvcom') AND 'ocean' NOT 'regridded' NOT 'espresso'
# should return 11 records from NODC geoportal
# NOTE(review): the target query above lists 'roms', but the list below
# searches for 'waves' - confirm which one is intended.
search_text = ['waves', 'selfe', 'adcirc', 'ncom', 'hycom', 'fvcom']

# Options shared by every PropertyIsLike built in this cell.
kw = dict(propertyname='apiso:AnyText',
          escapeChar='\\',
          wildCard='*',
          singleChar='?')

# filter1: any of the search terms
filter1 = fes.Or([
    fes.PropertyIsLike(literal=('*%s*' % val), **kw) for val in search_text
])

# filter2: must mention 'ocean'
val = 'ocean'
filter2 = fes.PropertyIsLike(literal=('*%s*' % val), **kw)

# filter3: must not mention 'regridded'
val = 'regridded'
filt = fes.PropertyIsLike(literal=('*%s*' % val), **kw)
filter3 = fes.Not([filt])
# ### Searching for models via CSW 'keyword' # <markdowncell> # #### Construct CSW Filters # <codecell> from owslib import fes model_name_filters = [] for model in known_model_strings: title_filter = fes.PropertyIsLike(propertyname='apiso:Title', literal='*%s*' % model, wildCard='*') subject_filter = fes.PropertyIsLike(propertyname='apiso:Subject', literal='*%s*' % model, wildCard='*') model_name_filters.append(fes.Or([title_filter, subject_filter])) # <markdowncell> # #### Query each CSW catalog for revery model_name_filter constructed above # <codecell> from owslib.csw import CatalogueServiceWeb model_results = [] for x in range(len(model_name_filters)): model_name = known_model_strings[x] single_model_filter = model_name_filters[x] for url in known_csw_servers:
# <codecell>

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'  # NGDC Geoportal
csw = CatalogueServiceWeb(endpoint, timeout=60)

# <codecell>

# Convert user input into FES filters.
start, stop = fes_date_filter(start_date, end_date)
bbox = fes.BBox(bounding_box)

# Use the search names to create the search filter (any name may match).
or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                     literal='*%s*' % val,
                                     escapeChar='\\',
                                     wildCard='*',
                                     singleChar='?')
                  for val in data_dict["temp"]["names"]])

# Try request using multiple filters "and" syntax: [[filter1, filter2]]
filter_list = [fes.And([bbox, start, stop, or_filt])]
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print(str(len(csw.records)) + " csw records found")

# <markdowncell>

# #### Dap URLs

# <codecell>
# <codecell>

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'  # NGDC Geoportal
csw = CatalogueServiceWeb(endpoint, timeout=60)

# <codecell>

# Translate the user input into FES filters.
start, stop = date_range(start_date, stop_date)
bbox = fes.BBox(bounding_box)

# Options shared by both text filters below.
kw = dict(propertyname='apiso:AnyText',
          escapeChar='\\',
          wildCard='*',
          singleChar='?')

# Match any of the configured names for currents.
or_filt = fes.Or([
    fes.PropertyIsLike(literal=('*%s*' % val), **kw)
    for val in data_dict['currents']['names']
])

# Exclude records mentioning 'Averages'.
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(literal=('*%s*' % val), **kw)])

filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]

# Connect to CSW, explore its properties.
# Try request using multiple filters "and" syntax: [[filter1, filter2]]
# #### Search the catalogue using the FES filters

# <codecell>

# Turn the user input into FES filters.
start, stop = fes_date_filter(start_date, stop_date)
bbox = fes.BBox(bounding_box)

# Options shared by every text filter in this cell.
kw = {
    'propertyname': 'apiso:AnyText',
    'escapeChar': '\\',
    'wildCard': '*',
    'singleChar': '?',
}

# Any of the u-wind names may match.
name_filters = []
for val in data_dict['winds']['u_names']:
    name_filters.append(fes.PropertyIsLike(literal='*%s*' % val, **kw))
or_filt = fes.Or(name_filters)

# Records mentioning 'Averages' are excluded.
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(literal=('*%s*' % val), **kw)])

filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]

csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
print("%s csw records found" % len(csw.records))

# <markdowncell>

# #### DAP endpoints

# <codecell>
literal=stop) else: raise NameError('Unrecognized constraint {}'.format(constraint)) return begin, end # In[5]: from owslib import fes kw = dict(wildCard='*', escapeChar='\\', singleChar='?', propertyname='apiso:AnyText') or_filt = fes.Or( [fes.PropertyIsLike(literal=('*%s*' % val), **kw) for val in name_list]) # Exculde ROMS Averages and History files. not_filt = fes.Not([fes.PropertyIsLike(literal='*Averages*', **kw)]) begin, end = fes_date_filter(start, stop) filter_list = [fes.And([fes.BBox(bbox), begin, end, or_filt, not_filt])] # In[6]: from owslib.csw import CatalogueServiceWeb endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' csw = CatalogueServiceWeb(endpoint, timeout=60) csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
propertyname=propertyname, literal=start ) elif constraint == "within": propertyname = "apiso:TempExtent_begin" begin = fes.PropertyIsGreaterThanOrEqualTo( propertyname=propertyname, literal=start ) propertyname = "apiso:TempExtent_end" end = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname, literal=stop) else: raise NameError("Unrecognized constraint {}".format(constraint)) return begin, end kw = dict(wildCard="*", escapeChar="\\", singleChar="?", propertyname="apiso:AnyText") or_filt = fes.Or([fes.PropertyIsLike(literal=("*%s*" % val), **kw) for val in cf_names]) begin, end = fes_date_filter(start, stop) bbox_crs = fes.BBox(bbox, crs=crs) filter_list = [ fes.And( [ bbox_crs, # bounding box begin, end, # start and end date or_filt, # or conditions (CF variable names) ] ) ]
def query_csw(self,
              identifier_list=None,
              alt_identifier_list=None,
              keyword_list=None,
              bounding_box=None,
              bounding_box_crs=None,
              anytext_list=None,
              titleword_list=None,
              start_datetime=None,
              stop_datetime=None,
              record_type_list=None,
              max_total_records=None,
              get_layers=None):
    '''
    Function to query CSW using AND combination of provided search parameters and return
    generator object yielding nested dicts containing information about each record
    including distributions

    @param identifier_list: List of strings or comma-separated string containing metadata identifiers (UUID)
    @param alt_identifier_list: List of strings or comma-separated string containing metadata alternate identifiers (eCat ID)
    @param keyword_list: List of strings or comma-separated string containing keyword search terms
    @param bounding_box: Bounding box to search as a list of ordinates [bbox.minx, bbox.miny, bbox.maxx, bbox.maxy]
    @param bounding_box_crs: Coordinate reference system for bounding box. Defaults to value of self.settings['DEFAULT_CRS']
    @param anytext_list: List of strings or comma-separated string containing any text search terms
    @param titleword_list: List of strings or comma-separated string containing title search terms
    @param start_datetime: Datetime object defining start of temporal search period
    @param stop_datetime: Datetime object defining end of temporal search period
    @param record_type_list: List of strings or comma-separated string containing record type(s) to return.
        Defaults to value of self.settings['DEFAULT_RECORD_TYPES']
    @param max_total_records: Maximum total number of records to return. Passed unchanged to
        self.get_csw_records (any default is presumably applied there - not applied here)
    @param get_layers: Boolean flag indicating whether to get WMS/WCS layer names.
        Defaults to value of self.settings['DEFAULT_GET_LAYERS']

    @return: generator object yielding nested dicts containing information about each record including distributions

    @raise AssertionError: if no search criteria result in any filter
    '''

    def as_list(value):
        # Convert a comma-separated string to a list; leave anything else untouched
        if isinstance(value, str):
            return self.list_from_comma_separated_string(value)
        return value

    def like_filters(propertyname, values):
        # One case-insensitive PropertyIsLike per value
        return [
            fes.PropertyIsLike(propertyname=propertyname,
                               literal=value,
                               matchCase=False)
            for value in values
        ]

    def any_of(propertyname, values):
        # Single filter for one value, "or" of filters for several
        filters = like_filters(propertyname, values)
        return filters[0] if len(filters) == 1 else fes.Or(filters)

    bounding_box_crs = bounding_box_crs or self.settings['DEFAULT_CRS']
    get_layers = self.settings['DEFAULT_GET_LAYERS'] if get_layers is None else get_layers

    # Convert strings to lists if required
    identifier_list = as_list(identifier_list)
    alt_identifier_list = as_list(alt_identifier_list)
    keyword_list = as_list(keyword_list)
    anytext_list = as_list(anytext_list)
    titleword_list = as_list(titleword_list)
    record_type_list = as_list(record_type_list or self.settings['DEFAULT_RECORD_TYPES'])

    # Build filter list
    fes_filter_list = []

    if identifier_list:
        fes_filter_list.append(any_of('Identifier', identifier_list))

    if alt_identifier_list:
        fes_filter_list.append(any_of('AlternateIdentifier', alt_identifier_list))

    # Check for unchanged, upper-case, lower-case and capitalised keywords
    # with single-character wildcards substituted for whitespace characters
    # GeoNetwork keyword search is always case sensitive
    if keyword_list:
        fes_filter_list += self.any_case_match_filters('Subject', keyword_list)

    if anytext_list:
        fes_filter_list += like_filters('anyText', anytext_list)

    if start_datetime or stop_datetime:
        fes_filter_list += self.get_date_filter(start_datetime, stop_datetime)

    if titleword_list:
        fes_filter_list += like_filters('title', titleword_list)

    # Check for unchanged, upper-case, lower-case and capitalised keywords
    # with single-character wildcards substituted for whitespace characters
    # GeoNetwork type search is always case sensitive
    if record_type_list:
        fes_filter_list += self.any_case_match_filters('type', record_type_list)

    if bounding_box:
        # N.B: Bounding box ordinate ordering must match CRS. Default CRS84 supports lon-lat ordering, not lat-lon
        # See https://gis.stackexchange.com/questions/124050/how-do-i-specify-the-lon-lat-ordering-in-csw-bounding-box-request
        fes_filter_list += [fes.BBox(bounding_box, crs=bounding_box_crs)]

    # Raised explicitly (rather than via "assert") so the check survives "python -O";
    # the exception type is unchanged for existing callers.
    if not fes_filter_list:
        raise AssertionError('No search criteria defined')

    # Use single filter if no "and" required
    if len(fes_filter_list) == 1:
        fes_filter_list = fes_filter_list[0]

    # Return generator object
    return self.get_csw_records(fes_filter_list,
                                max_total_records=max_total_records,
                                get_layers=get_layers)
jd_start = dt.datetime.strptime(start_date, '%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date, '%Y-%m-%d %H:%M')

# Single formatted argument: prints "<start> to <stop>" on Python 2 and 3 alike.
print('%s to %s' % (start_date, stop_date))

sos_name = 'water_surface_height_above_reference_datum'

# <codecell>

# Convert user input into FES filters.
start, stop = dateRange(start_date, stop_date)
bbox = fes.BBox(box)

# Any of the model names may match.
or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                     literal=('*%s*' % val),
                                     escapeChar='\\',
                                     wildCard='*',
                                     singleChar='?')
                  for val in model_name_list])

# Records mentioning 'Averages' are excluded.
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                       literal=('*%s*' % val),
                                       escapeChar='\\',
                                       wildCard='*',
                                       singleChar='?')])

filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]

# <markdowncell>

# ##Find model results at NODC

# <codecell>
if constraint == 'overlaps': start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date) stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date) elif constraint == 'within': start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date) stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date) return fes.And([start, stop]) # <codecell> # Standard Name filters cf_name_filters = [] for cf_name in variables_to_query: text_filter = fes.PropertyIsLike(propertyname='apiso:AnyText', literal="*%s*" % cf_name, wildCard='*') cf_name_filters.append(text_filter) cf_name_filters = fes.Or(cf_name_filters) # Geographic filters geographic_filter = fes.BBox(bbox=bounding_box) # Temporal filters temporal_filter = fes_date_filter(start_date_string, end_date_string) filters = fes.And([cf_name_filters, geographic_filter, temporal_filter]) # <markdowncell> # ##### The actual CSW filter POST envelope looks like this # <codecell>
# With these 3 elements it is possible to assemble a [OGC Filter Encoding (FE)](http://www.opengeospatial.org/standards/filter) using the `owslib.fes`\* module. # # \* OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models. # In[8]: from owslib import fes from ioos_tools.ioos import fes_date_filter kw = dict(wildCard='*', escapeChar='\\', singleChar='?', propertyname='apiso:AnyText') or_filt = fes.Or( [fes.PropertyIsLike(literal=('*%s*' % val), **kw) for val in model_names]) kw = dict(wildCard='*', escapeChar='\\', singleChar='?', propertyname='apiso:ServiceType') serviceType = fes.PropertyIsLike(literal=('*%s*' % service_type), **kw) begin, end = fes_date_filter(start, stop) bbox_crs = fes.BBox(bbox, crs=crs) filter_list = [ fes.And([ bbox_crs, # bounding box begin,
# Search

# <codecell>

# Turn the user input into FES filters.
start, stop = fes_date_filter(start_date, stop_date)
bbox = fes.BBox(bounding_box)

# Options shared by every text filter in this cell.
kw = {
    'propertyname': 'apiso:AnyText',
    'escapeChar': '\\',
    'wildCard': '*',
    'singleChar': '?',
}

# Any of the configured names for currents may match.
name_filters = []
for val in data_dict['currents']['names']:
    name_filters.append(fes.PropertyIsLike(literal=('*%s*' % val), **kw))
or_filt = fes.Or(name_filters)

# Records mentioning 'Averages' are excluded.
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(literal=('*%s*' % val), **kw)])

filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]

# Connect to CSW, explore its properties.
# Try request using multiple filters "and" syntax: [[filter1, filter2]]
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
print("%s csw records found" % len(csw.records))

# List the title of every record returned.
for rec, item in csw.records.items():
    print(item.title)

# <markdowncell>