def parse_remote_metadata(self, timeout=30):
    """Fetch each MetadataURL and attach the parsed document as
    metadataUrl['metadata'] (FGDC or ISO record), or None on failure.

    :param timeout: per-request timeout in seconds
    """
    for md_link in self.metadataUrls:
        if md_link["url"] is None:
            continue
        try:
            response = openURL(md_link["url"], timeout=timeout,
                               headers=self.headers, auth=self.auth)
            tree = etree.fromstring(response.read())

            # FGDC documents have a bare <metadata> root
            fgdc_node = tree.find(".//metadata")
            if fgdc_node is not None:
                md_link["metadata"] = Metadata(fgdc_node)
                continue

            # otherwise look for an ISO 19139 / 19115-2 record
            iso_node = tree.find(
                ".//" + util.nspath_eval("gmd:MD_Metadata",
                                         n.get_namespaces(["gmd"]))) \
                or tree.find(
                    ".//" + util.nspath_eval("gmi:MI_Metadata",
                                             n.get_namespaces(["gmi"])))
            if iso_node is not None:
                md_link["metadata"] = MD_Metadata(iso_node)
                continue
        except Exception:
            md_link["metadata"] = None
def _parserecords(self, outputschema, esn):
    """Populate self.records from the response document, selecting the
    record parser (ISO 19139, FGDC CSDGM, NASA DIF, GM03 or csw:Record)
    from the requested output schema."""
    exml = self._exml
    if outputschema == namespaces['gmd']:  # iso 19139
        hits = exml.findall('.//' + util.nspath_eval(
            'gmd:MD_Metadata', namespaces)) \
            or exml.findall('.//' + util.nspath_eval(
                'gmi:MI_Metadata', namespaces))
        for hit in hits:
            ident = hit.find(util.nspath_eval(
                'gmd:fileIdentifier/gco:CharacterString', namespaces))
            key = self._setidentifierkey(util.testXMLValue(ident))
            self.records[key] = MD_Metadata(hit)
    elif outputschema == namespaces['fgdc']:  # fgdc csdgm
        for hit in exml.findall('.//metadata'):
            ident = hit.find('idinfo/datasetid')
            key = self._setidentifierkey(util.testXMLValue(ident))
            self.records[key] = Metadata(hit)
    elif outputschema == namespaces['dif']:  # nasa dif
        for hit in exml.findall(
                './/' + util.nspath_eval('dif:DIF', namespaces)):
            ident = hit.find(util.nspath_eval('dif:Entry_ID', namespaces))
            key = self._setidentifierkey(util.testXMLValue(ident))
            self.records[key] = DIF(hit)
    elif outputschema == namespaces['gm03']:  # GM03
        for hit in exml.findall(
                './/' + util.nspath_eval('gm03:TRANSFER', namespaces)):
            ident = hit.find(util.nspath_eval(
                'gm03:fileIdentifier', namespaces))
            key = self._setidentifierkey(util.testXMLValue(ident))
            self.records[key] = GM03(hit)
    else:  # process default
        for hit in exml.findall('.//' + util.nspath_eval(
                'csw:%s' % self._setesnel(esn), namespaces)):
            ident = hit.find(util.nspath_eval('dc:identifier', namespaces))
            key = self._setidentifierkey(util.testXMLValue(ident))
            self.records[key] = CswRecord(hit)
def parse_remote_metadata(self, timeout=30):
    """Parse remote metadata for MetadataURL of format 'text/xml' and add it
    as metadataUrl['metadata'].

    :param timeout: request timeout in seconds for each remote fetch

    On any fetch/parse failure the entry's 'metadata' key is set to None.
    """
    for metadataUrl in self.metadataUrls:
        if metadataUrl['url'] is not None \
                and metadataUrl['format'].lower() == 'text/xml':
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.fromstring(content.read())
                if metadataUrl['type'] == 'FGDC':
                    mdelem = doc.find('.//metadata')
                    if mdelem is not None:
                        metadataUrl['metadata'] = Metadata(mdelem)
                    else:
                        metadataUrl['metadata'] = None
                elif metadataUrl['type'] in ['TC211', '19115', '19139']:
                    mdelem = doc.find(
                        './/' + util.nspath_eval('gmd:MD_Metadata', namespaces)
                    ) or doc.find(
                        './/' + util.nspath_eval('gmi:MI_Metadata', namespaces))
                    if mdelem is not None:
                        metadataUrl['metadata'] = MD_Metadata(mdelem)
                    else:
                        metadataUrl['metadata'] = None
            # bugfix: was a bare `except:`, which also swallows SystemExit
            # and KeyboardInterrupt; narrowed to Exception
            except Exception:
                metadataUrl['metadata'] = None
def parse_remote_metadata(self, timeout=30):
    """Fetch and parse each MetadataURL of format 'text/xml', storing the
    parsed document (or None on failure) under metadataUrl['metadata'].

    :param timeout: per-request timeout in seconds
    """
    for entry in self.metadataUrls:
        if entry["url"] is None or entry["format"].lower() != "text/xml":
            continue
        try:
            resp = openURL(entry["url"], timeout=timeout,
                           headers=self.headers, auth=self.auth)
            root = etree.fromstring(resp.read())
            if entry["type"] == "FGDC":
                # FGDC documents have a bare <metadata> root
                node = root.find(".//metadata")
                entry["metadata"] = Metadata(node) if node is not None else None
            elif entry["type"] in ["TC211", "19115", "19139"]:
                node = root.find(".//" + nspath_eval(
                    "gmd:MD_Metadata", namespaces)) or root.find(
                    ".//" + nspath_eval("gmi:MI_Metadata", namespaces))
                entry["metadata"] = MD_Metadata(node) if node is not None else None
        except Exception:
            entry["metadata"] = None
def parse_remote_metadata(self, timeout=30):
    """Parse remote metadata for MetadataURL and add it as
    metadataUrl['metadata'] (None on failure)."""
    xml_formats = ['application/xml', 'text/xml']
    for link in self.metadataUrls:
        if link['url'] is None or link['format'].lower() not in xml_formats:
            continue
        # download URL
        try:
            resp = openURL(link['url'], timeout=timeout, auth=self.auth)
            tree = etree.fromstring(resp.read())
            if link['type'] == 'FGDC':
                node = tree.find('.//metadata')
                if node is not None:
                    link['metadata'] = Metadata(node)
                    continue
            if link['type'] == 'TC211':
                node = tree.find('.//' + nspath_eval(
                    'gmd:MD_Metadata', n.get_namespaces(['gmd']))) \
                    or tree.find('.//' + nspath_eval(
                        'gmi:MI_Metadata', n.get_namespaces(['gmi'])))
                if node is not None:
                    link['metadata'] = MD_Metadata(node)
                    continue
        except Exception:
            link['metadata'] = None
def fgdc2dict(exml):
    """generate dict of properties from FGDC metadata

    :param exml: parsed FGDC metadata etree element
    :returns: [vals, regions, keywords] — property dict, place keywords,
              theme keywords
    """
    vals = {}
    regions = []
    keywords = []

    mdata = Metadata(exml)
    if hasattr(mdata.idinfo, 'citation'):
        if hasattr(mdata.idinfo.citation, 'citeinfo'):
            vals['spatial_representation_type'] = \
                mdata.idinfo.citation.citeinfo['geoform']
            vals['title'] = mdata.idinfo.citation.citeinfo['title']

    if hasattr(mdata.idinfo, 'descript'):
        vals['abstract'] = mdata.idinfo.descript.abstract
        vals['purpose'] = mdata.idinfo.descript.purpose
        if mdata.idinfo.descript.supplinf is not None:
            vals['supplemental_information'] = mdata.idinfo.descript.supplinf

    if hasattr(mdata.idinfo, 'keywords'):
        if mdata.idinfo.keywords.theme:
            for theme in mdata.idinfo.keywords.theme:
                if theme['themekt'] is not None:
                    lowered_themekt = theme['themekt'].lower()
                    # Owslib doesn't support extracting the Topic Category
                    # from FGDC. So we add support here.
                    # http://www.fgdc.gov/metadata/geospatial-metadata-standards
                    if all(ss in lowered_themekt
                           for ss in ['iso', '19115', 'topic']) and \
                            any(ss in lowered_themekt
                                for ss in ['category', 'categories']):
                        vals['topic_category'] = theme['themekey'][0]
                keywords.extend(theme['themekey'])
        if mdata.idinfo.keywords.place:
            for place in mdata.idinfo.keywords.place:
                if 'placekey' in place:
                    regions.extend(place['placekey'])

    # bugfix: guard 'timeperd' itself, not only its 'timeinfo' child —
    # records without a time-period section would otherwise raise
    # AttributeError (every other idinfo section here is hasattr-guarded)
    if hasattr(mdata.idinfo, 'timeperd') and \
            hasattr(mdata.idinfo.timeperd, 'timeinfo'):
        if hasattr(mdata.idinfo.timeperd.timeinfo, 'rngdates'):
            vals['temporal_extent_start'] = \
                sniff_date(mdata.idinfo.timeperd.timeinfo.rngdates.begdate)
            vals['temporal_extent_end'] = \
                sniff_date(mdata.idinfo.timeperd.timeinfo.rngdates.enddate)

    vals['constraints_other'] = mdata.idinfo.useconst
    raw_date = mdata.metainfo.metd
    if raw_date is not None:
        vals['date'] = sniff_date(raw_date)

    return [vals, regions, keywords]
def parse_remote_metadata(self, timeout=30):
    """Parse remote metadata for MetadataURL of format 'XML' and add it as
    metadataUrl['metadata'] (None on failure)."""
    for record in self.metadataUrls:
        if record['url'] is None or record['format'].lower() != 'xml':
            continue
        try:
            handle = openURL(record['url'], timeout=timeout, auth=self.auth)
            parsed = etree.fromstring(handle.read())
            if record['type'] == 'FGDC':
                found = parsed.find('.//metadata')
                record['metadata'] = Metadata(found) if found is not None else None
            elif record['type'] == 'TC211':
                found = parsed.find('.//' + util.nspath_eval(
                    'gmd:MD_Metadata', n.get_namespaces(['gmd']))) \
                    or parsed.find('.//' + util.nspath_eval(
                        'gmi:MI_Metadata', n.get_namespaces(['gmi'])))
                record['metadata'] = MD_Metadata(found) if found is not None else None
        except Exception:
            record['metadata'] = None
def __init__(self, elem, parent, parse_remote_metadata=False):
    """Parse a WFS FeatureType element into content metadata.

    :param elem: the FeatureType etree element
    :param parent: parent element searched for Operations verbs
    :param parse_remote_metadata: if True, fetch and parse each MetadataURL
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('BoundingBox'))
    if b is not None:
        self.boundingBox = (float(b.attrib['minx']), float(b.attrib['miny']),
                            float(b.attrib['maxx']), float(b.attrib['maxy']),
                            b.attrib['SRS'])
    self.boundingBoxWGS84 = None
    b = elem.find(nspath('LatLongBoundingBox'))
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']), float(b.attrib['miny']),
            float(b.attrib['maxx']), float(b.attrib['maxy']),
        )
    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(nspath('SRS'))]
    # verbs
    self.verbOptions = [op.tag for op
                        in parent.findall(nspath('Operations/*'))]
    # bugfix: original built this list with `+` and discarded the result;
    # `+=` actually merges the layer-level verbs in
    self.verbOptions += [op.tag for op
                         in elem.findall(nspath('Operations/*'))
                         if op.tag not in self.verbOptions]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }
        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL
            try:
                content = urlopen(metadataUrl['url'])
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            # bugfix: `except Exception, err` is Python-2-only syntax and
            # `err` was unused; use the version-neutral form
            except Exception:
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)
def __init__(self, elem, parse_remote_metadata=False, timeout=30):
    """Parse a WFS 1.1 FeatureType element into content metadata.

    :param elem: the wfs:FeatureType etree element
    :param parse_remote_metadata: if True, fetch and parse each MetadataURL
    :param timeout: per-request timeout in seconds for remote metadata
    """
    self.id = testXMLValue(elem.find(nspath_eval('wfs:Name', namespaces)))
    self.title = testXMLValue(elem.find(nspath_eval('wfs:Title', namespaces)))
    self.abstract = testXMLValue(elem.find(nspath_eval('wfs:Abstract', namespaces)))
    self.keywords = [f.text for f in elem.findall(
        nspath_eval('ows:Keywords/ows:Keyword', namespaces))]

    # bbox
    self.boundingBoxWGS84 = None
    b = BoundingBox(elem.find(nspath_eval('ows:WGS84BoundingBox', namespaces)),
                    namespaces['ows'])
    if b is not None:
        try:
            self.boundingBoxWGS84 = (
                float(b.minx), float(b.miny),
                float(b.maxx), float(b.maxy),
            )
        except TypeError:
            # corners missing/non-numeric — leave the bbox unset
            self.boundingBoxWGS84 = None

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(
        nspath_eval('wfs:OtherSRS', namespaces))]
    dsrs = testXMLValue(elem.find(nspath_eval('wfs:DefaultSRS', namespaces)))
    if dsrs is not None:  # first element is default srs
        self.crsOptions.insert(0, Crs(dsrs))

    # verbs
    self.verbOptions = [op.text for op in elem.findall(
        nspath_eval('wfs:Operations/wfs:Operation', namespaces))]

    # output formats
    self.outputFormats = [op.text for op in elem.findall(
        nspath_eval('wfs:OutputFormats/wfs:Format', namespaces))]

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath_eval('wfs:MetadataURL', namespaces)):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }
        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] in ['TC211', '19115', '19139']:
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # best-effort fetch: failures leave metadata as None
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None
def parse_remote_metadata(self, timeout=30):
    """Parse remote metadata for MetadataURL and add it as
    metadataUrl['metadata'].

    :param timeout: request timeout in seconds for each remote fetch

    Tries FGDC (bare <metadata> root) first, then ISO 19139/19115-2; on any
    fetch/parse failure the entry's 'metadata' key is set to None.
    """
    for metadataUrl in self.metadataUrls:
        if metadataUrl['url'] is not None:
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.fromstring(content.read())
                mdelem = doc.find('.//metadata')
                if mdelem is not None:
                    metadataUrl['metadata'] = Metadata(mdelem)
                    continue
                mdelem = doc.find('.//' + util.nspath_eval(
                    'gmd:MD_Metadata', n.get_namespaces(['gmd']))) \
                    or doc.find('.//' + util.nspath_eval(
                        'gmi:MI_Metadata', n.get_namespaces(['gmi'])))
                if mdelem is not None:
                    metadataUrl['metadata'] = MD_Metadata(mdelem)
                    continue
            # bugfix: was a bare `except:`, which also swallows SystemExit
            # and KeyboardInterrupt; narrowed to Exception
            except Exception:
                metadataUrl['metadata'] = None
def fgdc2dict(exml):
    """generate dict of properties from FGDC metadata

    :param exml: parsed FGDC metadata etree element
    :returns: [vals, keywords] — property dict and theme keywords
    """
    vals = {}
    keywords = []

    mdata = Metadata(exml)
    vals['csw_typename'] = 'fgdc:metadata'
    vals['csw_schema'] = 'http://www.opengis.net/cat/csw/csdgm'
    if hasattr(mdata.idinfo, 'citation'):
        if hasattr(mdata.idinfo.citation, 'citeinfo'):
            vals['spatial_representation_type'] = \
                mdata.idinfo.citation.citeinfo['geoform']
            vals['title'] = mdata.idinfo.citation.citeinfo['title']

    if hasattr(mdata.idinfo, 'descript'):
        vals['abstract'] = mdata.idinfo.descript.abstract
        vals['purpose'] = mdata.idinfo.descript.purpose
        if mdata.idinfo.descript.supplinf is not None:
            vals['supplemental_information'] = mdata.idinfo.descript.supplinf

    if hasattr(mdata.idinfo, 'keywords'):
        if mdata.idinfo.keywords.theme:
            keywords = mdata.idinfo.keywords.theme[0]['themekey']

    # bugfix: guard 'timeperd' itself, not only its 'timeinfo' child —
    # records without a time-period section would otherwise raise
    # AttributeError (every other idinfo section here is hasattr-guarded)
    if hasattr(mdata.idinfo, 'timeperd') and \
            hasattr(mdata.idinfo.timeperd, 'timeinfo'):
        if hasattr(mdata.idinfo.timeperd.timeinfo, 'rngdates'):
            vals['temporal_extent_start'] = \
                sniff_date(mdata.idinfo.timeperd.timeinfo.rngdates.begdate)
            vals['temporal_extent_end'] = \
                sniff_date(mdata.idinfo.timeperd.timeinfo.rngdates.enddate)

    vals['constraints_other'] = mdata.idinfo.useconst
    raw_date = mdata.metainfo.metd
    if raw_date is not None:
        vals['date'] = sniff_date(raw_date)

    return [vals, keywords]
def __init__(self, elem, parent=None, children=None, index=0,
             parse_remote_metadata=False, timeout=30):
    """Parse a WMS 1.3.0 Layer element into content metadata.

    :param elem: the Layer etree element (must be a Layer)
    :param parent: parent ContentMetadata, source of inherited properties
                   (bbox, CRS options, styles)
    :param children: pre-built child layer list, if any
    :param index: position used to build the dotted layer index
    :param parse_remote_metadata: if True, fetch and parse each MetadataURL
    :param timeout: per-request timeout in seconds for remote metadata
    """
    if xmltag_split(elem.tag) != 'Layer':
        raise ValueError('%s should be a Layer' % (elem, ))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)
    self._children = children

    self.id = self.name = testXMLValue(
        elem.find(nspath('Name', WMS_NAMESPACE)))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find(nspath('Title', WMS_NAMESPACE)))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(
        elem.find(nspath('Abstract', WMS_NAMESPACE)))

    # TODO: what is the preferred response to esri's handling of custom
    # projections in the spatial ref definitions? see
    # http://resources.arcgis.com/en/help/main/10.1/index.html#//00sq000000m1000000
    # and an example (20150812)
    # http://maps.ngdc.noaa.gov/arcgis/services/firedetects/MapServer/WMSServer?request=GetCapabilities&service=WMS

    # bboxes
    b = elem.find(nspath('EX_GeographicBoundingBox', WMS_NAMESPACE))
    self.boundingBoxWGS84 = None
    if b is not None:
        minx = b.find(nspath('westBoundLongitude', WMS_NAMESPACE))
        miny = b.find(nspath('southBoundLatitude', WMS_NAMESPACE))
        maxx = b.find(nspath('eastBoundLongitude', WMS_NAMESPACE))
        maxy = b.find(nspath('northBoundLatitude', WMS_NAMESPACE))
        box = tuple(
            map(float, [
                minx.text if minx is not None else None,
                miny.text if miny is not None else None,
                maxx.text if maxx is not None else None,
                maxy.text if maxy is not None else None
            ]))
        self.boundingBoxWGS84 = tuple(box)
    elif self.parent:
        # WGS84 bbox is inheritable from the parent layer
        if hasattr(self.parent, 'boundingBoxWGS84'):
            self.boundingBoxWGS84 = self.parent.boundingBoxWGS84

    # make a bbox list (of tuples)
    crs_list = []
    for bb in elem.findall(nspath('BoundingBox', WMS_NAMESPACE)):
        srs_str = bb.attrib.get('CRS', None)
        srs = Crs(srs_str)

        box = tuple(
            map(float, [
                bb.attrib['minx'], bb.attrib['miny'],
                bb.attrib['maxx'], bb.attrib['maxy']
            ]))
        minx, miny, maxx, maxy = box[0], box[1], box[2], box[3]

        # handle the ordering so that it always
        # returns (minx, miny, maxx, maxy)
        if srs and srs.axisorder == 'yx':
            # reverse things
            minx, miny, maxx, maxy = box[1], box[0], box[3], box[2]

        crs_list.append((
            minx, miny, maxx, maxy,
            srs_str,
        ))
    self.crs_list = crs_list
    # and maintain the original boundingBox attribute (first in list)
    # or the wgs84 bbox (to handle cases of incomplete parentage)
    self.boundingBox = crs_list[0] if crs_list else self.boundingBoxWGS84

    # ScaleHint
    sh = elem.find(nspath('ScaleHint', WMS_NAMESPACE))
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {
                'min': sh.attrib['min'],
                'max': sh.attrib['max']
            }

    attribution = elem.find(nspath('Attribution', WMS_NAMESPACE))
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find(nspath('Title', WMS_NAMESPACE))
        url = attribution.find(nspath('OnlineResource', WMS_NAMESPACE))
        logo = attribution.find(nspath('LogoURL', WMS_NAMESPACE))
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib[
                '{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']),
                                             int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                nspath('OnlineResource', WMS_NAMESPACE)
            ).attrib['{http://www.w3.org/1999/xlink}href']

    # TODO: get this from the bbox attributes instead (deal with parents)
    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer
    if elem.find(nspath('CRS', WMS_NAMESPACE)) is not None:
        # some servers found in the wild use a single SRS
        # tag containing a whitespace separated list of SRIDs
        # instead of several SRS tags. hence the inner loop
        for srslist in [
                x.text for x in elem.findall(nspath('CRS', WMS_NAMESPACE))
        ]:
            if srslist:
                for srs in srslist.split():
                    self.crsOptions.append(srs)

    # Get rid of duplicate entries
    self.crsOptions = list(set(self.crsOptions))

    # Set self.crsOptions to None if the layer (and parents) had no SRS options
    if len(self.crsOptions) == 0:
        # raise ValueError('%s no SRS available!?' % (elem,))
        # Comment by D Lowe.
        # Do not raise ValueError as it is possible that a layer is purely
        # a parent layer and does not have SRS specified.
        # Comment by Jachym:
        # Do not set it to None, but to [], which will make the code
        # work further. Fixed by anthonybaxter
        self.crsOptions = []

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall(nspath('Style', WMS_NAMESPACE)):
        name = s.find(nspath('Name', WMS_NAMESPACE))
        title = s.find(nspath('Title', WMS_NAMESPACE))
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s, ))
        style = {'title': title.text}
        # legend url
        legend = s.find(nspath('LegendURL/OnlineResource', WMS_NAMESPACE))
        if legend is not None:
            style['legend'] = legend.attrib[
                '{http://www.w3.org/1999/xlink}href']
        lgd = s.find(nspath('LegendURL', WMS_NAMESPACE))
        if lgd is not None:
            if 'width' in list(lgd.attrib.keys()):
                style['legend_width'] = lgd.attrib.get('width')
            if 'height' in list(lgd.attrib.keys()):
                style['legend_height'] = lgd.attrib.get('height')
            lgd_format = lgd.find(nspath('Format', WMS_NAMESPACE))
            if lgd_format is not None:
                style['legend_format'] = lgd_format.text.strip()
        self.styles[name.text] = style

    # keywords
    self.keywords = [
        f.text
        for f in elem.findall(nspath('KeywordList/Keyword', WMS_NAMESPACE))
    ]

    # extents replaced by dimensions of name
    # comment by Soren Scott
    # <Dimension name="elevation" units="meters" default="500"
    #  multipleValues="1" nearestValue="0" current="true"
    #  unitSymbol="m">500, 490, 480</Dimension>
    # it can be repeated with the same name so ? this assumes a single
    # one to match 1.1.1
    self.timepositions = None
    self.defaulttimeposition = None
    time_dimension = None
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'time':
            time_dimension = dim
    if time_dimension is not None:
        self.timepositions = time_dimension.text.split(
            ',') if time_dimension.text else None
        self.defaulttimeposition = time_dimension.attrib.get(
            'default', None)

    # Elevations - available vertical levels
    self.elevations = None
    elev_dimension = None
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        # NOTE(review): this tests for an 'elevation' *attribute*; other
        # branches match on the 'name' attribute — confirm intended
        if dim.attrib.get('elevation') is not None:
            elev_dimension = dim
    if elev_dimension is not None:
        self.elevations = [
            e.strip() for e in elev_dimension.text.split(',')
        ] if elev_dimension.text else None

    # and now capture the dimensions as more generic things (and custom things)
    self.dimensions = {}
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        dim_name = dim.attrib.get('name')
        dim_data = {}
        for k, v in six.iteritems(dim.attrib):
            if k != 'name':
                dim_data[k] = v
        # single values and ranges are not differentiated here
        dim_data['values'] = dim.text.strip().split(
            ',') if dim.text.strip() else None
        self.dimensions[dim_name] = dim_data

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL', WMS_NAMESPACE)):
        metadataUrl = {
            'type':
            testXMLValue(m.attrib['type'], attrib=True),
            'format':
            testXMLValue(m.find(nspath('Format', WMS_NAMESPACE))),
            'url':
            testXMLValue(m.find(nspath('OnlineResource', WMS_NAMESPACE)).
                         attrib['{http://www.w3.org/1999/xlink}href'],
                         attrib=True)
        }

        if metadataUrl[
                'url'] is not None and parse_remote_metadata:  # download URL
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall(nspath('DataURL', WMS_NAMESPACE)):
        dataUrl = {
            'format':
            m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url':
            m.find(nspath('OnlineResource', WMS_NAMESPACE)).
            attrib['{http://www.w3.org/1999/xlink}href']
        }
        self.dataUrls.append(dataUrl)

    # FeatureListURLs
    self.featureListUrls = []
    for m in elem.findall(nspath('FeatureListURL', WMS_NAMESPACE)):
        featureUrl = {
            'format':
            m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url':
            m.find(nspath('OnlineResource', WMS_NAMESPACE)).
            attrib['{http://www.w3.org/1999/xlink}href']
        }
        self.featureListUrls.append(featureUrl)

    self.layers = []
    for child in elem.findall(nspath('Layer', WMS_NAMESPACE)):
        self.layers.append(ContentMetadata(child, self))
def _parse_fgdc(context, repos, exml):
    """Parse an FGDC CSDGM metadata document into a repository record.

    :param context: pycsw context (namespaces, mappings)
    :param repos: repository object providing dataset()
    :param exml: parsed FGDC metadata etree element
    :returns: populated record object
    """
    from owslib.fgdc import Metadata

    recobj = repos.dataset()
    links = []

    md = Metadata(exml)

    if md.idinfo.datasetid is not None:  # we need an identifier
        _set(context, recobj, 'pycsw:Identifier', md.idinfo.datasetid)
    else:  # generate one ourselves
        # bugfix: uuid.UUID has no get_urn() method in the stdlib;
        # the URN is exposed as the `urn` property
        _set(context, recobj, 'pycsw:Identifier', uuid.uuid1().urn)

    _set(context, recobj, 'pycsw:Typename', 'fgdc:metadata')
    _set(context, recobj, 'pycsw:Schema', context.namespaces['fgdc'])
    _set(context, recobj, 'pycsw:MdSource', 'local')
    _set(context, recobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, recobj, 'pycsw:XML', md.xml)
    _set(context, recobj, 'pycsw:AnyText', util.get_anytext(exml))
    _set(context, recobj, 'pycsw:Language', 'en-US')

    if hasattr(md.idinfo, 'descript'):
        _set(context, recobj, 'pycsw:Abstract', md.idinfo.descript.abstract)

    if hasattr(md.idinfo, 'keywords'):
        if md.idinfo.keywords.theme:
            _set(context, recobj, 'pycsw:Keywords',
                 ','.join(md.idinfo.keywords.theme[0]['themekey']))

    if hasattr(md.idinfo.timeperd, 'timeinfo'):
        if hasattr(md.idinfo.timeperd.timeinfo, 'rngdates'):
            _set(context, recobj, 'pycsw:TempExtent_begin',
                 md.idinfo.timeperd.timeinfo.rngdates.begdate)
            _set(context, recobj, 'pycsw:TempExtent_end',
                 md.idinfo.timeperd.timeinfo.rngdates.enddate)

    if hasattr(md.idinfo, 'origin'):
        # originator doubles as creator/publisher/contributor
        _set(context, recobj, 'pycsw:Creator', md.idinfo.origin)
        _set(context, recobj, 'pycsw:Publisher', md.idinfo.origin)
        _set(context, recobj, 'pycsw:Contributor', md.idinfo.origin)

    if hasattr(md.idinfo, 'ptcontac'):
        _set(context, recobj, 'pycsw:OrganizationName',
             md.idinfo.ptcontac.cntorg)

    _set(context, recobj, 'pycsw:AccessConstraints', md.idinfo.accconst)
    _set(context, recobj, 'pycsw:OtherConstraints', md.idinfo.useconst)
    _set(context, recobj, 'pycsw:Date', md.metainfo.metd)

    if hasattr(md.idinfo, 'spdom') and hasattr(md.idinfo.spdom, 'bbox'):
        bbox = md.idinfo.spdom.bbox
    else:
        bbox = None

    if hasattr(md.idinfo, 'citation'):
        if hasattr(md.idinfo.citation, 'citeinfo'):
            _set(context, recobj, 'pycsw:Type',
                 md.idinfo.citation.citeinfo['geoform'])
            _set(context, recobj, 'pycsw:Title',
                 md.idinfo.citation.citeinfo['title'])
            _set(context, recobj, 'pycsw:PublicationDate',
                 md.idinfo.citation.citeinfo['pubdate'])
            _set(context, recobj, 'pycsw:Format',
                 md.idinfo.citation.citeinfo['geoform'])
            if md.idinfo.citation.citeinfo['onlink']:
                for link in md.idinfo.citation.citeinfo['onlink']:
                    tmp = ',,,%s' % link
                    links.append(tmp)

    if hasattr(md, 'distinfo') and hasattr(md.distinfo, 'stdorder'):
        for link in md.distinfo.stdorder['digform']:
            tmp = ',%s,,%s' % (link['name'], link['url'])
            links.append(tmp)

    if len(links) > 0:
        _set(context, recobj, 'pycsw:Links', '^'.join(links))

    if bbox is not None:
        try:
            tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny,
                                   bbox.maxx, bbox.maxy)
            _set(context, recobj, 'pycsw:BoundingBox',
                 util.bbox2wktpolygon(tmp))
        # bugfix: was a bare `except:`; narrowed to Exception
        except Exception:  # coordinates are corrupted, do not include
            _set(context, recobj, 'pycsw:BoundingBox', None)
    else:
        _set(context, recobj, 'pycsw:BoundingBox', None)

    return recobj
def __init__(self, elem, parent=None, children=None, index=0,
             parse_remote_metadata=False, timeout=30):
    """Parse a WMS 1.1.1 Layer element into content metadata.

    :param elem: the Layer etree element (must be a Layer)
    :param parent: parent ContentMetadata, source of inherited properties
                   (bboxes, SRS options, styles)
    :param children: pre-built child layer list, if any
    :param index: position used to build the dotted layer index
    :param parse_remote_metadata: if True, fetch and parse each MetadataURL
    :param timeout: per-request timeout in seconds for remote metadata
    """
    if elem.tag != 'Layer':
        raise ValueError('%s should be a Layer' % (elem, ))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)
    self._children = children

    self.id = self.name = testXMLValue(elem.find('Name'))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find('Title'))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find('Abstract'))

    # bboxes
    b = elem.find('BoundingBox')
    self.boundingBox = None
    if b is not None:
        try:
            # sometimes the SRS attribute is (wrongly) not provided
            srs = b.attrib['SRS']
        except KeyError:
            srs = None
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            srs,
        )
    elif self.parent:
        # bbox is inheritable from the parent layer
        if hasattr(self.parent, 'boundingBox'):
            self.boundingBox = self.parent.boundingBox

    # ScaleHint
    sh = elem.find('ScaleHint')
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {
                'min': sh.attrib['min'],
                'max': sh.attrib['max']
            }

    attribution = elem.find('Attribution')
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find('Title')
        url = attribution.find('OnlineResource')
        logo = attribution.find('LogoURL')
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib[
                '{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']),
                                             int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                'OnlineResource'
            ).attrib['{http://www.w3.org/1999/xlink}href']

    b = elem.find('LatLonBoundingBox')
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )
    elif self.parent:
        self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
    else:
        self.boundingBoxWGS84 = None

    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer
    if elem.find('SRS') is not None:
        # some servers found in the wild use a single SRS
        # tag containing a whitespace separated list of SRIDs
        # instead of several SRS tags. hence the inner loop
        for srslist in [x.text for x in elem.findall('SRS')]:
            if srslist:
                for srs in srslist.split():
                    self.crsOptions.append(srs)

    # Get rid of duplicate entries
    self.crsOptions = list(set(self.crsOptions))

    # Set self.crsOptions to [] if the layer (and parents) had no SRS options
    if len(self.crsOptions) == 0:
        # raise ValueError('%s no SRS available!?' % (elem,))
        # Comment by D Lowe.
        # Do not raise ValueError as it is possible that a layer is purely
        # a parent layer and does not have SRS specified.
        # Comment by Jachym:
        # Do not set it to None, but to [], which will make the code
        # work further. Fixed by anthonybaxter
        self.crsOptions = []

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall('Style'):
        name = s.find('Name')
        title = s.find('Title')
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s, ))
        style = {'title': title.text}
        # legend url
        legend = s.find('LegendURL/OnlineResource')
        if legend is not None:
            style['legend'] = legend.attrib[
                '{http://www.w3.org/1999/xlink}href']
        self.styles[name.text] = style

    # keywords
    self.keywords = [f.text for f in elem.findall('KeywordList/Keyword')]

    # timepositions - times for which data is available.
    self.timepositions = None
    self.defaulttimeposition = None
    for extent in elem.findall('Extent'):
        if extent.attrib.get("name").lower() == 'time':
            if extent.text:
                self.timepositions = extent.text.split(',')
                self.defaulttimeposition = extent.attrib.get("default")
            break

    # Elevations - available vertical levels
    self.elevations = None
    for extent in elem.findall('Extent'):
        if extent.attrib.get("name").lower() == 'elevation':
            if extent.text:
                self.elevations = extent.text.split(',')
            break

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type':
            testXMLValue(m.attrib['type'], attrib=True),
            'format':
            testXMLValue(m.find('Format')),
            'url':
            testXMLValue(m.find('OnlineResource').
                         attrib['{http://www.w3.org/1999/xlink}href'],
                         attrib=True)
        }

        if metadataUrl[
                'url'] is not None and parse_remote_metadata:  # download URL
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # best-effort fetch: failures leave metadata as None
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall('DataURL'):
        dataUrl = {
            'format': m.find('Format').text.strip(),
            'url': m.find('OnlineResource').
            attrib['{http://www.w3.org/1999/xlink}href']
        }
        self.dataUrls.append(dataUrl)

    self.layers = []
    for child in elem.findall('Layer'):
        self.layers.append(ContentMetadata(child, self))
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Parse a WFS FeatureType element into content metadata.

    :param elem: the FeatureType etree element
    :param parent: parent element searched for Operations verbs
    :param parse_remote_metadata: if True, fetch and parse each MetadataURL
    :param timeout: per-request timeout in seconds for remote metadata
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('LatLongBoundingBox'))
    srs = elem.find(nspath('SRS'))

    if b is not None:
        self.boundingBox = (float(b.attrib['minx']), float(
            b.attrib['miny']), float(b.attrib['maxx']),
                            float(b.attrib['maxy']), Crs(srs.text))

    # transform wgs84 bbox from given default bboxt
    self.boundingBoxWGS84 = None

    if b is not None and srs is not None:
        wgs84 = pyproj.Proj(init="epsg:4326")
        try:
            src_srs = pyproj.Proj(init=srs.text)
            mincorner = pyproj.transform(src_srs, wgs84, b.attrib['minx'],
                                         b.attrib['miny'])
            maxcorner = pyproj.transform(src_srs, wgs84, b.attrib['maxx'],
                                         b.attrib['maxy'])

            self.boundingBoxWGS84 = (mincorner[0], mincorner[1],
                                     maxcorner[0], maxcorner[1])
        except RuntimeError as e:
            # unresolvable SRS — leave the WGS84 bbox unset
            pass

    # crs options
    self.crsOptions = [
        Crs(srs.text) for srs in elem.findall(nspath('SRS'))
    ]

    # verbs
    self.verbOptions = [op.tag for op
                        in parent.findall(nspath('Operations/*'))]
    # NOTE(review): the result of this expression is discarded — it looks
    # like it was meant to be `self.verbOptions += [...]`; confirm intent
    self.verbOptions + [op.tag for op
                        in elem.findall(nspath('Operations/*'))
                        if op.tag not in self.verbOptions]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }

        if metadataUrl[
                'url'] is not None and parse_remote_metadata:  # download URL
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # best-effort fetch: failures leave metadata as None
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)