def _parse_metadata(xml):
    """Parse raw XML into an ``MD_Metadata`` object.

    Returns ``None`` when the payload cannot be parsed. The serialized
    XML is stripped from the result to keep the object lightweight.
    """
    try:
        parsed = etree.fromstring(xml)
    except ValueError:
        # Unparseable input: report failure with None instead of raising.
        return None

    metadata = MD_Metadata(parsed)
    # Drop the raw XML payload (original note: "rimuovo xml" = "remove xml");
    # it is not needed by callers.
    metadata.xml = None
    return metadata
def get_record(self, uuid):
    """Fetch the CSW record for *uuid* and decorate it with keywords/links.

    Returns an ``MD_Metadata`` instance, or ``None`` when the local CSW
    dispatch returns nothing or the response holds no gmd:MD_Metadata node.
    """
    response = self._csw_local_dispatch(identifier=uuid)
    if len(response) < 1:
        return None

    md_elem = etree.fromstring(response).find(
        '{http://www.isotc211.org/2005/gmd}MD_Metadata')
    if md_elem is None:
        return None

    record = MD_Metadata(md_elem)

    # Flatten every keyword group into one plain list on the record.
    record.keywords = []
    has_keywords = hasattr(record, 'identification') and hasattr(
        record.identification, 'keywords')
    if has_keywords:
        for group in record.identification.keywords:
            record.keywords.extend(group['keywords'])

    # Attach catalogue-derived link collections.
    record.links = {}
    record.links['metadata'] = self.catalogue.urls_for_uuid(uuid)
    record.links['download'] = self.catalogue.extract_links(record)
    return record
def get_record(self, uuid):
    """Return the decorated CSW record for *uuid*, or ``None`` if absent."""
    raw = self._csw_local_dispatch(identifier=uuid)
    if len(raw) < 1:
        return None

    gmd_tag = '{http://www.isotc211.org/2005/gmd}MD_Metadata'
    node = etree.fromstring(raw).find(gmd_tag)
    if node is None:
        return None

    rec = MD_Metadata(node)

    # Collapse all keyword groups into a single flat list.
    rec.keywords = []
    if hasattr(rec, 'identification'):
        if hasattr(rec.identification, 'keywords'):
            for kw_group in rec.identification.keywords:
                rec.keywords.extend(kw_group['keywords'])

    # Links resolved through the owning catalogue.
    rec.links = {}
    rec.links['metadata'] = self.catalogue.urls_for_uuid(uuid)
    rec.links['download'] = self.catalogue.extract_links(rec)
    return rec
def __init__(self, elem, parse_remote_metadata=False, timeout=30):
    """Parse a WFS FeatureType capabilities element.

    :param elem: the wfs:FeatureType element
    :param parse_remote_metadata: when True, download and parse the
        document referenced by each MetadataURL
    :param timeout: timeout in seconds for remote metadata downloads
    """
    self.id = testXMLValue(elem.find(nspath_eval('wfs:Name', namespaces)))
    self.title = testXMLValue(
        elem.find(nspath_eval('wfs:Title', namespaces)))
    self.abstract = testXMLValue(
        elem.find(nspath_eval('wfs:Abstract', namespaces)))
    self.keywords = [
        f.text for f in elem.findall(
            nspath_eval('ows:Keywords/ows:Keyword', namespaces))
    ]

    # bbox
    # BUGFIX: the element itself must be guarded; the previous code wrapped
    # a possibly-missing element in BoundingBox and then tested the wrapper
    # object for None, which is never true.
    self.boundingBoxWGS84 = None
    bbox_elem = elem.find(nspath_eval('ows:WGS84BoundingBox', namespaces))
    if bbox_elem is not None:
        b = BoundingBox(bbox_elem, namespaces['ows'])
        self.boundingBoxWGS84 = (
            float(b.minx),
            float(b.miny),
            float(b.maxx),
            float(b.maxy),
        )

    # crs options
    self.crsOptions = [
        Crs(srs.text)
        for srs in elem.findall(nspath_eval('wfs:OtherSRS', namespaces))
    ]
    dsrs = testXMLValue(
        elem.find(nspath_eval('wfs:DefaultSRS', namespaces)))
    if dsrs is not None:
        # first element is default srs
        self.crsOptions.insert(0, Crs(dsrs))

    # verbs
    self.verbOptions = [
        op.text for op in elem.findall(
            nspath_eval('wfs:Operations/wfs:Operation', namespaces))
    ]

    # output formats
    self.outputFormats = [
        op.text for op in elem.findall(
            nspath_eval('wfs:OutputFormats/wfs:Format', namespaces))
    ]

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath_eval('wfs:MetadataURL', namespaces)):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }
        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; remote metadata is best-effort
            try:
                content = urlopen(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] in ['TC211', '19115', '19139']:
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # BUGFIX: "except Exception, err" is Python-2-only syntax
                # (SyntaxError under Python 3); err was unused anyway.
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)
def _parse_iso(context, repos, exml):
    """Map an ISO 19139 metadata document onto a pycsw repository record.

    :param context: pycsw context (provides queryable namespaces)
    :param repos: repository object; repos.dataset() yields an empty record
    :param exml: parsed ISO XML element tree
    :returns: populated record object
    """
    from owslib.iso import MD_Metadata

    recobj = repos.dataset()
    links = []

    md = MD_Metadata(exml)

    # core record properties taken straight from the parsed metadata
    _set(context, recobj, 'pycsw:Identifier', md.identifier)
    _set(context, recobj, 'pycsw:Typename', 'gmd:MD_Metadata')
    _set(context, recobj, 'pycsw:Schema', context.namespaces['gmd'])
    _set(context, recobj, 'pycsw:MdSource', 'local')
    _set(context, recobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, recobj, 'pycsw:XML', md.xml)
    _set(context, recobj, 'pycsw:AnyText', util.get_anytext(exml))
    _set(context, recobj, 'pycsw:Language', md.language)
    _set(context, recobj, 'pycsw:Type', md.hierarchy)
    _set(context, recobj, 'pycsw:ParentIdentifier', md.parentidentifier)
    _set(context, recobj, 'pycsw:Date', md.datestamp)
    _set(context, recobj, 'pycsw:Source', md.dataseturi)
    if md.referencesystem is not None:
        # EPSG URN assembled from the numeric code only
        _set(context, recobj, 'pycsw:CRS',
             'urn:ogc:def:crs:EPSG:6.11:%s' % md.referencesystem.code)

    if hasattr(md, 'identification'):
        _set(context, recobj, 'pycsw:Title', md.identification.title)
        _set(context, recobj, 'pycsw:AlternateTitle',
             md.identification.alternatetitle)
        _set(context, recobj, 'pycsw:Abstract', md.identification.abstract)
        _set(context, recobj, 'pycsw:Relation',
             md.identification.aggregationinfo)

        if hasattr(md.identification, 'temporalextent_start'):
            _set(context, recobj, 'pycsw:TempExtent_begin',
                 md.identification.temporalextent_start)
        if hasattr(md.identification, 'temporalextent_end'):
            _set(context, recobj, 'pycsw:TempExtent_end',
                 md.identification.temporalextent_end)

        # list-valued fields: only the first entry is indexed
        if len(md.identification.topiccategory) > 0:
            _set(context, recobj, 'pycsw:TopicCategory',
                 md.identification.topiccategory[0])

        if len(md.identification.resourcelanguage) > 0:
            _set(context, recobj, 'pycsw:ResourceLanguage',
                 md.identification.resourcelanguage[0])

        # NOTE(review): bbox is only bound inside this branch; if
        # md.identification is absent, the bbox check at the end of this
        # function would raise NameError -- confirm upstream behavior.
        if hasattr(md.identification, 'bbox'):
            bbox = md.identification.bbox
        else:
            bbox = None

        # only the FIRST keyword group is indexed, and only when it
        # contains no None entries
        if (hasattr(md.identification, 'keywords') and
                len(md.identification.keywords) > 0):
            if None not in md.identification.keywords[0]['keywords']:
                _set(context, recobj, 'pycsw:Keywords',
                     ','.join(md.identification.keywords[0]['keywords']))
                _set(context, recobj, 'pycsw:KeywordType',
                     md.identification.keywords[0]['type'])

        if hasattr(md.identification, 'creator'):
            _set(context, recobj, 'pycsw:Creator', md.identification.creator)
        if hasattr(md.identification, 'publisher'):
            _set(context, recobj, 'pycsw:Publisher',
                 md.identification.publisher)
        if hasattr(md.identification, 'contributor'):
            _set(context, recobj, 'pycsw:Contributor',
                 md.identification.contributor)

        if (hasattr(md.identification, 'contact') and
                hasattr(md.identification.contact, 'organization')):
            _set(context, recobj, 'pycsw:OrganizationName',
                 md.identification.contact.organization)

        if len(md.identification.securityconstraints) > 0:
            _set(context, recobj, 'pycsw:SecurityConstraints',
                 md.identification.securityconstraints[0])
        if len(md.identification.accessconstraints) > 0:
            _set(context, recobj, 'pycsw:AccessConstraints',
                 md.identification.accessconstraints[0])
        if len(md.identification.otherconstraints) > 0:
            _set(context, recobj, 'pycsw:OtherConstraints',
                 md.identification.otherconstraints[0])

        if hasattr(md.identification, 'date'):
            # map each CI_Date node onto the matching pycsw date queryable
            for datenode in md.identification.date:
                if datenode.type == 'revision':
                    _set(context, recobj, 'pycsw:RevisionDate', datenode.date)
                elif datenode.type == 'creation':
                    _set(context, recobj, 'pycsw:CreationDate', datenode.date)
                elif datenode.type == 'publication':
                    _set(context, recobj, 'pycsw:PublicationDate',
                         datenode.date)

        if hasattr(md.identification, 'extent') and hasattr(
                md.identification.extent, 'description_code'):
            _set(context, recobj, 'pycsw:GeographicDescriptionCode',
                 md.identification.extent.description_code)

        if len(md.identification.denominators) > 0:
            _set(context, recobj, 'pycsw:Denominator',
                 md.identification.denominators[0])
        if len(md.identification.distance) > 0:
            _set(context, recobj, 'pycsw:DistanceValue',
                 md.identification.distance[0])
        if len(md.identification.uom) > 0:
            _set(context, recobj, 'pycsw:DistanceUOM',
                 md.identification.uom[0])
        if len(md.identification.classification) > 0:
            _set(context, recobj, 'pycsw:Classification',
                 md.identification.classification[0])
        if len(md.identification.uselimitation) > 0:
            _set(context, recobj, 'pycsw:ConditionApplyingToAccessAndUse',
                 md.identification.uselimitation[0])

    # NOTE(review): guard tests md.identification for 'format' but the value
    # read comes from md.distribution -- looks suspicious; confirm intent.
    if hasattr(md.identification, 'format'):
        _set(context, recobj, 'pycsw:Format', md.distribution.format)

    if md.serviceidentification is not None:
        _set(context, recobj, 'pycsw:ServiceType',
             md.serviceidentification.type)
        _set(context, recobj, 'pycsw:ServiceTypeVersion',
             md.serviceidentification.version)
        _set(context, recobj, 'pycsw:CouplingType',
             md.serviceidentification.couplingtype)
        #if len(md.serviceidentification.operateson) > 0:
        #    _set(context, recobj, 'pycsw:operateson = VARCHAR(32),
        #_set(context, recobj, 'pycsw:operation VARCHAR(32),
        #_set(context, recobj, 'pycsw:operatesonidentifier VARCHAR(32),
        #_set(context, recobj, 'pycsw:operatesoname VARCHAR(32),

    # NOTE(review): guard checks md.identification for 'dataquality' yet the
    # values read below come from md.dataquality -- confirm intended object.
    if hasattr(md.identification, 'dataquality'):
        _set(context, recobj, 'pycsw:Degree', md.dataquality.conformancedegree)
        _set(context, recobj, 'pycsw:Lineage', md.dataquality.lineage)
        _set(context, recobj, 'pycsw:SpecificationTitle',
             md.dataquality.specificationtitle)
        if hasattr(md.dataquality, 'specificationdate'):
            _set(context, recobj, 'pycsw:specificationDate',
                 md.dataquality.specificationdate[0].date)
            _set(context, recobj, 'pycsw:SpecificationDateType',
                 md.dataquality.specificationdate[0].datetype)

    if hasattr(md, 'contact') and len(md.contact) > 0:
        _set(context, recobj, 'pycsw:ResponsiblePartyRole',
             md.contact[0].role)

    # distribution links serialized as "name,description,protocol,url"
    if hasattr(md, 'distribution') and hasattr(md.distribution, 'online'):
        for link in md.distribution.online:
            linkstr = '%s,%s,%s,%s' % \
                (link.name, link.description, link.protocol, link.url)
            links.append(linkstr)

    if len(links) > 0:
        _set(context, recobj, 'pycsw:Links', '^'.join(links))

    if bbox is not None:
        try:
            tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny,
                                   bbox.maxx, bbox.maxy)
            _set(context, recobj, 'pycsw:BoundingBox',
                 util.bbox2wktpolygon(tmp))
        except:  # coordinates are corrupted, do not include
            _set(context, recobj, 'pycsw:BoundingBox', None)
    else:
        _set(context, recobj, 'pycsw:BoundingBox', None)

    return recobj
def __init__(self, elem, parent=None, children=None, index=0,
             parse_remote_metadata=False, timeout=30):
    """Parse a WMS capabilities <Layer> element into content metadata.

    :param elem: the Layer element
    :param parent: parent layer metadata; inheritable properties (bbox,
        SRS options, styles) are copied from it
    :param children: pre-built child metadata objects, if any
    :param index: ordinal position used to derive a dotted layer index
    :param parse_remote_metadata: when True, download each MetadataURL
    :param timeout: timeout in seconds for remote metadata downloads
    """
    if elem.tag != 'Layer':
        raise ValueError('%s should be a Layer' % (elem, ))

    self.parent = parent
    # dotted hierarchical index, e.g. "0.2.1"
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self._children = children

    self.id = self.name = testXMLValue(elem.find('Name'))

    # layer attributes (all default to 0 when absent)
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find('Title'))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find('Abstract'))

    # bboxes
    b = elem.find('BoundingBox')
    self.boundingBox = None
    if b is not None:
        try:
            # sometimes the SRS attribute is (wrongly) not provided
            srs = b.attrib['SRS']
        except KeyError:
            srs = None
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            srs,
        )
    elif self.parent:
        # bbox is inheritable from the parent layer
        if hasattr(self.parent, 'boundingBox'):
            self.boundingBox = self.parent.boundingBox

    # ScaleHint
    sh = elem.find('ScaleHint')
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {
                'min': sh.attrib['min'],
                'max': sh.attrib['max']
            }

    attribution = elem.find('Attribution')
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find('Title')
        url = attribution.find('OnlineResource')
        logo = attribution.find('LogoURL')
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib[
                '{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']),
                                             int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                'OnlineResource'
            ).attrib['{http://www.w3.org/1999/xlink}href']

    # WGS84 bbox, inheritable from the parent layer
    b = elem.find('LatLonBoundingBox')
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )
    elif self.parent:
        self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
    else:
        self.boundingBoxWGS84 = None

    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer
    if elem.find('SRS') is not None:
        ## some servers found in the wild use a single SRS
        ## tag containing a whitespace separated list of SRIDs
        ## instead of several SRS tags. hence the inner loop
        for srslist in [x.text for x in elem.findall('SRS')]:
            if srslist:
                for srs in srslist.split():
                    self.crsOptions.append(srs)

        #Get rid of duplicate entries
        self.crsOptions = list(set(self.crsOptions))

    #Set self.crsOptions to None if the layer (and parents) had no SRS options
    if len(self.crsOptions) == 0:
        #raise ValueError('%s no SRS available!?' % (elem,))
        #Comment by D Lowe.
        #Do not raise ValueError as it is possible that a layer is purely
        #a parent layer and does not have SRS specified. Instead set
        #crsOptions to None
        # Comment by Jachym:
        # Do not set it to None, but to [], which will make the code
        # work further. Fixed by anthonybaxter
        self.crsOptions = []

    #Styles
    self.styles = {}

    #Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    #Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall('Style'):
        name = s.find('Name')
        title = s.find('Title')
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s, ))
        style = {'title': title.text}
        # legend url
        legend = s.find('LegendURL/OnlineResource')
        if legend is not None:
            style['legend'] = legend.attrib[
                '{http://www.w3.org/1999/xlink}href']
        self.styles[name.text] = style

    # keywords
    self.keywords = [f.text for f in elem.findall('KeywordList/Keyword')]

    # timepositions - times for which data is available.
    self.timepositions = None
    self.defaulttimeposition = None
    for extent in elem.findall('Extent'):
        if extent.attrib.get("name").lower() == 'time':
            if extent.text:
                self.timepositions = extent.text.split(',')
                self.defaulttimeposition = extent.attrib.get("default")
            break

    # Elevations - available vertical levels
    self.elevations = None
    for extent in elem.findall('Extent'):
        if extent.attrib.get("name").lower() == 'elevation':
            if extent.text:
                self.elevations = extent.text.split(',')
            break

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m.find('OnlineResource').
                                attrib['{http://www.w3.org/1999/xlink}href'],
                                attrib=True)
        }
        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; remote metadata is best-effort
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall('DataURL'):
        dataUrl = {
            'format': m.find('Format').text.strip(),
            'url': m.find('OnlineResource').
            attrib['{http://www.w3.org/1999/xlink}href']
        }
        self.dataUrls.append(dataUrl)

    # recurse into nested layers
    self.layers = []
    for child in elem.findall('Layer'):
        self.layers.append(ContentMetadata(child, self))
def from_esa_iso_xml(self, esa_xml: bytes, inspire_xml: bytes,
                     collections: list, ows_url: str, stac_id: str) -> str:
    """Transform ESA product + INSPIRE metadata XML into ISO 19139-2.

    :param esa_xml: ESA product metadata document (bytes)
    :param inspire_xml: INSPIRE metadata document (bytes)
    :param collections: known collection/product-type identifiers
    :param ows_url: base URL of the OGC web services endpoint
    :param stac_id: optional STAC identifier overriding the product URI
    :returns: serialized ISO 19139-2 XML string
    """
    mcf = deepcopy(self.mcf)

    exml = etree.fromstring(esa_xml)
    ixml = etree.fromstring(inspire_xml)

    product_type = exml.xpath('//PRODUCT_TYPE/text()')[0]

    m = MD_Metadata(ixml)

    product_manifest = exml.xpath('//PRODUCT_URI/text()')[0]
    product_manifest_link = urljoin(self.base_url, product_manifest)

    if stac_id:
        mcf['metadata']['identifier'] = stac_id
    else:
        mcf['metadata']['identifier'] = product_manifest

    mcf['metadata']['hierarchylevel'] = m.hierarchy or 'dataset'
    mcf['metadata']['datestamp'] = exml.xpath(
        '//Product_Info/GENERATION_TIME/text()')[0]

    if product_type in collections:
        mcf['metadata']['parentidentifier'] = product_type

    # footprint is a whitespace-separated lat/lon position list;
    # pick the bbox corners in lon/lat order
    gfp = exml.xpath('//Global_Footprint/EXT_POS_LIST/text()')[0].split()
    minx = gfp[1]
    miny = gfp[0]
    maxx = gfp[5]
    maxy = gfp[4]

    mcf['identification']['extents'] = {
        'spatial': [{
            'bbox': [minx, miny, maxx, maxy],
            'crs': 4326
        }],
        'temporal': [{
            'begin': exml.xpath(
                '//Product_Info/PRODUCT_START_TIME/text()')[0],
            'end': exml.xpath(
                '//Product_Info/PRODUCT_STOP_TIME/text()')[0]
        }]
    }

    mcf['identification']['title'] = product_manifest
    mcf['identification']['abstract'] = product_manifest

    mcf['identification']['dates'] = {
        'creation': mcf['metadata']['datestamp'],
        'publication': mcf['metadata']['datestamp']
    }

    # carry over INSPIRE keyword groups
    for i, kws in enumerate(m.identification.keywords):
        kw_set = f'kw{i}'
        mcf['identification']['keywords'][kw_set] = {
            'keywords': kws['keywords']
        }
        mcf['identification']['keywords'][kw_set][
            'keywords_type'] = kws['type'] or 'theme'

    keyword_xpaths = {
        'eo:productType': '//PRODUCT_TYPE/text()',
        'eo:orbitNumber': '//SENSING_ORBIT_NUMBER/text()',
        'eo:orbitDirection': '//SENSING_ORBIT_DIRECTION/text()',
        'eo:snowCover': '//SNOW_ICE_PERCENTAGE/text()'
    }

    mcf['identification']['keywords']['product'] = {
        'keywords': [],
        'keywords_type': 'theme'
    }

    for key, value in keyword_xpaths.items():
        matches = exml.xpath(value)
        if len(matches) > 0:
            # BUGFIX: previously read value[0] -- the first CHARACTER of
            # the XPath string -- instead of the first XPath match
            keyword = matches[0]
            mcf['identification']['keywords']['product'][
                'keywords'].append(f"{key}:{keyword}")

    mcf['identification']['topiccategory'] = [
        m.identification.topiccategory[0]
    ]
    mcf['identification']['status'] = 'onGoing'
    mcf['identification']['maintenancefrequency'] = 'continual'
    mcf['identification'][
        'accessconstraints'] = m.identification.accessconstraints[0]

    if len(exml.xpath('//Cloud_Coverage_Assessment/text()')) > 0:
        mcf['content_info']['cloud_cover'] = exml.xpath(
            '//Cloud_Coverage_Assessment/text()')[0]
    mcf['content_info']['processing_level'] = exml.xpath(
        '//PROCESSING_LEVEL/text()')[0]

    # spectral band dimensions
    for d in exml.xpath(
            '//Spectral_Information_List/Spectral_Information'):
        mcf['content_info']['dimensions'].append({
            'name': d.attrib.get('physicalBand'),
            'units': d.xpath('//CENTRAL')[0].attrib.get('unit'),
            'min': d.xpath('//MIN/text()')[0],
            'max': d.xpath('//MAX/text()')[0]
        })

    mcf['distribution'][product_manifest] = {
        'url': self.base_url,
        'type': 'enclosure',
        'name': 'product',
        'description': 'product',
        'function': 'download'
    }

    # map the image format onto a MIME type and file extension
    product_format = exml.xpath('//Granule_List/Granule/@imageFormat')[0]
    if product_format == 'JPEG2000':
        mime_type = 'image/jp2'
        file_extension = 'jp2'
    elif product_format == 'TIFF':
        mime_type = 'image/x.geotiff'
        file_extension = 'tif'
    else:
        logger.warning(f'unknown product format: {product_format}')
        mime_type = 'NA'
        file_extension = 'NA'

    # one distribution entry per granule image file
    for image_file in exml.xpath(
            '//Product_Organisation//IMAGE_FILE/text()'):
        dist = {
            'url': urljoin(product_manifest_link,
                           f'{image_file}.{file_extension}'),
            'type': mime_type,
            'name': 'granule',
            'description': 'granule',
            'function': 'download'
        }
        mcf['distribution'][image_file] = dist

    logger.debug('Adding WMS/WCS links')
    wms_link_params = {
        'service': 'WMS',
        'version': '1.3.0',
        'request': 'GetCapabilities',
        'cql': f'identifier="{product_manifest}"'
    }
    mcf['distribution']['wms_link'] = {
        'url': f'{ows_url}?{urlencode(wms_link_params)}',
        'type': 'OGC:WMS',
        'name': product_manifest,
        'description': f'WMS URL for {product_manifest}',
    }
    wcs_link_params = {
        'service': 'WCS',
        'version': '2.0.1',
        'request': 'DescribeEOCoverageSet',
        'eoid': product_manifest
    }
    mcf['distribution']['wcs_link'] = {
        'url': f'{ows_url}?{urlencode(wcs_link_params)}',
        'type': 'OGC:WCS',
        'name': product_manifest,
        'description': f'WCS URL for {product_manifest}',
    }

    mcf['acquisition'] = {
        'platforms': [{
            'identifier': exml.xpath('//SPACECRAFT_NAME/text()')[0],
            'description': exml.xpath('//SPACECRAFT_NAME/text()')[0],
            'instruments': [{
                'identifier': exml.xpath('//DATATAKE_TYPE/text()')[0],
                'type': product_type
            }]
        }]
    }

    logger.debug(f'MCF: {mcf}')

    iso_os = ISO19139_2OutputSchema()
    return iso_os.write(mcf)
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Parse a WFS 1.0 FeatureType element into content metadata.

    :param elem: the FeatureType element
    :param parent: parent capabilities element (source of Operations)
    :param parse_remote_metadata: when True, download each MetadataURL
    :param timeout: timeout in seconds for remote metadata downloads
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('LatLongBoundingBox'))
    srs = elem.find(nspath('SRS'))

    if b is not None:
        self.boundingBox = (float(b.attrib['minx']),
                            float(b.attrib['miny']),
                            float(b.attrib['maxx']),
                            float(b.attrib['maxy']),
                            Crs(srs.text))

    # transform wgs84 bbox from given default bboxt
    self.boundingBoxWGS84 = None

    if b is not None and srs is not None:
        # NOTE: pyproj.Proj(init=...) / pyproj.transform are the legacy
        # pyproj 1.x API; kept as-is to preserve behavior
        wgs84 = pyproj.Proj(init="epsg:4326")
        try:
            src_srs = pyproj.Proj(init=srs.text)
            mincorner = pyproj.transform(src_srs, wgs84,
                                         b.attrib['minx'],
                                         b.attrib['miny'])
            maxcorner = pyproj.transform(src_srs, wgs84,
                                         b.attrib['maxx'],
                                         b.attrib['maxy'])
            self.boundingBoxWGS84 = (mincorner[0], mincorner[1],
                                     maxcorner[0], maxcorner[1])
        except RuntimeError:
            # reprojection failed; leave the WGS84 bbox unset
            pass

    # crs options
    self.crsOptions = [
        Crs(srs.text) for srs in elem.findall(nspath('SRS'))
    ]

    # verbs
    self.verbOptions = [op.tag for op
                        in parent.findall(nspath('Operations/*'))]
    # BUGFIX: this list addition previously used a bare "+" so the result
    # was discarded and per-feature-type operations were silently lost.
    self.verbOptions += [op.tag for op
                         in elem.findall(nspath('Operations/*'))
                         if op.tag not in self.verbOptions]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }
        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; remote metadata is best-effort
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)
def import_(self, metadata: str) -> dict:
    """
    Import metadata into MCF

    :param metadata: string of metadata content

    :returns: `dict` of MCF content
    """
    mcf = {
        'mcf': {
            'version': '1.0',
        },
        'metadata': {},
        'identification': {},
        'contact': {},
        'distribution': {}
    }

    LOGGER.debug('Parsing ISO metadata')
    try:
        m = MD_Metadata(etree.fromstring(metadata))
    except ValueError:
        # lxml rejects str input that carries an XML encoding declaration;
        # retry with an explicit UTF-8 bytes payload
        m = MD_Metadata(etree.fromstring(bytes(metadata, 'utf-8')))

    LOGGER.debug('Setting metadata')
    mcf['metadata']['identifier'] = m.identifier
    mcf['metadata']['hierarchylevel'] = m.hierarchy
    mcf['metadata']['datestamp'] = m.datestamp

    LOGGER.debug('Setting identification')
    mcf['identification']['title'] = m.identification.title
    mcf['identification']['abstract'] = m.identification.abstract

    if m.identification.date:
        mcf['identification']['dates'] = {}
        for date_ in m.identification.date:
            mcf['identification']['dates'][date_.type] = date_.date

    if m.identification.keywords2:
        mcf['identification']['keywords'] = {}
        for count, value in enumerate(m.identification.keywords2):
            key = f'keywords-{count}'
            mcf['identification']['keywords'][key] = {
                'type': value.type,
                'keywords': value.keywords
            }

    mcf['identification'][
        'topiccategory'] = m.identification.topiccategory  # noqa

    # bbox coordinates arrive as strings; literal_eval yields numbers
    mcf['identification']['extents'] = {
        'spatial': [{
            'bbox': [
                ast.literal_eval(m.identification.extent.boundingBox.minx),
                ast.literal_eval(m.identification.extent.boundingBox.miny),
                ast.literal_eval(m.identification.extent.boundingBox.maxx),
                ast.literal_eval(m.identification.extent.boundingBox.maxy)
            ]
        }],
        'temporal': []
    }

    temp_extent = {}
    if m.identification.temporalextent_start:
        temp_extent['begin'] = m.identification.temporalextent_start
    if m.identification.temporalextent_end:
        temp_extent['end'] = m.identification.temporalextent_end
    mcf['identification']['extents']['temporal'].append(temp_extent)

    if m.identification.accessconstraints:
        mcf['identification'][
            'accessconstraints'] = m.identification.accessconstraints[
                0]  # noqa

    mcf['identification']['status'] = m.identification.status

    LOGGER.debug('Setting contact')
    if m.contact:
        for c in m.contact:
            mcf['contact'].update(get_contact(c))

    # BUGFIX: m.distribution.distributor was previously dereferenced before
    # m.distribution itself was checked (the "if m.distribution" guard only
    # came later), raising AttributeError when distribution is None.
    if m.distribution and m.distribution.distributor:
        for d in m.distribution.distributor:
            mcf['contact'].update(get_contact(d.contact))

    LOGGER.debug('Setting distribution')
    if m.distribution:
        for count, value in enumerate(m.distribution.online):
            key = f'link-{count}'
            mcf['distribution'][key] = get_link(value)

    return mcf
else: # xml text_bag = [] if isinstance(bag, (bytes, str)): # serialize to lxml bag = etree.fromstring(bag) for t in bag.xpath('//gco:CharacterString', namespaces=namespaces): if t.text is not None: text_bag.append(t.text.strip()) return ' '.join(text_bag) for xml_file in glob('{}/*.xml'.format(xml_dir)): m = MD_Metadata(etree.parse(xml_file)) _raw_metadata = m.xml.decode('utf-8') _anytext = get_anytext(_raw_metadata) identifier = m.identifier type_ = m.hierarchy title = m.identification.title description = m.identification.abstract contact = m.identification.contact issued = m.datestamp links = [] if m.distribution and m.distribution.online: for ln in m.distribution.online:
def _parse_iso(context, repos, exml):
    """Map an ISO 19139 metadata document onto a pycsw repository record.

    :param context: pycsw context (provides queryable namespaces)
    :param repos: repository object; repos.dataset() yields an empty record
    :param exml: parsed ISO XML element tree
    """
    from owslib.iso import MD_Metadata

    recobj = repos.dataset()
    links = []

    md = MD_Metadata(exml)

    # core record properties taken straight from the parsed metadata
    _set(context, recobj, 'pycsw:Identifier', md.identifier)
    _set(context, recobj, 'pycsw:Typename', 'gmd:MD_Metadata')
    _set(context, recobj, 'pycsw:Schema', context.namespaces['gmd'])
    _set(context, recobj, 'pycsw:MdSource', 'local')
    _set(context, recobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, recobj, 'pycsw:XML', md.xml)
    _set(context, recobj, 'pycsw:AnyText', util.get_anytext(exml))
    _set(context, recobj, 'pycsw:Language', md.language)
    _set(context, recobj, 'pycsw:Type', md.hierarchy)
    _set(context, recobj, 'pycsw:ParentIdentifier', md.parentidentifier)
    _set(context, recobj, 'pycsw:Date', md.datestamp)
    _set(context, recobj, 'pycsw:Modified', md.datestamp)
    _set(context, recobj, 'pycsw:Source', md.dataseturi)
    if md.referencesystem is not None:
        # EPSG URN assembled from the numeric code only
        _set(context, recobj, 'pycsw:CRS',
             'urn:ogc:def:crs:EPSG:6.11:%s' % md.referencesystem.code)

    if hasattr(md, 'identification'):
        _set(context, recobj, 'pycsw:Title', md.identification.title)
        _set(context, recobj, 'pycsw:AlternateTitle',
             md.identification.alternatetitle)
        _set(context, recobj, 'pycsw:Abstract', md.identification.abstract)
        _set(context, recobj, 'pycsw:Relation',
             md.identification.aggregationinfo)

        if hasattr(md.identification, 'temporalextent_start'):
            _set(context, recobj, 'pycsw:TempExtent_begin',
                 md.identification.temporalextent_start)
        if hasattr(md.identification, 'temporalextent_end'):
            _set(context, recobj, 'pycsw:TempExtent_end',
                 md.identification.temporalextent_end)

        # list-valued fields: only the first entry is indexed
        if len(md.identification.topiccategory) > 0:
            _set(context, recobj, 'pycsw:TopicCategory',
                 md.identification.topiccategory[0])

        if len(md.identification.resourcelanguage) > 0:
            _set(context, recobj, 'pycsw:ResourceLanguage',
                 md.identification.resourcelanguage[0])

        # NOTE(review): bbox is only bound inside this branch; if
        # md.identification is absent a later read of bbox would raise
        # NameError -- confirm against the full upstream function.
        if hasattr(md.identification, 'bbox'):
            bbox = md.identification.bbox
        else:
            bbox = None

        # flatten ALL keyword groups, dropping None entries
        if (hasattr(md.identification, 'keywords') and
                len(md.identification.keywords) > 0):
            all_keywords = [
                item for sublist in md.identification.keywords
                for item in sublist['keywords'] if item is not None
            ]
            _set(context, recobj, 'pycsw:Keywords', ','.join(all_keywords))
            # keyword type is taken from the first group only
            _set(context, recobj, 'pycsw:KeywordType',
                 md.identification.keywords[0]['type'])

        if hasattr(md.identification, 'creator'):
            _set(context, recobj, 'pycsw:Creator', md.identification.creator)
        if hasattr(md.identification, 'publisher'):
            _set(context, recobj, 'pycsw:Publisher',
                 md.identification.publisher)
        if hasattr(md.identification, 'contributor'):
            _set(context, recobj, 'pycsw:Contributor',
                 md.identification.contributor)

        if (hasattr(md.identification, 'contact') and
                hasattr(md.identification.contact, 'organization')):
            _set(context, recobj, 'pycsw:OrganizationName',
                 md.identification.contact.organization)

        if len(md.identification.securityconstraints) > 0:
            _set(context, recobj, 'pycsw:SecurityConstraints',
                 md.identification.securityconstraints[0])
        if len(md.identification.accessconstraints) > 0:
            _set(context, recobj, 'pycsw:AccessConstraints',
                 md.identification.accessconstraints[0])
        if len(md.identification.otherconstraints) > 0:
            _set(context, recobj, 'pycsw:OtherConstraints',
                 md.identification.otherconstraints[0])

        if hasattr(md.identification, 'date'):
            # map each CI_Date node onto the matching pycsw date queryable
            for datenode in md.identification.date:
                if datenode.type == 'revision':
                    _set(context, recobj, 'pycsw:RevisionDate', datenode.date)
                elif datenode.type == 'creation':
                    _set(context, recobj, 'pycsw:CreationDate', datenode.date)
                elif datenode.type == 'publication':
                    _set(context, recobj, 'pycsw:PublicationDate',
                         datenode.date)

        if hasattr(md.identification, 'extent') and hasattr(
                md.identification.extent, 'description_code'):
            _set(context, recobj, 'pycsw:GeographicDescriptionCode',
                 md.identification.extent.description_code)

        if len(md.identification.denominators) > 0:
            _set(context, recobj, 'pycsw:Denominator',
                 md.identification.denominators[0])
        if len(md.identification.distance) > 0:
            _set(context, recobj, 'pycsw:DistanceValue',
                 md.identification.distance[0])
        if len(md.identification.uom) > 0:
            _set(context, recobj, 'pycsw:DistanceUOM',
                 md.identification.uom[0])
        if len(md.identification.classification) > 0:
            _set(context, recobj, 'pycsw:Classification',
                 md.identification.classification[0])
        if len(md.identification.uselimitation) > 0:
            _set(context, recobj, 'pycsw:ConditionApplyingToAccessAndUse',
                 md.identification.uselimitation[0])

    # NOTE(review): guard tests md.identification for 'format' but the value
    # read comes from md.distribution -- looks suspicious; confirm intent.
    if hasattr(md.identification, 'format'):
        _set(context, recobj, 'pycsw:Format', md.distribution.format)

    if md.serviceidentification is not None:
        _set(context, recobj, 'pycsw:ServiceType',
             md.serviceidentification.type)
        _set(context, recobj, 'pycsw:ServiceTypeVersion',
             md.serviceidentification.version)
        _set(context, recobj, 'pycsw:CouplingType',
             md.serviceidentification.couplingtype)
        #if len(md.serviceidentification.operateson) > 0:
        #    _set(context, recobj, 'pycsw:operateson = VARCHAR(32),
        #_set(context, recobj, 'pycsw:operation VARCHAR(32),
        #_set(context, recobj, 'pycsw:operatesonidentifier VARCHAR(32),
        #_set(context, recobj, 'pycsw:operatesoname VARCHAR(32),

    # NOTE(review): guard checks md.identification for 'dataquality' yet the
    # values read below come from md.dataquality -- confirm intended object.
    if hasattr(md.identification, 'dataquality'):
        _set(context, recobj, 'pycsw:Degree', md.dataquality.conformancedegree)
        _set(context, recobj, 'pycsw:Lineage', md.dataquality.lineage)
        _set(context, recobj, 'pycsw:SpecificationTitle',
             md.dataquality.specificationtitle)
        if hasattr(md.dataquality, 'specificationdate'):
            _set(context, recobj, 'pycsw:specificationDate',
                 md.dataquality.specificationdate[0].date)
            _set(context, recobj, 'pycsw:SpecificationDateType',
                 md.dataquality.specificationdate[0].datetype)

    if hasattr(md, 'contact') and len(md.contact) > 0:
        _set(context, recobj, 'pycsw:ResponsiblePartyRole',
             md.contact[0].role)

    LOGGER.info('Scanning for links')
    if hasattr(md, 'distribution'):
        dist_links = []
        if hasattr(md.distribution, 'online'):
            LOGGER.debug('Scanning for gmd:transferOptions element(s)')
            dist_links.extend(md.distribution.online)
        if hasattr(md.distribution, 'distributor'):
            LOGGER.debug(
                'Scanning for gmd:distributorTransferOptions element(s)')
            for dist_member in md.distribution.distributor:
                dist_links.extend(dist_member.online)
        for link in dist_links:
            if link.url is not None and link.protocol is None:
                # take a best guess at the protocol from the URL itself
                link.protocol = sniff_link(link.url)
            linkstr = '%s,%s,%s,%s' % \
                (link.name, link.description, link.protocol, link.url)
            links.append(linkstr)

    try:
        LOGGER.debug('Scanning for srv:SV_ServiceIdentification links')
        for sident in md.identificationinfo:
            if hasattr(sident, 'operations'):
                for sops in sident.operations:
                    for scpt in sops['connectpoint']:
                        LOGGER.debug('adding srv link %s', scpt.url)
                        linkstr = '%s,%s,%s,%s' % \
                            (scpt.name, scpt.description,
                             scpt.protocol, scpt.url)
                        links.append(linkstr)
    # NOTE(review): "except Exception, err" is Python 2-only syntax; needs
    # "except Exception as err" (or plain "except Exception") for Python 3.
    # NOTE(review): the visible chunk ends here without returning recobj or
    # using the accumulated links -- the function may be truncated; confirm.
    except Exception, err:  # srv: identification does not exist
        LOGGER.debug('no srv:SV_ServiceIdentification links found')
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Parse a FeatureType element into content metadata.

    :param elem: the FeatureType element
    :param parent: parent capabilities element (source of Operations)
    :param parse_remote_metadata: when True, download each MetadataURL
    :param timeout: timeout in seconds for remote metadata downloads
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('BoundingBox'))
    if b is not None:
        self.boundingBox = (float(b.attrib['minx']),
                            float(b.attrib['miny']),
                            float(b.attrib['maxx']),
                            float(b.attrib['maxy']),
                            b.attrib['SRS'])
    self.boundingBoxWGS84 = None
    b = elem.find(nspath('LatLongBoundingBox'))
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )

    # crs options
    self.crsOptions = [
        Crs(srs.text) for srs in elem.findall(nspath('SRS'))
    ]

    # verbs
    self.verbOptions = [op.tag for op
                        in parent.findall(nspath('Operations/*'))]
    # BUGFIX: this list addition previously used a bare "+" so the result
    # was discarded and per-feature-type operations were silently lost.
    self.verbOptions += [op.tag for op
                         in elem.findall(nspath('Operations/*'))
                         if op.tag not in self.verbOptions]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }
        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; remote metadata is best-effort
            try:
                content = urlopen(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # BUGFIX: "except Exception, err" is Python-2-only syntax
                # (SyntaxError under Python 3); err was unused anyway.
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)
def test_md_parsing_dov():
    """Test the parsing of a metadata record from a DOV GetRecordById
    response available in tests/resources/csw_dov_getrecordbyid.xml.

    Exercises metadata-level fields, contacts, identification info,
    keywords (both legacy ``keywords`` dicts and ``keywords2`` objects)
    and content info against known fixture values.
    """
    md_resource = get_md_resource('tests/resources/csw_dov_getrecordbyid.xml')
    md = MD_Metadata(md_resource)

    # metadata-level fields
    assert type(md) is MD_Metadata
    assert md.identifier == '6c39d716-aecc-4fbc-bac8-4f05a49a78d5'
    assert md.dataseturi is None
    assert md.parentidentifier is None
    assert md.language is None
    assert md.languagecode == 'dut'
    assert md.charset == 'utf8'
    assert md.datestamp == '2018-02-21T16:14:24'
    assert md.hierarchy == 'dataset'

    # metadata point of contact
    assert_list(md.contact, 1)
    contact = md.contact[0]
    assert contact.organization == 'Vlaamse overheid - Vlaamse ' \
                                   'MilieuMaatschappij - Afdeling ' \
                                   'Operationeel Waterbeheer'
    assert contact.address == 'Koning Albert II-laan 20 bus 16'
    assert contact.city == 'Brussel'
    assert contact.postcode == '1000'
    assert contact.country == u'België'
    assert contact.email == '*****@*****.**'
    assert contact.onlineresource.url == 'https://www.vmm.be'
    assert contact.role == 'pointOfContact'

    # standard and reference system
    assert md.stdname == 'ISO 19115/2003/Cor.1:2006'
    assert md.stdver == 'GDI-Vlaanderen Best Practices - versie 1.0'
    assert md.referencesystem.code == '31370'
    assert md.referencesystem.codeSpace == 'EPSG'

    # identification info
    assert_list(md.identificationinfo, 1)
    iden = md.identificationinfo[0]
    assert iden.title == 'Grondwatermeetnetten'
    assert iden.alternatetitle == 'Grondwatermeetnetten beschikbaar op DOV'
    assert_list(iden.date, 2)
    assert iden.date[0].date == '2002-05-22'
    assert iden.date[0].type == 'creation'
    assert iden.date[1].date == '2002-05-22'
    assert iden.date[1].type == 'publication'
    assert_list(iden.uricode, 1)
    assert iden.uricode[0] == 'A64F073B-9FBE-91DD-36FDE7462BBAFA61'
    assert_list(iden.uricodespace, 1)
    assert iden.uricodespace[0] == 'DOV-be'

    # constraints
    assert_list(iden.uselimitation, 3)
    assert "Zie 'Overige beperkingen'" in iden.uselimitation
    assert "Bij het gebruik van de informatie die DOV aanbiedt, dient steeds " \
           "volgende standaardreferentie gebruikt te worden: Databank " \
           "Ondergrond Vlaanderen - (vermelding van de beheerder en de " \
           "specifieke geraadpleegde gegevens) - Geraadpleegd op dd/mm/jjjj, " \
           "op https://www.dov.vlaanderen.be" in iden.uselimitation
    assert "Volgende aansprakelijkheidsbepalingen gelden: " \
           "https://www.dov.vlaanderen.be/page/disclaimer" in iden.uselimitation
    assert_list(iden.uselimitation_url, 0)
    assert_list(iden.accessconstraints, 1)
    assert iden.accessconstraints[0] == 'otherRestrictions'
    assert_list(iden.classification, 0)
    assert_list(iden.otherconstraints, 1)
    assert iden.otherconstraints[0] == "Data beschikbaar voor hergebruik volgens de " \
        "Modellicentie Gratis Hergebruik. Toelichting " \
        "beschikbaar op " \
        "https://www.dov.vlaanderen.be/page/gebruiksvoorwaarden-dov-services"
    assert_list(iden.securityconstraints, 1)
    assert iden.securityconstraints[0] == 'unclassified'
    assert_list(iden.useconstraints, 0)

    # spatial resolution and languages
    assert_list(iden.denominators, 1)
    assert iden.denominators[0] == '10000'
    assert_list(iden.distance, 0)
    assert_list(iden.uom, 0)
    assert_list(iden.resourcelanguage, 0)
    assert_list(iden.resourcelanguagecode, 1)
    assert iden.resourcelanguagecode[0] == 'dut'
    assert_list(iden.creator, 0)
    assert_list(iden.publisher, 0)
    assert_list(iden.contributor, 0)
    assert iden.edition is None
    assert iden.abstract.startswith("In de Databank Ondergrond Vlaanderen "
                                    "zijn verschillende grondwatermeetnetten "
                                    "opgenomen.")
    assert iden.purpose.startswith(
        "Het doel van de meetnetten is inzicht krijgen in de kwaliteit en "
        "kwantiteit van de watervoerende lagen in de ondergrond van "
        "Vlaanderen. Algemeen kan gesteld worden dat de grondwatermeetnetten "
        "een belangrijk beleidsinstrument vormen")
    assert iden.status == 'onGoing'

    # identification-level contacts
    assert_list(iden.contact, 2)
    assert iden.contact[0].organization == 'Vlaamse overheid - Vlaamse MilieuMaatschappij - Afdeling Operationeel Waterbeheer'
    assert iden.contact[0].address == 'Koning Albert II-laan 20 bus 16'
    assert iden.contact[0].city == 'Brussel'
    assert iden.contact[0].postcode == '1000'
    assert iden.contact[0].country == u'België'
    assert iden.contact[0].email == '*****@*****.**'
    assert iden.contact[0].onlineresource.url == 'https://www.vmm.be'
    assert iden.contact[0].role == 'pointOfContact'
    assert iden.contact[1].organization == 'Databank Ondergrond Vlaanderen (' \
                                           'DOV)'
    assert iden.contact[1].address == 'Technologiepark Gebouw 905'
    assert iden.contact[1].city == 'Zwijnaarde'
    assert iden.contact[1].postcode == '9052'
    assert iden.contact[1].country == u'België'
    assert iden.contact[1].email == '*****@*****.**'
    assert iden.contact[1].onlineresource.url == \
        'https://www.dov.vlaanderen.be'
    assert iden.contact[1].role == 'distributor'
    assert_list(iden.spatialrepresentationtype, 1)
    assert iden.spatialrepresentationtype[0] == 'vector'

    # legacy keyword dicts (``keywords``)
    assert_list(iden.keywords, 5)
    assert type(iden.keywords[0]) is dict
    assert iden.keywords[0]['type'] == ''
    assert iden.keywords[0]['thesaurus'][
        'title'] == "GEMET - INSPIRE thema's, versie 1.0"
    assert iden.keywords[0]['thesaurus']['date'] == '2008-06-01'
    assert iden.keywords[0]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[0]['keywords'], 1)
    assert iden.keywords[0]['keywords'] == ['Geologie']
    assert type(iden.keywords[1]) is dict
    assert iden.keywords[1]['type'] == ''
    assert iden.keywords[1]['thesaurus'][
        'title'] == "GEMET - Concepten, versie 2.4"
    assert iden.keywords[1]['thesaurus']['date'] == '2010-01-13'
    assert iden.keywords[1]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[1]['keywords'], 2)
    assert iden.keywords[1]['keywords'] == ['grondwater', 'meetnet(werk)']
    assert type(iden.keywords[2]) is dict
    assert iden.keywords[2]['type'] == ''
    assert iden.keywords[2]['thesaurus']['title'] == "Vlaamse regio's"
    assert iden.keywords[2]['thesaurus']['date'] == '2013-09-25'
    assert iden.keywords[2]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[2]['keywords'], 1)
    assert iden.keywords[2]['keywords'] == ['Vlaams Gewest']
    assert type(iden.keywords[3]) is dict
    assert iden.keywords[3]['type'] is None
    assert iden.keywords[3]['thesaurus'][
        'title'] == "GDI-Vlaanderen Trefwoorden"
    assert iden.keywords[3]['thesaurus']['date'] == '2014-02-26'
    assert iden.keywords[3]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[3]['keywords'], 7)
    assert iden.keywords[3]['keywords'] == [
        'Toegevoegd GDI-Vl', 'Herbruikbaar', 'Vlaamse Open data', 'Kosteloos',
        'Lijst M&R INSPIRE', 'Metadata INSPIRE-conform',
        'Metadata GDI-Vl-conform'
    ]
    assert type(iden.keywords[4]) is dict
    assert iden.keywords[4]['type'] is None
    assert iden.keywords[4]['thesaurus']['title'] == "DOV"
    assert iden.keywords[4]['thesaurus']['date'] == '2010-12-01'
    assert iden.keywords[4]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[4]['keywords'], 7)
    assert iden.keywords[4]['keywords'] == [
        'Ondergrond', 'DOV', 'Vlaanderen', 'monitoring', 'meetnetten',
        'Kaderrichtlijn Water', 'Decreet Integraal waterbeleid'
    ]

    # object-based keywords (``keywords2``) mirror the dicts above
    assert_list(iden.keywords2, 5)
    assert iden.keywords2[0].type == ''
    assert iden.keywords2[0].thesaurus[
        'title'] == "GEMET - INSPIRE thema's, versie 1.0"
    assert iden.keywords2[0].thesaurus['date'] == '2008-06-01'
    assert iden.keywords2[0].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[0].keywords, 1)
    assert iden.keywords2[0].keywords == ['Geologie']
    assert iden.keywords2[1].type == ''
    assert iden.keywords2[1].thesaurus[
        'title'] == "GEMET - Concepten, versie 2.4"
    assert iden.keywords2[1].thesaurus['date'] == '2010-01-13'
    assert iden.keywords2[1].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[1].keywords, 2)
    assert iden.keywords2[1].keywords == ['grondwater', 'meetnet(werk)']
    assert iden.keywords2[2].type == ''
    assert iden.keywords2[2].thesaurus['title'] == "Vlaamse regio's"
    assert iden.keywords2[2].thesaurus['date'] == '2013-09-25'
    assert iden.keywords2[2].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[2].keywords, 1)
    assert iden.keywords2[2].keywords == ['Vlaams Gewest']
    assert iden.keywords2[3].type is None
    assert iden.keywords2[3].thesaurus['title'] == "GDI-Vlaanderen Trefwoorden"
    assert iden.keywords2[3].thesaurus['date'] == '2014-02-26'
    assert iden.keywords2[3].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[3].keywords, 7)
    assert iden.keywords2[3].keywords == [
        'Toegevoegd GDI-Vl', 'Herbruikbaar', 'Vlaamse Open data', 'Kosteloos',
        'Lijst M&R INSPIRE', 'Metadata INSPIRE-conform',
        'Metadata GDI-Vl-conform'
    ]
    assert iden.keywords2[4].type is None
    assert iden.keywords2[4].thesaurus['title'] == "DOV"
    assert iden.keywords2[4].thesaurus['date'] == '2010-12-01'
    assert iden.keywords2[4].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[4].keywords, 7)
    assert iden.keywords2[4].keywords == [
        'Ondergrond', 'DOV', 'Vlaanderen', 'monitoring', 'meetnetten',
        'Kaderrichtlijn Water', 'Decreet Integraal waterbeleid'
    ]

    # topic category and supplemental information
    assert_list(iden.topiccategory, 1)
    assert iden.topiccategory[0] == 'geoscientificInformation'
    assert iden.supplementalinformation == \
        "https://www.dov.vlaanderen.be/page/grondwatermeetnet"

    # content info
    assert_list(md.contentinfo, 1)
    ci = md.contentinfo[0]
    assert ci.compliancecode is None
    assert_list(ci.language, 0)
    assert ci.includedwithdataset == True
    assert_list(ci.featuretypenames, 0)
    assert_list(ci.featurecatalogues, 1)
    assert ci.featurecatalogues[0] == 'b142965f-b2aa-429e-86ff-a7cb0e065d48'
def test_md_parsing_geobretagne():
    """Test the parsing of a metadata record from a GéoBretagne
    MD_Metadata record available in
    tests/resources/csw_geobretagne_mdmetadata.xml.

    Exercises metadata-level fields, contacts, identification info,
    keywords (both legacy ``keywords`` dicts and ``keywords2`` objects)
    and content info against known fixture values.
    """
    md_resource = get_md_resource(
        'tests/resources/csw_geobretagne_mdmetadata.xml')
    md = MD_Metadata(md_resource)

    # metadata-level fields
    assert type(md) is MD_Metadata
    assert md.identifier == '955c3e47-411e-4969-b61b-3556d1b9f879'
    assert md.dataseturi is None
    assert md.parentidentifier is None
    assert md.language == 'fre'
    assert md.languagecode is None
    assert md.charset == 'utf8'
    assert md.datestamp == '2018-07-30T14:19:40'
    assert md.hierarchy == 'dataset'

    # metadata point of contact
    assert_list(md.contact, 1)
    contact = md.contact[0]
    assert contact.organization == 'DIRECTION GENERALE DES FINANCES ' \
                                   'PUBLIQUES BUREAU GF-3A'
    assert contact.address is None
    assert contact.city is None
    assert contact.postcode is None
    assert contact.country is None
    assert contact.email == '*****@*****.**'
    assert contact.onlineresource is None
    assert contact.role == 'pointOfContact'

    # standard and reference system
    assert md.stdname == 'ISO 19115'
    assert md.stdver == '1.0'
    assert md.referencesystem.code == 'RGF93 / CC48 (EPSG:3948)'
    assert md.referencesystem.codeSpace == 'EPSG'

    # identification info
    assert_list(md.identificationinfo, 1)
    iden = md.identificationinfo[0]
    assert iden.title == 'Cadastre 2018 en Bretagne'
    assert iden.alternatetitle is None
    assert_list(iden.date, 1)
    assert iden.date[0].date == '2018-09-01'
    assert iden.date[0].type == 'revision'
    assert_list(iden.uricode, 1)
    assert iden.uricode[
        0] == 'https://geobretagne.fr/geonetwork/apps/georchestra/?uuid=363e3a8e-d0ce-497d-87a9-2a2d58d82772'
    assert_list(iden.uricodespace, 0)

    # constraints
    assert_list(iden.uselimitation, 2)
    assert u"le plan cadastral décrit les limites apparentes de la " \
           u"propriété." in iden.uselimitation
    assert_list(iden.uselimitation_url, 0)
    assert_list(iden.accessconstraints, 1)
    assert iden.accessconstraints[0] == 'otherRestrictions'
    assert_list(iden.classification, 0)
    assert_list(iden.otherconstraints, 1)
    assert iden.otherconstraints[
        0] == u'Usage libre sous réserve des mentions obligatoires ' \
              u'sur tout document de diffusion : "Source : DGFIP"'
    assert_list(iden.securityconstraints, 0)
    assert_list(iden.useconstraints, 1)
    assert iden.useconstraints[0] == 'copyright'

    # spatial resolution and languages
    assert_list(iden.denominators, 1)
    assert iden.denominators[0] == '500'
    assert_list(iden.distance, 0)
    assert_list(iden.uom, 0)
    assert_list(iden.resourcelanguage, 1)
    assert iden.resourcelanguage[0] == 'fre'
    assert_list(iden.resourcelanguagecode, 0)
    assert_list(iden.creator, 0)
    assert_list(iden.publisher, 0)
    assert_list(iden.contributor, 0)
    assert iden.edition is None
    assert iden.abstract.startswith(
        u"Le plan du cadastre est un document administratif qui propose "
        u"l’unique plan parcellaire à grande échelle couvrant le territoire "
        u"national.")
    assert iden.purpose.startswith(
        u"Le but premier du plan cadastral est d'identifier, de localiser et "
        u"représenter la propriété foncière, ainsi que de servir à l'assise "
        u"de la fiscalité locale des propriétés non bâties.")
    assert iden.status == 'completed'

    # identification-level contact
    assert_list(iden.contact, 1)
    assert iden.contact[0].organization == 'DGFIP Bretagne'
    assert iden.contact[0].name == 'DIRECTION GENERALE DES FINANCES PUBLIQUES'
    assert iden.contact[0].address is None
    assert iden.contact[0].city is None
    assert iden.contact[0].postcode is None
    assert iden.contact[0].country is None
    assert iden.contact[0].email == '*****@*****.**'
    assert iden.contact[0].onlineresource is None
    assert iden.contact[0].role == 'pointOfContact'
    assert_list(iden.spatialrepresentationtype, 1)
    assert iden.spatialrepresentationtype[0] == 'vector'

    # legacy keyword dicts (``keywords``)
    assert_list(iden.keywords, 7)
    assert type(iden.keywords[0]) is dict
    assert iden.keywords[0]['type'] == 'place'
    assert iden.keywords[0]['thesaurus']['title'] is None
    assert iden.keywords[0]['thesaurus']['date'] is None
    assert iden.keywords[0]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[0]['keywords'], 1)
    assert iden.keywords[0]['keywords'] == ['France']
    assert type(iden.keywords[1]) is dict
    assert iden.keywords[1]['type'] is None
    assert iden.keywords[1]['thesaurus']['title'] is None
    assert iden.keywords[1]['thesaurus']['date'] is None
    assert iden.keywords[1]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[1]['keywords'], 0)
    assert type(iden.keywords[2]) is dict
    assert iden.keywords[2]['type'] == 'theme'
    assert iden.keywords[2]['thesaurus']['title'] is None
    assert iden.keywords[2]['thesaurus']['date'] is None
    assert iden.keywords[2]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[2]['keywords'], 7)
    assert iden.keywords[2]['keywords'] == [
        u'bâtiments', 'adresses', 'parcelles cadastrales', 'hydrographie',
        u'réseaux de transport', u'unités administratives',
        u'référentiels de coordonnées'
    ]
    assert type(iden.keywords[3]) is dict
    assert iden.keywords[3]['type'] == 'theme'
    assert iden.keywords[3]['thesaurus']['title'] is None
    assert iden.keywords[3]['thesaurus']['date'] is None
    assert iden.keywords[3]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[3]['keywords'], 5)
    assert iden.keywords[3]['keywords'] == [
        u'bâtis', 'sections', 'parcelles', 'cadastre', 'cadastrale'
    ]
    assert type(iden.keywords[4]) is dict
    assert iden.keywords[4]['type'] == 'theme'
    assert iden.keywords[4]['thesaurus']['title'] == u"GéoBretagne v 2.0"
    assert iden.keywords[4]['thesaurus']['date'] == '2014-01-13'
    assert iden.keywords[4]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[4]['keywords'], 1)
    assert iden.keywords[4]['keywords'] == [u'référentiels : cadastre']
    assert type(iden.keywords[5]) is dict
    assert iden.keywords[5]['type'] == 'theme'
    assert iden.keywords[5]['thesaurus']['title'] == "INSPIRE themes"
    assert iden.keywords[5]['thesaurus']['date'] == '2008-06-01'
    assert iden.keywords[5]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[5]['keywords'], 1)
    assert iden.keywords[5]['keywords'] == ['Parcelles cadastrales']
    assert type(iden.keywords[6]) is dict
    assert iden.keywords[6]['type'] == 'theme'
    assert iden.keywords[6]['thesaurus']['title'] == "GEMET"
    assert iden.keywords[6]['thesaurus']['date'] == '2012-07-20'
    assert iden.keywords[6]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[6]['keywords'], 2)
    assert iden.keywords[6]['keywords'] == ['cadastre', u'bâtiment']

    # object-based keywords (``keywords2``); NOTE: 6 entries here vs 7
    # dicts above — presumably the empty keyword set is dropped
    assert_list(iden.keywords2, 6)
    assert iden.keywords2[0].type == 'place'
    assert iden.keywords2[0].thesaurus is None
    assert_list(iden.keywords2[0].keywords, 1)
    assert iden.keywords2[0].keywords == ['France']
    assert iden.keywords2[1].type == 'theme'
    assert iden.keywords2[1].thesaurus is None
    assert_list(iden.keywords2[1].keywords, 7)
    assert iden.keywords2[1].keywords == [
        u'bâtiments', 'adresses', 'parcelles cadastrales', 'hydrographie',
        u'réseaux de transport', u'unités administratives',
        u'référentiels de coordonnées'
    ]
    assert iden.keywords2[2].type == 'theme'
    assert iden.keywords2[2].thesaurus is None
    assert_list(iden.keywords2[2].keywords, 5)
    assert iden.keywords2[2].keywords == [
        u'bâtis', 'sections', 'parcelles', 'cadastre', 'cadastrale'
    ]
    assert iden.keywords2[3].type == 'theme'
    assert iden.keywords2[3].thesaurus['title'] == u"GéoBretagne v 2.0"
    assert iden.keywords2[3].thesaurus['date'] == '2014-01-13'
    assert iden.keywords2[3].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[3].keywords, 1)
    assert iden.keywords2[3].keywords == [u'référentiels : cadastre']
    assert iden.keywords2[4].type == 'theme'
    assert iden.keywords2[4].thesaurus['title'] == "INSPIRE themes"
    assert iden.keywords2[4].thesaurus['date'] == '2008-06-01'
    assert iden.keywords2[4].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[4].keywords, 1)
    assert iden.keywords2[4].keywords == ['Parcelles cadastrales']
    assert iden.keywords2[5].type == 'theme'
    assert iden.keywords2[5].thesaurus['title'] == "GEMET"
    assert iden.keywords2[5].thesaurus['date'] == '2012-07-20'
    assert iden.keywords2[5].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[5].keywords, 2)
    assert iden.keywords2[5].keywords == ['cadastre', u'bâtiment']

    # topic category and supplemental information
    assert_list(iden.topiccategory, 1)
    assert iden.topiccategory[0] == 'planningCadastre'
    assert iden.supplementalinformation == \
        u"La légende du plan cadastral est consultable sur: " \
        "http://www.cadastre.gouv.fr/scpc/pdf/legendes/FR_fr/Legende%20du" \
        "%20plan%20sur%20internet.pdf"

    # content info
    assert_list(md.contentinfo, 1)
    ci = md.contentinfo[0]
    assert ci.compliancecode is None
    assert_list(ci.language, 0)
    assert ci.includedwithdataset == False
    assert_list(ci.featuretypenames, 0)
    assert_list(ci.featurecatalogues, 0)
def __init__(self, elem, parent=None, children=None, index=0,
             parse_remote_metadata=False, timeout=30):
    """Parse a WMS capabilities Layer element into content metadata.

    :param elem: etree element for the Layer node (validated below).
    :param parent: parent ContentMetadata; inheritable properties
        (bbox, CRS options, styles) are copied from it.
    :param children: optional pre-built child metadata list.
    :param index: position used to build the hierarchical layer index.
    :param parse_remote_metadata: if True, fetch and parse MetadataURLs.
    :param timeout: socket timeout in seconds for remote fetches.
    """
    if xmltag_split(elem.tag) != 'Layer':
        raise ValueError('%s should be a Layer' % (elem, ))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)
    self._children = children

    self.id = self.name = testXMLValue(
        elem.find(nspath('Name', WMS_NAMESPACE)))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find(nspath('Title', WMS_NAMESPACE)))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(
        elem.find(nspath('Abstract', WMS_NAMESPACE)))

    # TODO: what is the preferred response to esri's handling of custom projections
    # in the spatial ref definitions?
    # see http://resources.arcgis.com/en/help/main/10.1/index.html#//00sq000000m1000000
    # and an example (20150812) http://maps.ngdc.noaa.gov/arcgis/services/firedetects/MapServer/WMSServer?request=GetCapabilities&service=WMS

    # bboxes
    b = elem.find(nspath('EX_GeographicBoundingBox', WMS_NAMESPACE))
    self.boundingBoxWGS84 = None
    if b is not None:
        minx = b.find(nspath('westBoundLongitude', WMS_NAMESPACE))
        miny = b.find(nspath('southBoundLatitude', WMS_NAMESPACE))
        maxx = b.find(nspath('eastBoundLongitude', WMS_NAMESPACE))
        maxy = b.find(nspath('northBoundLatitude', WMS_NAMESPACE))
        box = tuple(
            map(float, [
                minx.text if minx is not None else None,
                miny.text if miny is not None else None,
                maxx.text if maxx is not None else None,
                maxy.text if maxy is not None else None
            ]))
        self.boundingBoxWGS84 = tuple(box)
    elif self.parent:
        # WGS84 bbox is inheritable from the parent layer
        if hasattr(self.parent, 'boundingBoxWGS84'):
            self.boundingBoxWGS84 = self.parent.boundingBoxWGS84

    # make a bbox list (of tuples)
    crs_list = []
    for bb in elem.findall(nspath('BoundingBox', WMS_NAMESPACE)):
        srs_str = bb.attrib.get('CRS', None)
        srs = Crs(srs_str)
        box = tuple(
            map(float, [
                bb.attrib['minx'], bb.attrib['miny'],
                bb.attrib['maxx'], bb.attrib['maxy']
            ]))
        minx, miny, maxx, maxy = box[0], box[1], box[2], box[3]
        # handle the ordering so that it always
        # returns (minx, miny, maxx, maxy)
        if srs and srs.axisorder == 'yx':
            # reverse things
            minx, miny, maxx, maxy = box[1], box[0], box[3], box[2]
        crs_list.append((
            minx, miny, maxx, maxy,
            srs_str,
        ))
    self.crs_list = crs_list
    # and maintain the original boundingBox attribute (first in list)
    # or the wgs84 bbox (to handle cases of incomplete parentage)
    self.boundingBox = crs_list[0] if crs_list else self.boundingBoxWGS84

    # ScaleHint
    sh = elem.find(nspath('ScaleHint', WMS_NAMESPACE))
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {
                'min': sh.attrib['min'],
                'max': sh.attrib['max']
            }

    attribution = elem.find(nspath('Attribution', WMS_NAMESPACE))
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find(nspath('Title', WMS_NAMESPACE))
        url = attribution.find(nspath('OnlineResource', WMS_NAMESPACE))
        logo = attribution.find(nspath('LogoURL', WMS_NAMESPACE))
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib[
                '{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']),
                                             int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                nspath('OnlineResource', WMS_NAMESPACE)
            ).attrib['{http://www.w3.org/1999/xlink}href']

    # TODO: get this from the bbox attributes instead (deal with parents)
    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer
    if elem.find(nspath('CRS', WMS_NAMESPACE)) is not None:
        # some servers found in the wild use a single SRS
        # tag containing a whitespace separated list of SRIDs
        # instead of several SRS tags. hence the inner loop
        for srslist in [
                x.text for x in elem.findall(nspath('CRS', WMS_NAMESPACE))
        ]:
            if srslist:
                for srs in srslist.split():
                    self.crsOptions.append(srs)

    # Get rid of duplicate entries
    self.crsOptions = list(set(self.crsOptions))

    # Set self.crsOptions to None if the layer (and parents) had no SRS options
    if len(self.crsOptions) == 0:
        # raise ValueError('%s no SRS available!?' % (elem,))
        # Comment by D Lowe.
        # Do not raise ValueError as it is possible that a layer is
        # purely a parent layer and does not have SRS specified.
        # Instead set crsOptions to None
        # Comment by Jachym:
        # Do not set it to None, but to [], which will make the code
        # work further. Fixed by anthonybaxter
        self.crsOptions = []

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall(nspath('Style', WMS_NAMESPACE)):
        name = s.find(nspath('Name', WMS_NAMESPACE))
        title = s.find(nspath('Title', WMS_NAMESPACE))
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s, ))
        style = {'title': title.text}
        # legend url
        legend = s.find(nspath('LegendURL/OnlineResource', WMS_NAMESPACE))
        if legend is not None:
            style['legend'] = legend.attrib[
                '{http://www.w3.org/1999/xlink}href']
        lgd = s.find(nspath('LegendURL', WMS_NAMESPACE))
        if lgd is not None:
            if 'width' in list(lgd.attrib.keys()):
                style['legend_width'] = lgd.attrib.get('width')
            if 'height' in list(lgd.attrib.keys()):
                style['legend_height'] = lgd.attrib.get('height')
            lgd_format = lgd.find(nspath('Format', WMS_NAMESPACE))
            if lgd_format is not None:
                style['legend_format'] = lgd_format.text.strip()
        self.styles[name.text] = style

    # keywords
    self.keywords = [
        f.text
        for f in elem.findall(nspath('KeywordList/Keyword', WMS_NAMESPACE))
    ]

    # extents replaced by dimensions of name
    # comment by Soren Scott
    # <Dimension name="elevation" units="meters" default="500" multipleValues="1"
    #  nearestValue="0" current="true" unitSymbol="m">500, 490, 480</Dimension>
    # it can be repeated with the same name so ?
    # this assumes a single one to match 1.1.1
    self.timepositions = None
    self.defaulttimeposition = None
    time_dimension = None
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'time':
            time_dimension = dim
    if time_dimension is not None:
        self.timepositions = time_dimension.text.split(
            ',') if time_dimension.text else None
        self.defaulttimeposition = time_dimension.attrib.get(
            'default', None)

    # Elevations - available vertical levels
    self.elevations = None
    elev_dimension = None
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        # BUGFIX: the original tested dim.attrib.get('elevation'), i.e. an
        # attribute literally named "elevation" -- Dimension elements carry
        # name/units/default attributes, so that never matched. Identify the
        # elevation dimension by its name, as done for time above.
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'elevation':
            elev_dimension = dim
    if elev_dimension is not None:
        self.elevations = [
            e.strip() for e in elev_dimension.text.split(',')
        ] if elev_dimension.text else None

    # and now capture the dimensions as more generic things (and custom things)
    self.dimensions = {}
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        dim_name = dim.attrib.get('name')
        dim_data = {}
        for k, v in six.iteritems(dim.attrib):
            if k != 'name':
                dim_data[k] = v
        # single values and ranges are not differentiated here
        # BUGFIX: guard against Dimension elements with no text node;
        # the original called dim.text.strip() unconditionally, raising
        # AttributeError when dim.text is None.
        dim_data['values'] = dim.text.strip().split(
            ',') if dim.text and dim.text.strip() else None
        self.dimensions[dim_name] = dim_data

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL', WMS_NAMESPACE)):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find(nspath('Format', WMS_NAMESPACE))),
            'url': testXMLValue(
                m.find(nspath('OnlineResource', WMS_NAMESPACE)).attrib[
                    '{http://www.w3.org/1999/xlink}href'],
                attrib=True)
        }
        if metadataUrl['url'] is not None \
                and parse_remote_metadata:  # download URL
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # best-effort: remote metadata is optional
                metadataUrl['metadata'] = None
        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall(nspath('DataURL', WMS_NAMESPACE)):
        dataUrl = {
            'format': m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url': m.find(nspath('OnlineResource', WMS_NAMESPACE)).attrib[
                '{http://www.w3.org/1999/xlink}href']
        }
        self.dataUrls.append(dataUrl)

    # FeatureListURLs
    self.featureListUrls = []
    for m in elem.findall(nspath('FeatureListURL', WMS_NAMESPACE)):
        featureUrl = {
            'format': m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url': m.find(nspath('OnlineResource', WMS_NAMESPACE)).attrib[
                '{http://www.w3.org/1999/xlink}href']
        }
        self.featureListUrls.append(featureUrl)

    # child layers, parsed recursively
    self.layers = []
    for child in elem.findall(nspath('Layer', WMS_NAMESPACE)):
        self.layers.append(ContentMetadata(child, self))
def from_xml(self, e):
    '''Build and return an InspireMetadata object from a (serialized)
    etree Element e.

    Parses the element with owslib's MD_Metadata and maps the result to
    the local InspireMetadata schema (keywords/thesauri, extents, dates,
    spatial resolution, reference system, conformity, constraints).
    Raises Exception on inconsistent list lengths or an unrecognizable
    reference system.
    '''

    def to_date(s):
        # ISO yyyy-mm-dd string -> datetime.date; anything else -> None
        return strptime(s, '%Y-%m-%d').date() if isinstance(s, str) else None

    def to_responsible_party(alist):
        # map owslib contact objects to local ResponsibleParty instances
        result = []
        for it in alist:
            result.append(
                ResponsibleParty(organization=unicode(it.organization),
                                 email=unicode(it.email),
                                 role=it.role))
        return result

    # Parse object
    md = MD_Metadata(e)

    datestamp = to_date(md.datestamp)
    id_list = md.identification.uricode

    url_list = []
    if md.distribution:
        for it in md.distribution.online:
            url_list.append(it.url)

    topic_list = []
    for topic in md.identification.topiccategory:
        topic_list.append(topic)

    free_keywords = []
    keywords = {}
    for it in md.identification.keywords:
        thes_title = it['thesaurus']['title']
        # Lookup and instantiate a named thesaurus
        thes = None
        if thes_title:
            # split "<title>, version x.y" into title and version parts
            try:
                thes_title, thes_version = thes_title.split(',')
            except:
                thes_version = None
            else:
                thes_version = re.sub(r'^[ ]*version[ ]+(\d\.\d)$', r'\1',
                                      thes_version)
            # Note thes_version can be used to enforce a specific thesaurus version
            try:
                thes = Thesaurus.lookup(title=thes_title, for_keywords=True)
            except ValueError:
                thes = None
        # Treat present keywords depending on if they belong to a thesaurus
        if thes:
            # Treat as thesaurus terms; discard unknown terms
            terms = []
            for keyword in it['keywords']:
                term = thes.vocabulary.by_value.get(keyword)
                if not term:
                    term = thes.vocabulary.by_token.get(keyword)
                if term:
                    terms.append(term.value)
            keywords[thes.name] = ThesaurusTerms(thesaurus=thes, terms=terms)
        else:
            # Treat as free keywords (not really a thesaurus)
            vocab_date = to_date(it['thesaurus']['date'])
            vocab_datetype = it['thesaurus']['datetype']
            if thes_title:
                thes_title = unicode(thes_title)
            for keyword in it['keywords']:
                free_keywords.append(
                    FreeKeyword(value=keyword,
                                reference_date=vocab_date,
                                date_type=vocab_datetype,
                                originating_vocabulary=thes_title))

    temporal_extent = []
    if md.identification.temporalextent_start or \
            md.identification.temporalextent_end:
        temporal_extent = [
            TemporalExtent(
                start=to_date(md.identification.temporalextent_start),
                end=to_date(md.identification.temporalextent_end))
        ]

    bbox = []
    if md.identification.extent:
        if md.identification.extent.boundingBox:
            bbox = [
                GeographicBoundingBox(
                    nblat=float(md.identification.extent.boundingBox.maxy),
                    sblat=float(md.identification.extent.boundingBox.miny),
                    eblng=float(md.identification.extent.boundingBox.maxx),
                    wblng=float(md.identification.extent.boundingBox.minx))
            ]

    # keep only the last date of each type if repeated
    creation_date = None
    publication_date = None
    revision_date = None
    for it in md.identification.date:
        if it.type == 'creation':
            creation_date = to_date(it.date)
        elif it.type == 'publication':
            publication_date = to_date(it.date)
        elif it.type == 'revision':
            revision_date = to_date(it.date)

    # spatial resolution: distance/uom pairs plus scale denominators
    spatial_list = []
    if len(md.identification.distance) != len(md.identification.uom):
        raise Exception(
            _('Found unequal list lengths distance,uom (%s, %s)' %
              (md.identification.distance, md.identification.uom)))
    else:
        for i in range(0, len(md.identification.distance)):
            spatial_list.append(
                SpatialResolution(distance=int(
                    md.identification.distance[i]),
                                  uom=unicode(md.identification.uom[i])))
        for i in range(0, len(md.identification.denominators)):
            spatial_list.append(
                SpatialResolution(
                    denominator=int(md.identification.denominators[i])))

    conf_list = []
    invalid_degree = False

    #if md.referencesystem.codeSpace:
    #    code_space = md.referenceSystem.codeSpace
    reference_system = None
    if md.referencesystem:
        code = md.referencesystem.code
        reference_systems = vocabularies.by_name('reference-systems').get(
            'vocabulary')
        if code in reference_systems:
            # Check whether the URI is provided
            reference_system = ReferenceSystem(code=code)
        else:
            # Check whether just the EPSG code suffix is provided
            code_full = 'http://www.opengis.net/def/crs/EPSG/0/{code}'.format(
                code=code)
            if code_full in reference_systems:
                reference_system = ReferenceSystem(code=code_full)
            else:
                raise Exception(_('Reference system not recognizable'))
        if md.referencesystem.codeSpace:
            reference_system.code_space = md.referencesystem.codeSpace
        if md.referencesystem.version:
            reference_system.version = md.referencesystem.version

    if len(md.dataquality.conformancedate) != len(
            md.dataquality.conformancedatetype):
        # Date list is unequal to datetype list, this means wrong XML so exception is thrown
        raise Exception(
            _('Found unequal list lengths: conformance date, conformancedatetype'
              ))
    if len(md.dataquality.conformancedegree) != len(
            md.dataquality.conformancedate):
        # Degree list is unequal to date/datetype lists, so we are unable to conclude
        # to which conformity item each degree value corresponds, so all are set to
        # not-evaluated (Todo: MD_Metadata bug #63)
        invalid_degree = True
    if md.dataquality.conformancedate:
        #and len(md.dataquality.conformancedate) == len(md.dataquality.degree):
        for i in range(0, len(md.dataquality.conformancedate)):
            date = to_date(md.dataquality.conformancedate[i])
            date_type = md.dataquality.conformancedatetype[i]
            # TODO md.dataquality.conformancedatetype returns empty
            if invalid_degree:
                degree = 'not-evaluated'
            else:
                # NOTE(review): if conformancedegree[i] is neither 'true'
                # nor 'false', `degree` is left unassigned here (only
                # IndexError is effectively handled by the bare except) --
                # a NameError can surface at Conformity() below; confirm
                # against real inputs.
                try:
                    if md.dataquality.conformancedegree[i] == 'true':
                        degree = 'conformant'
                    elif md.dataquality.conformancedegree[i] == 'false':
                        degree = 'not-conformant'
                except:
                    degree = "not-evaluated"
            # NOTE(review): conformancetitle may be shorter than
            # conformancedate -- potential IndexError; verify upstream.
            title = unicode(md.dataquality.conformancetitle[i])
            if title != 'None':
                conf_list.append(
                    Conformity(title=title,
                               date=date,
                               date_type=date_type,
                               degree=degree))
            # TODO: is title required fields? If so the following is unnecessary
            else:
                conf_list.append(
                    Conformity(date=date, date_type=date_type,
                               degree=degree))

    limit_list = []
    for it in md.identification.uselimitation:
        limit_list.append(unicode(it))
    constr_list = []
    for it in md.identification.otherconstraints:
        constr_list.append(unicode(it))

    # assemble the target object
    obj = InspireMetadata()
    obj.contact = to_responsible_party(md.contact)
    obj.datestamp = datestamp
    obj.languagecode = md.languagecode
    obj.title = unicode(md.identification.title)
    obj.abstract = unicode(md.identification.abstract)
    obj.identifier = id_list[0]
    obj.locator = url_list
    #obj.resource_language = md.identification.resourcelanguage
    obj.topic_category = topic_list
    obj.keywords = keywords
    obj.free_keywords = free_keywords
    obj.bounding_box = bbox
    obj.temporal_extent = temporal_extent
    obj.creation_date = creation_date
    obj.publication_date = publication_date
    obj.revision_date = revision_date
    obj.lineage = unicode(md.dataquality.lineage)
    obj.spatial_resolution = spatial_list
    obj.reference_system = reference_system
    obj.conformity = conf_list
    obj.access_constraints = limit_list
    obj.limitations = constr_list
    obj.responsible_party = to_responsible_party(md.identification.contact)

    return obj
def from_xml(self, e):
    '''Build and return an InspireMetadata object serialized as an etree
    Element e.

    The element is parsed with owslib's MD_Metadata (ISO 19139) and its
    fields are mapped onto a new InspireMetadata instance.

    Raises Exception when the parallel lists in the record are of unequal
    length (distance/uom, conformance date/datetype), which indicates a
    malformed XML document.
    '''

    def to_date(string):
        # owslib returns dates as 'YYYY-MM-DD' strings; anything else
        # (None, already-parsed values) maps to None.
        if isinstance(string, str):
            return datetime.datetime.strptime(string, '%Y-%m-%d').date()
        else:
            return None

    def to_resp_party(alist):
        # Map owslib responsible-party items to our ResponsibleParty type.
        result = []
        for it in alist:
            result.append(ResponsibleParty(
                organization=unicode(it.organization),
                email=unicode(it.email),
                role=it.role))
        return result

    md = MD_Metadata(e)

    datestamp = to_date(md.datestamp)
    id_list = md.identification.uricode

    url_list = []
    if md.distribution:
        for it in md.distribution.online:
            url_list.append(it.url)

    topic_list = []
    for topic in md.identification.topiccategory:
        topic_list.append(topic)

    # Group keywords by their (munged) thesaurus name. Unknown or
    # unresolvable thesauri are skipped on a best-effort basis.
    keywords_dict = {}
    for it in md.identification.keywords:
        thes_title = it['thesaurus']['title']
        if thes_title is not None:
            thes_split = thes_title.split(',')
            # TODO thes_split[1] (=version) can be used in a
            # get_by_title_and_version() to enforce a specific thesaurus
            # version.
            thes_title = thes_split[0]
            try:
                thes_name = vocabularies.munge('Keywords-' + thes_title)
                term_list = []
                for t in it['keywords']:
                    term_list.append(t)
                thes = Thesaurus.make(thes_name)
                if thes:
                    kw = ThesaurusTerms(thesaurus=thes, terms=term_list)
                    keywords_dict.update({thes_name: kw})
            except Exception:
                # Best-effort: ignore keyword groups whose thesaurus
                # cannot be resolved.
                pass

    temporal_extent = []
    if (md.identification.temporalextent_start or
            md.identification.temporalextent_end):
        temporal_extent = [TemporalExtent(
            start=to_date(md.identification.temporalextent_start),
            end=to_date(md.identification.temporalextent_end))]

    bbox = []
    if md.identification.extent:
        if md.identification.extent.boundingBox:
            bbox = [GeographicBoundingBox(
                nblat=float(md.identification.extent.boundingBox.maxy),
                sblat=float(md.identification.extent.boundingBox.miny),
                eblng=float(md.identification.extent.boundingBox.maxx),
                wblng=float(md.identification.extent.boundingBox.minx))]

    creation_date = None
    publication_date = None
    revision_date = None
    for it in md.identification.date:
        if it.type == 'creation':
            creation_date = to_date(it.date)
        elif it.type == 'publication':
            publication_date = to_date(it.date)
        elif it.type == 'revision':
            revision_date = to_date(it.date)

    spatial_list = []
    if len(md.identification.distance) != len(md.identification.uom):
        # Each distance value must have a matching unit of measure.
        raise Exception(
            'Found unequal list lengths distance,uom (%s, %s)' % (
                md.identification.distance, md.identification.uom))
    for i in range(0, len(md.identification.distance)):
        spatial_list.append(SpatialResolution(
            distance=int(md.identification.distance[i]),
            uom=unicode(md.identification.uom[i])))
    for i in range(0, len(md.identification.denominators)):
        spatial_list.append(SpatialResolution(
            denominator=int(md.identification.denominators[i])))

    conf_list = []
    invalid_degree = False
    if len(md.dataquality.conformancedate) != \
            len(md.dataquality.conformancedatetype):
        # Date list is unequal to datetype list, this means wrong XML so
        # exception is thrown
        raise Exception('Found unequal list lengths: conformance date, conformancedatetype')
    if len(md.dataquality.conformancedegree) != \
            len(md.dataquality.conformancedate):
        # Degree list is unequal to date/datetype lists, so we are unable
        # to conclude to which conformity item each degree value
        # corresponds, so all are set to not-evaluated
        # (Todo: MD_Metadata bug #63)
        invalid_degree = True

    if md.dataquality.conformancedate:
        for i in range(0, len(md.dataquality.conformancedate)):
            date = to_date(md.dataquality.conformancedate[i])
            date_type = md.dataquality.conformancedatetype[i]
            # TODO md.dataquality.conformancedatetype returns empty
            if invalid_degree:
                degree = 'not-evaluated'
            else:
                try:
                    if md.dataquality.conformancedegree[i] == 'true':
                        degree = 'conformant'
                    elif md.dataquality.conformancedegree[i] == 'false':
                        degree = 'not-conformant'
                    else:
                        # BUG FIX: an unexpected degree value previously
                        # left `degree` unbound, raising NameError below.
                        degree = 'not-evaluated'
                except (IndexError, TypeError):
                    degree = 'not-evaluated'
            # The title list may be shorter than the date list in broken
            # records; treat a missing title like an absent one.
            try:
                title = unicode(md.dataquality.conformancetitle[i])
            except IndexError:
                title = 'None'
            # unicode(None) yields u'None', i.e. no title was present.
            # TODO: is title a required field? If so the else branch is
            # unnecessary.
            if title != 'None':
                conf_list.append(Conformity(
                    title=title, date=date, date_type=date_type,
                    degree=degree))
            else:
                conf_list.append(Conformity(
                    date=date, date_type=date_type, degree=degree))

    limit_list = []
    for it in md.identification.uselimitation:
        limit_list.append(unicode(it))

    constr_list = []
    for it in md.identification.otherconstraints:
        constr_list.append(unicode(it))

    obj = InspireMetadata()
    obj.contact = to_resp_party(md.contact)
    obj.datestamp = datestamp
    obj.languagecode = md.languagecode
    obj.title = unicode(md.identification.title)
    obj.abstract = unicode(md.identification.abstract)
    obj.identifier = id_list[0]
    obj.locator = url_list
    obj.topic_category = topic_list
    obj.keywords = keywords_dict
    obj.bounding_box = bbox
    obj.temporal_extent = temporal_extent
    obj.creation_date = creation_date
    obj.publication_date = publication_date
    obj.revision_date = revision_date
    obj.lineage = unicode(md.dataquality.lineage)
    obj.spatial_resolution = spatial_list
    obj.conformity = conf_list
    obj.access_constraints = limit_list
    obj.limitations = constr_list
    obj.responsible_party = to_resp_party(md.identification.contact)

    return obj