def get_number_records(request):
    """Return the ``numberOfRecordsMatched`` count of a CSW response.

    ``request.content`` is parsed as XML with entity resolution disabled and
    the first ``csw:SearchResults`` element found from the root supplies
    the attribute, which is converted to ``int``.
    """
    payload = request.content
    xml_parser = etree.XMLParser(resolve_entities=False)
    root = etree.fromstring(payload, xml_parser)

    matches = root.findall(
        '{http://www.opengis.net/cat/csw/2.0.2}SearchResults')
    first_match = matches[0]
    return int(first_match.attrib['numberOfRecordsMatched'])
def _render_xslt(self, res):
    ''' Validate and render XSLT

    Looks for a stylesheet registered in ``self.xslts`` under the key
    ``xslt:<res.schema>,<outputschema>`` and applies it to ``res.xml``.

    :param res: object providing ``schema`` and ``xml`` attributes
    :returns: root element of the transformed document, or ``None`` when
              no stylesheet is registered for the schema pair
    :raises RuntimeError: when the lookup inputs are unavailable or the
                          stylesheet cannot be loaded or applied
    '''

    LOGGER.debug('Rendering XSLT')
    try:
        input_os = res.schema
        output_os = self.kvp['outputschema']
        xslt_id = 'xslt:%s,%s' % (input_os, output_os)

        # "Not registered" is an expected outcome: use a default instead of
        # StopIteration so that a StopIteration raised further down is not
        # mistaken for a missing stylesheet.
        xslt_dict = next((d for d in self.xslts if xslt_id in d), None)
        if xslt_dict is None:
            LOGGER.debug('No matching XSLT found')
            return None

        LOGGER.debug('XSLT ID: %s', xslt_id)
        LOGGER.debug('Found matching XSLT transformation')

        xslt = xslt_dict[xslt_id]
        transform = etree.XSLT(etree.parse(xslt))
        doc = etree.fromstring(res.xml, self.context.parser)
        return transform(doc).getroot()
    except Exception as err:
        LOGGER.warning('XSLT transformation failed: %s', err)
        # keep the exception type callers expect, but carry the cause
        raise RuntimeError('XSLT transformation failed: %s' % err)
def _parse_metadata(context, repos, record):
    """parse metadata formats

    :param context: object providing ``parser`` and ``namespaces``
    :param repos: passed through to the format-specific parser
    :param record: XML as ``str`` or ``bytes``, a parsed document (anything
                   with ``getroot``) or an element
    :returns: one-item list with the parsed record; ``None`` for DIF, which
              is not implemented yet
    :raises RuntimeError: when the root element is not a supported format
    """

    if isinstance(record, (bytes, str)):
        # not parsed yet; bytes are accepted as well so that raw payloads
        # do not fall through to the element branch and fail on ``.tag``
        exml = etree.fromstring(record, context.parser)
    elif hasattr(record, 'getroot'):  # standalone document
        exml = record.getroot()
    else:  # part of a larger document
        exml = record

    root = exml.tag

    LOGGER.debug('Serialized metadata, parsing content model')

    if root == '{%s}MD_Metadata' % context.namespaces['gmd']:  # ISO
        return [_parse_iso(context, repos, exml)]
    if root == '{http://www.isotc211.org/2005/gmi}MI_Metadata':
        # ISO Metadata for Imagery
        return [_parse_iso(context, repos, exml)]
    if root == 'metadata':  # FGDC
        return [_parse_fgdc(context, repos, exml)]
    if root == '{%s}Record' % context.namespaces['csw']:  # Dublin Core CSW
        return [_parse_dc(context, repos, exml)]
    if root == '{%s}RDF' % context.namespaces['rdf']:  # Dublin Core RDF
        return [_parse_dc(context, repos, exml)]
    if root == '{%s}DIF' % context.namespaces['dif']:  # DIF
        # TODO: DIF parsing is not implemented.
        # NOTE(review): callers that index the result will fail on None.
        return None

    raise RuntimeError('Unsupported metadata format')
def write_record(self, result, esn, outputschema, queryables):
    ''' Return csw:SearchResults child as lxml.etree.Element

    Builds a rim:ExtrinsicObject from the core-model attributes of
    ``result``.  When the full element set is requested for a record typed
    rim:RegistryObject, the stored XML is parsed and returned instead.
    '''

    mappings = self.context.md_core_model['mappings']

    def qattr(key):
        # value of the record attribute mapped to core-model name `key`
        return util.getqattr(result, mappings[key])

    def rim(tag):
        # qualified name of `tag` in the rim namespace
        return util.nspath_eval('rim:%s' % tag, self.namespaces)

    identifier = qattr('pycsw:Identifier')
    typename = qattr('pycsw:Typename')

    if esn == 'full' and typename == 'rim:RegistryObject':
        # dump record as is and exit
        stored_xml = util.getqattr(result, queryables['pycsw:XML']['dbcol'])
        return etree.fromstring(stored_xml, self.context.parser)

    if typename == 'csw:Record':
        # transform csw:Record -> rim:RegistryObject model mappings
        util.transform_mappings(queryables,
                                self.repository['mappings']['csw:Record'])

    node = etree.Element(rim('ExtrinsicObject'))
    schema_location = '%s %s/csw/2.0.2/profiles/ebrim/1.0/csw-ebrim.xsd' % (
        self.namespaces['wrs'], self.ogc_schemas_base)
    node.attrib[util.nspath_eval('xsi:schemaLocation',
                                 self.context.namespaces)] = schema_location

    node.attrib['id'] = identifier
    node.attrib['lid'] = identifier
    node.attrib['objectType'] = str(qattr('pycsw:Type'))
    node.attrib['status'] = \
        'urn:oasis:names:tc:ebxml-regrep:StatusType:Submitted'

    etree.SubElement(node, rim('VersionInfo'), versionName='')

    if esn in ['summary', 'full']:
        etree.SubElement(
            node, rim('ExternalIdentifier'),
            value=identifier,
            identificationScheme='foo',
            registryObject=str(qattr('pycsw:Relation')),
            id=identifier)

    name_el = etree.SubElement(node, rim('Name'))
    etree.SubElement(name_el, rim('LocalizedString'),
                     value=unicode(qattr('pycsw:Title')))

    description_el = etree.SubElement(node, rim('Description'))
    etree.SubElement(description_el, rim('LocalizedString'),
                     value=unicode(qattr('pycsw:Abstract')))

    bboxel = write_boundingbox(qattr('pycsw:BoundingBox'),
                               self.context.namespaces)
    if bboxel is not None:
        bboxslot = etree.SubElement(
            node, rim('Slot'),
            slotType='urn:ogc:def:dataType:ISO-19107:2003:GM_Envelope')
        bbox_values = etree.SubElement(bboxslot, rim('ValueList'))
        etree.SubElement(bbox_values, rim('Value')).append(bboxel)

    rkeywords = qattr('pycsw:Keywords')
    if rkeywords is not None:
        subjectslot = etree.SubElement(
            node, rim('Slot'),
            name='http://purl.org/dc/elements/1.1/subject')
        keyword_values = etree.SubElement(subjectslot, rim('ValueList'))
        for keyword in rkeywords.split(','):
            etree.SubElement(keyword_values, rim('Value')).text = keyword

    return node
def get_anytext(bag):
    """
    generate bag of text for free text searches
    accepts list of words, string of XML, or etree.Element
    """

    if isinstance(bag, list):
        # list of words: drop the empty entries, join the rest
        words = [word for word in bag if word]
        return ' '.join(words).strip()

    # xml: parse first when it has not been serialized yet
    if isinstance(bag, (unicode, str)):
        bag = etree.fromstring(bag, PARSER)

    # get all XML element content
    text_nodes = bag.xpath('//text()')
    return ' '.join(text.strip() for text in text_nodes)
def get_anytext(bag):
    """
    generate bag of text for free text searches
    accepts list of words, string of XML, or etree.Element
    """

    # list of words
    if isinstance(bag, list):
        return ' '.join(filter(None, bag)).strip()

    # xml given as bytes or text: serialize to lxml
    if isinstance(bag, (binary_type, text_type)):
        bag = etree.fromstring(bag, PARSER)

    # get all XML element content
    stripped = (value.strip() for value in bag.xpath('//text()'))
    return ' '.join(stripped)
def get_anytext(bag):
    """
    generate bag of text for free text searches
    accepts list of words, string of XML, or etree.Element
    """

    if not isinstance(bag, list):
        # xml: bytes and str still have to be serialized to lxml
        if isinstance(bag, (bytes, str)):
            tree = etree.fromstring(bag, PARSER)
        else:
            tree = bag
        # get all XML element content
        contents = [content.strip() for content in tree.xpath('//text()')]
        return ' '.join(contents)

    # list of words, empty entries skipped
    return ' '.join(word for word in bag if word).strip()
def _parse_waf(context, repos, record, identifier):
    """Harvest the .xml documents linked from a web accessible folder.

    The page at URL ``record`` is fetched and parsed as HTML, every
    ``a/@href`` ending in ``.xml`` is collected, and each collected link is
    fetched and handed to ``_parse_metadata``.  The first item of each parse
    result gets ``source`` and ``mdsource`` set to its link.

    ``identifier`` is not used by this function.

    Raises ``Exception`` when the page cannot be parsed.
    """

    recobjs = []

    content = util.http_request('GET', record)
    LOGGER.debug(content)

    try:
        html_parser = etree.HTMLParser()
        tree = etree.fromstring(content, html_parser)
    except Exception as err:
        raise Exception('Could not parse WAF: %s' % str(err))

    up = urlparse(record)
    links = []

    LOGGER.debug('collecting links')
    for href in tree.xpath('//a/@href'):
        href = href.strip()
        if not href:
            continue
        if '?' in href:
            continue
        if not href.endswith('.xml'):
            LOGGER.debug('Skipping, not .xml')
            continue

        if '/' not in href:
            # bare file name: tack on href to WAF URL
            href = '%s/%s' % (record, href)
        else:
            # path is embedded in link
            if href[-1] == '/':  # directory, skip
                continue
            if href[0] == '/':
                # strip path of WAF URL
                href = '%s://%s%s' % (up.scheme, up.netloc, href)

        LOGGER.debug('URL is: %s', href)
        links.append(href)

    LOGGER.debug('%d links found', len(links))

    for target in links:
        LOGGER.debug('Processing link %s', target)
        # fetch and parse
        linkcontent = util.http_request('GET', target)
        recobj = _parse_metadata(context, repos, linkcontent)[0]
        recobj.source = target
        recobj.mdsource = target
        recobjs.append(recobj)

    return recobjs
def update_xpath(nsmap, xml, recprop):
    """Update XML document XPath values

    :param nsmap: string that evaluates to the namespace prefix map
    :param xml: XML document as text, byte string or parsed lxml object
    :param recprop: string that evaluates to a dict holding the XPath under
                    ``['rp']['xpath']`` and the new text under ``['value']``
    :returns: the serialized, updated document
    :raises RuntimeError: when evaluating the XPath or updating a node fails
    """

    # Parse both text and byte strings; previously only ``unicode`` was
    # parsed, so byte-string XML reached ``xml.xpath`` unparsed and failed.
    if isinstance(xml, (unicode, str)):  # not lxml serialized yet
        xml = etree.fromstring(xml, PARSER)

    # SECURITY NOTE(review): eval() executes whatever these strings contain.
    # This is only acceptable if both arguments are always produced by
    # trusted code - confirm against the callers.
    recprop = eval(recprop)
    nsmap = eval(nsmap)

    try:
        for node in xml.xpath(recprop['rp']['xpath'], namespaces=nsmap):
            if node.text != recprop['value']:  # values differ, update
                node.text = recprop['value']
    except Exception as err:
        raise RuntimeError('ERROR: %s' % str(err))

    return etree.tostring(xml)
def update_xpath(nsmap, xml, recprop):
    """Update XML document XPath values

    ``nsmap`` and ``recprop`` arrive as strings and are evaluated into
    Python objects; the XPath is read from ``recprop['rp']['xpath']`` and
    the replacement text from ``recprop['value']``.  Returns the serialized
    document; any failure while matching or updating is logged and
    re-raised as ``RuntimeError``.
    """

    if isinstance(xml, (bytes, str)):
        # serialize to lxml
        xml = etree.fromstring(xml, PARSER)

    # NOTE(review): eval() runs arbitrary code; assumes both strings come
    # from trusted callers - confirm.
    recprop = eval(recprop)
    nsmap = eval(nsmap)

    try:
        matches = xml.xpath(recprop['rp']['xpath'], namespaces=nsmap)
        if len(matches) > 0:  # matches
            new_text = recprop['value']
            for match in matches:
                if match.text != new_text:  # values differ, update
                    match.text = new_text
    except Exception as err:
        LOGGER.warning('update_xpath error', exc_info=True)
        raise RuntimeError('ERROR: %s' % str(err)) from err

    return etree.tostring(xml)
def get_service(raw_xml):
    """
    Get or create the Service described by a Dublin Core XML record.

    The service URL is read from ``dc:source`` and the service type from
    ``dc:format``, for example:

    <dc:format>OGC:WMS</dc:format>
    <dc:source>
    http://ngamaps.geointapps.org/arcgis/services/RIO/Rio_Foundation_Transportation/MapServer/WMSServer
    </dc:source>

    Either value is ``None`` when its element is not found.

    :param raw_xml: XML record; parsed with entity resolution disabled
    :return: Service, with the EPSG:4326 spatial reference system added
    """
    from pycsw.core.etree import etree

    parsed = etree.fromstring(raw_xml, etree.XMLParser(resolve_entities=False))

    source_tag = parsed.find("{http://purl.org/dc/elements/1.1/}source")
    format_tag = parsed.find("{http://purl.org/dc/elements/1.1/}format")

    # getattr with a default covers the case where find() gave no element
    service_url = getattr(source_tag, 'text', None)
    service_type = getattr(format_tag, 'text', None)

    service, _created = Service.objects.get_or_create(url=service_url,
                                                      is_monitored=False,
                                                      type=service_type)

    # TODO: dont hardcode SRS, get them from the parsed XML.
    srs, _created = SpatialReferenceSystem.objects.get_or_create(
        code="EPSG:4326")
    service.srs.add(srs)

    return service
def get_service(raw_xml):
    """
    Return the Service matching the given Dublin Core XML record, creating
    it when it does not exist yet.

    ``dc:source`` supplies the service URL and ``dc:format`` the service
    type, for example:

    <dc:format>OGC:WMS</dc:format>
    <dc:source>
    http://ngamaps.geointapps.org/arcgis/services/RIO/Rio_Foundation_Transportation/MapServer/WMSServer
    </dc:source>

    A value is left as ``None`` when its element is not found.

    :param raw_xml: XML record; parsed with entity resolution disabled
    :return: Service, with the EPSG:4326 spatial reference system added
    """
    from pycsw.core.etree import etree

    safe_parser = etree.XMLParser(resolve_entities=False)
    parsed = etree.fromstring(raw_xml, safe_parser)

    service_url = None
    service_type = None

    source_tag = parsed.find("{http://purl.org/dc/elements/1.1/}source")
    if hasattr(source_tag, 'text'):
        service_url = source_tag.text

    format_tag = parsed.find("{http://purl.org/dc/elements/1.1/}format")
    if hasattr(format_tag, 'text'):
        service_type = format_tag.text

    # the "created" flags are not needed here
    service = Service.objects.get_or_create(url=service_url,
                                            is_monitored=False,
                                            type=service_type)[0]

    # TODO: dont hardcode SRS, get them from the parsed XML.
    srs = SpatialReferenceSystem.objects.get_or_create(code="EPSG:4326")[0]
    service.srs.add(srs)

    return service
def test_load_records(client):
    """Load the sample layers and check they are counted by both the
    repository query and the catalog search API."""
    test_create_catalog(client)

    repo = registry.RegistryRepository()
    repo.catalog = catalog_slug

    records_xml = etree.fromstring(construct_payload(layers_list=layers_list))
    registry.load_records(repo, records_xml, config.StaticContext())

    # Provisional hack to refresh documents in elasticsearch.
    rawes.Elastic(registry.REGISTRY_SEARCH_URL).post('/_refresh')

    matched = int(repo.query('')[0])
    assert len(layers_list) == matched

    # Verify records added into elasticsearch using the search api.
    api_response = client.get(catalog_search_api)
    assert 200 == api_response.status_code

    body = json.loads(api_response.content.decode('utf-8'))
    assert len(layers_list) == body['a.matchDocs']

    test_clear_records(client)
def write_record(recobj, esn, context, url=None):
    ''' Return csw:SearchResults child as lxml.etree.Element

    Builds an FGDC ``metadata`` element from the core-model attributes of
    ``recobj``.  When ``esn`` is 'full' and the record is typed
    fgdc:metadata, the stored XML is parsed and returned instead.

    :param recobj: record whose attributes are read with util.getqattr
    :param esn: element set name
    :param context: provides md_core_model, namespaces and parser
    :param url: not used by this function
    :returns: lxml element
    '''

    mappings = context.md_core_model['mappings']

    def qattr(key):
        # value of the record attribute mapped to core-model name `key`
        return util.getqattr(recobj, mappings[key])

    typename = qattr('pycsw:Typename')
    if esn == 'full' and typename == 'fgdc:metadata':
        # dump record as is and exit; parse with the context's parser, the
        # same one the other stored-XML code paths use
        return etree.fromstring(qattr('pycsw:XML'), context.parser)

    node = etree.Element('metadata')
    node.attrib[util.nspath_eval('xsi:noNamespaceSchemaLocation',
                                 context.namespaces)] = \
        'http://www.fgdc.gov/metadata/fgdc-std-001-1998.xsd'

    idinfo = etree.SubElement(node, 'idinfo')

    # identifier
    etree.SubElement(idinfo, 'datasetid').text = qattr('pycsw:Identifier')

    citation = etree.SubElement(idinfo, 'citation')
    citeinfo = etree.SubElement(citation, 'citeinfo')

    # title
    etree.SubElement(citeinfo, 'title').text = qattr('pycsw:Title')

    # publisher
    publinfo = etree.SubElement(citeinfo, 'publinfo')
    etree.SubElement(publinfo, 'publish').text = \
        qattr('pycsw:Publisher') or ''

    # origin
    etree.SubElement(citeinfo, 'origin').text = qattr('pycsw:Creator') or ''

    # keywords
    keyword_list = qattr('pycsw:Keywords')
    if keyword_list:
        keywords = etree.SubElement(idinfo, 'keywords')
        theme = etree.SubElement(keywords, 'theme')
        for keyword in keyword_list.split(','):
            etree.SubElement(theme, 'themekey').text = keyword

    # accessconstraints
    etree.SubElement(idinfo, 'accconst').text = \
        qattr('pycsw:AccessConstraints') or ''

    # abstract
    descript = etree.SubElement(idinfo, 'descript')
    etree.SubElement(descript, 'abstract').text = \
        qattr('pycsw:Abstract') or ''

    # time
    datebegin = qattr('pycsw:TempExtent_begin')
    dateend = qattr('pycsw:TempExtent_end')
    if datebegin and dateend:
        timeperd = etree.SubElement(idinfo, 'timeperd')
        timeinfo = etree.SubElement(timeperd, 'timeinfo')
        # FGDC range-of-dates container; it was previously emitted as a
        # second, nested 'timeinfo' element
        rngdates = etree.SubElement(timeinfo, 'rngdates')
        etree.SubElement(rngdates, 'begdate').text = datebegin
        etree.SubElement(rngdates, 'enddate').text = dateend

    # bbox extent
    bboxel = write_extent(qattr('pycsw:BoundingBox'))
    if bboxel is not None:
        idinfo.append(bboxel)

    # contributor
    etree.SubElement(idinfo, 'datacred').text = \
        qattr('pycsw:Contributor') or ''

    # direct
    spdoinfo = etree.SubElement(idinfo, 'spdoinfo')
    etree.SubElement(spdoinfo, 'direct').text = qattr('pycsw:Type') or ''

    # formname
    distinfo = etree.SubElement(node, 'distinfo')
    stdorder = etree.SubElement(distinfo, 'stdorder')
    digform = etree.SubElement(stdorder, 'digform')
    digtinfo = etree.SubElement(digform, 'digtinfo')
    format_name = qattr('pycsw:Format') or ''
    etree.SubElement(digtinfo, 'formname').text = format_name
    etree.SubElement(citeinfo, 'geoform').text = format_name

    # source
    lineage = etree.SubElement(node, 'lineage')
    srcinfo = etree.SubElement(lineage, 'srcinfo')
    srccite = etree.SubElement(srcinfo, 'srccite')
    sciteinfo = etree.SubElement(srccite, 'citeinfo')
    etree.SubElement(sciteinfo, 'title').text = qattr('pycsw:Source') or ''

    etree.SubElement(citeinfo, 'onlink').text = \
        qattr('pycsw:Relation') or ''

    # links: '^' separates links, ',' separates the fields of one link;
    # the third field becomes the type, the last one the link text
    rlinks = qattr('pycsw:Links')
    if rlinks:
        for link in rlinks.split('^'):
            linkset = link.split(',')
            etree.SubElement(citeinfo, 'onlink',
                             type=linkset[2]).text = linkset[-1]

    # metd
    metainfo = etree.SubElement(node, 'metainfo')
    etree.SubElement(metainfo, 'metd').text = qattr('pycsw:Modified') or ''

    return node
def write_record(result, esn, context, url=None):
    ''' Return csw:SearchResults child as lxml.etree.Element

    Builds a dif:DIF element from the core-model attributes of ``result``.
    When ``esn`` is 'full' and the record is typed dif:DIF, the stored XML
    is parsed with ``context.parser`` and returned instead.
    '''

    mappings = context.md_core_model['mappings']

    def qattr(key):
        # value of the record attribute mapped to core-model name `key`
        return util.getqattr(result, mappings[key])

    def dif(tag):
        # qualified name of `tag` in the dif namespace
        return util.nspath_eval('dif:%s' % tag, NAMESPACES)

    typename = qattr('pycsw:Typename')
    if esn == 'full' and typename == 'dif:DIF':
        # dump record as is and exit
        return etree.fromstring(qattr('pycsw:XML'), context.parser)

    node = etree.Element(dif('DIF'))
    node.attrib[util.nspath_eval('xsi:schemaLocation',
                                 context.namespaces)] = \
        '%s http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif.xsd' % NAMESPACE

    # identifier
    etree.SubElement(node, dif('Entry_ID')).text = qattr('pycsw:Identifier')

    # title (empty string when missing)
    etree.SubElement(node, dif('Entry_Title')).text = \
        qattr('pycsw:Title') or ''

    # citation: creator, date, publisher, format
    citation = etree.SubElement(node, dif('Data_Set_Citation'))
    citation_fields = (
        ('Dataset_Creator', 'pycsw:Creator'),
        ('Dataset_Release_Date', 'pycsw:PublicationDate'),
        ('Dataset_Publisher', 'pycsw:Publisher'),
        ('Data_Presentation_Form', 'pycsw:Format'),
    )
    for tag, key in citation_fields:
        etree.SubElement(citation, dif(tag)).text = qattr(key)

    # iso topic category
    etree.SubElement(node, dif('ISO_Topic_Category')).text = \
        qattr('pycsw:TopicCategory')

    # keywords
    keywords = qattr('pycsw:Keywords')
    if keywords:
        for kw in keywords.split(','):
            etree.SubElement(node, dif('Keyword')).text = kw

    # temporal
    temporal = etree.SubElement(node, dif('Temporal_Coverage'))
    begin = qattr('pycsw:TempExtent_begin')
    end = qattr('pycsw:TempExtent_end')
    etree.SubElement(temporal, dif('Start_Date')).text = begin
    etree.SubElement(temporal, dif('End_Date')).text = end

    # bbox extent
    bboxel = write_extent(qattr('pycsw:BoundingBox'), NAMESPACES)
    if bboxel is not None:
        node.append(bboxel)

    # access constraints, language, contributor
    top_level_fields = (
        ('Access_Constraints', 'pycsw:AccessConstraints'),
        ('Data_Set_Language', 'pycsw:ResourceLanguage'),
        ('Originating_Center', 'pycsw:OrganizationName'),
    )
    for tag, key in top_level_fields:
        etree.SubElement(node, dif(tag)).text = qattr(key)

    # abstract (empty string when missing)
    etree.SubElement(node, dif('Summary')).text = \
        qattr('pycsw:Abstract') or ''

    # date
    etree.SubElement(node, dif('DIF_Creation_Date')).text = \
        qattr('pycsw:CreationDate')

    # URL
    relation = qattr('pycsw:Relation')
    related = etree.SubElement(node, dif('Related_URL'))
    etree.SubElement(related, dif('URL')).text = relation

    # one extra Related_URL per link
    rlinks = qattr('pycsw:Links')
    if rlinks:
        for link in util.jsonify_links(rlinks):
            link_el = etree.SubElement(node, dif('Related_URL'))
            content_type = etree.SubElement(link_el,
                                            dif('URL_Content_Type'))
            etree.SubElement(content_type, dif('Type')).text = \
                link['protocol']
            etree.SubElement(link_el, dif('URL')).text = link['url']
            etree.SubElement(link_el, dif('Description')).text = \
                link['description']

    etree.SubElement(node, dif('Metadata_Name')).text = 'CEOS IDN DIF'
    etree.SubElement(node, dif('Metadata_Version')).text = '9.7'

    return node
def write_record(recobj, esn, context, url=None):
    ''' Return csw:SearchResults child as lxml.etree.Element

    Builds an FGDC ``metadata`` element from the core-model attributes of
    ``recobj``.  When ``esn`` is 'full' and the record is typed
    fgdc:metadata, the stored XML is parsed with ``context.parser`` and
    returned instead.

    :param recobj: record whose attributes are read with util.getqattr
    :param esn: element set name
    :param context: provides md_core_model, namespaces and parser
    :param url: not used by this function
    :returns: lxml element
    '''

    mappings = context.md_core_model['mappings']

    typename = util.getqattr(recobj, mappings['pycsw:Typename'])
    if esn == 'full' and typename == 'fgdc:metadata':
        # dump record as is and exit
        return etree.fromstring(
            util.getqattr(recobj, mappings['pycsw:XML']), context.parser)

    node = etree.Element('metadata')
    node.attrib[util.nspath_eval('xsi:noNamespaceSchemaLocation',
                                 context.namespaces)] = \
        'http://www.fgdc.gov/metadata/fgdc-std-001-1998.xsd'

    idinfo = etree.SubElement(node, 'idinfo')

    # identifier
    etree.SubElement(idinfo, 'datasetid').text = util.getqattr(
        recobj, mappings['pycsw:Identifier'])

    citation = etree.SubElement(idinfo, 'citation')
    citeinfo = etree.SubElement(citation, 'citeinfo')

    # title
    val = util.getqattr(recobj, mappings['pycsw:Title'])
    etree.SubElement(citeinfo, 'title').text = val

    # publisher
    publinfo = etree.SubElement(citeinfo, 'publinfo')
    val = util.getqattr(recobj, mappings['pycsw:Publisher']) or ''
    etree.SubElement(publinfo, 'publish').text = val

    # origin
    val = util.getqattr(recobj, mappings['pycsw:Creator']) or ''
    etree.SubElement(citeinfo, 'origin').text = val

    # keywords
    val = util.getqattr(recobj, mappings['pycsw:Keywords'])
    if val:
        keywords = etree.SubElement(idinfo, 'keywords')
        theme = etree.SubElement(keywords, 'theme')
        for keyword in val.split(','):
            etree.SubElement(theme, 'themekey').text = keyword

    # accessconstraints
    val = util.getqattr(recobj, mappings['pycsw:AccessConstraints']) or ''
    etree.SubElement(idinfo, 'accconst').text = val

    # abstract
    descript = etree.SubElement(idinfo, 'descript')
    val = util.getqattr(recobj, mappings['pycsw:Abstract']) or ''
    etree.SubElement(descript, 'abstract').text = val

    # time
    datebegin = util.getqattr(recobj, mappings['pycsw:TempExtent_begin'])
    dateend = util.getqattr(recobj, mappings['pycsw:TempExtent_end'])
    if datebegin and dateend:
        timeperd = etree.SubElement(idinfo, 'timeperd')
        timeinfo = etree.SubElement(timeperd, 'timeinfo')
        # FGDC range-of-dates container; it was previously emitted as a
        # second, nested 'timeinfo' element
        rngdates = etree.SubElement(timeinfo, 'rngdates')
        etree.SubElement(rngdates, 'begdate').text = datebegin
        etree.SubElement(rngdates, 'enddate').text = dateend

    # bbox extent
    val = util.getqattr(recobj, mappings['pycsw:BoundingBox'])
    bboxel = write_extent(val)
    if bboxel is not None:
        idinfo.append(bboxel)

    # contributor
    val = util.getqattr(recobj, mappings['pycsw:Contributor']) or ''
    etree.SubElement(idinfo, 'datacred').text = val

    # direct
    spdoinfo = etree.SubElement(idinfo, 'spdoinfo')
    val = util.getqattr(recobj, mappings['pycsw:Type']) or ''
    etree.SubElement(spdoinfo, 'direct').text = val

    # formname (also written as the citation's geoform)
    distinfo = etree.SubElement(node, 'distinfo')
    stdorder = etree.SubElement(distinfo, 'stdorder')
    digform = etree.SubElement(stdorder, 'digform')
    digtinfo = etree.SubElement(digform, 'digtinfo')
    val = util.getqattr(recobj, mappings['pycsw:Format']) or ''
    etree.SubElement(digtinfo, 'formname').text = val
    etree.SubElement(citeinfo, 'geoform').text = val

    # source
    lineage = etree.SubElement(node, 'lineage')
    srcinfo = etree.SubElement(lineage, 'srcinfo')
    srccite = etree.SubElement(srcinfo, 'srccite')
    sciteinfo = etree.SubElement(srccite, 'citeinfo')
    val = util.getqattr(recobj, mappings['pycsw:Source']) or ''
    etree.SubElement(sciteinfo, 'title').text = val

    val = util.getqattr(recobj, mappings['pycsw:Relation']) or ''
    etree.SubElement(citeinfo, 'onlink').text = val

    # links: '^' separates links, ',' separates the fields of one link;
    # the third field becomes the type, the last one the link text
    rlinks = util.getqattr(recobj, mappings['pycsw:Links'])
    if rlinks:
        for link in rlinks.split('^'):
            linkset = link.split(',')
            etree.SubElement(citeinfo, 'onlink',
                             type=linkset[2]).text = linkset[-1]

    # metd
    metainfo = etree.SubElement(node, 'metainfo')
    val = util.getqattr(recobj, mappings['pycsw:Modified']) or ''
    etree.SubElement(metainfo, 'metd').text = val

    return node
def write_record(self, result, esn, outputschema, queryables):
    ''' Return csw:SearchResults child as lxml.etree.Element

    Produces a rim:ExtrinsicObject describing ``result``.  A record typed
    rim:RegistryObject requested with the full element set is returned as
    parsed from its stored XML.
    '''

    mappings = self.context.md_core_model['mappings']

    identifier = util.getqattr(result, mappings['pycsw:Identifier'])
    typename = util.getqattr(result, mappings['pycsw:Typename'])

    is_native = typename == 'rim:RegistryObject'
    if esn == 'full' and is_native:
        # dump record as is and exit
        xml_column = queryables['pycsw:XML']['dbcol']
        return etree.fromstring(util.getqattr(result, xml_column),
                                self.context.parser)

    if typename == 'csw:Record':
        # transform csw:Record -> rim:RegistryObject model mappings
        record_mappings = self.repository['mappings']['csw:Record']
        util.transform_mappings(queryables, record_mappings)

    node = etree.Element(
        util.nspath_eval('rim:ExtrinsicObject', self.namespaces))

    schema_location = '%s %s/csw/2.0.2/profiles/ebrim/1.0/csw-ebrim.xsd' % (
        self.namespaces['wrs'], self.ogc_schemas_base)
    location_attr = util.nspath_eval('xsi:schemaLocation',
                                     self.context.namespaces)
    node.attrib[location_attr] = schema_location

    node.attrib['id'] = identifier
    node.attrib['lid'] = identifier
    object_type = util.getqattr(result, mappings['pycsw:Type'])
    node.attrib['objectType'] = str(object_type)
    node.attrib['status'] = \
        'urn:oasis:names:tc:ebxml-regrep:StatusType:Submitted'

    etree.SubElement(node,
                     util.nspath_eval('rim:VersionInfo', self.namespaces),
                     versionName='')

    if esn in ['summary', 'full']:
        external_id_tag = util.nspath_eval('rim:ExternalIdentifier',
                                           self.namespaces)
        relation = util.getqattr(result, mappings['pycsw:Relation'])
        etree.SubElement(node, external_id_tag,
                         value=identifier,
                         identificationScheme='foo',
                         registryObject=str(relation),
                         id=identifier)

    # name and description each wrap one LocalizedString
    name_el = etree.SubElement(
        node, util.nspath_eval('rim:Name', self.namespaces))
    title = util.getqattr(result, mappings['pycsw:Title'])
    etree.SubElement(
        name_el,
        util.nspath_eval('rim:LocalizedString', self.namespaces),
        value=text_type(title))

    description_el = etree.SubElement(
        node, util.nspath_eval('rim:Description', self.namespaces))
    abstract = util.getqattr(result, mappings['pycsw:Abstract'])
    etree.SubElement(
        description_el,
        util.nspath_eval('rim:LocalizedString', self.namespaces),
        value=text_type(abstract))

    # bounding box slot
    bbox = util.getqattr(result, mappings['pycsw:BoundingBox'])
    bboxel = write_boundingbox(bbox, self.context.namespaces)
    if bboxel is not None:
        bboxslot = etree.SubElement(
            node,
            util.nspath_eval('rim:Slot', self.namespaces),
            slotType='urn:ogc:def:dataType:ISO-19107:2003:GM_Envelope')
        bbox_values = etree.SubElement(
            bboxslot, util.nspath_eval('rim:ValueList', self.namespaces))
        bbox_value = etree.SubElement(
            bbox_values, util.nspath_eval('rim:Value', self.namespaces))
        bbox_value.append(bboxel)

    # subject slot, one rim:Value per comma-separated keyword
    rkeywords = util.getqattr(result, mappings['pycsw:Keywords'])
    if rkeywords is not None:
        subjectslot = etree.SubElement(
            node,
            util.nspath_eval('rim:Slot', self.namespaces),
            name='http://purl.org/dc/elements/1.1/subject')
        keyword_values = etree.SubElement(
            subjectslot,
            util.nspath_eval('rim:ValueList', self.namespaces))
        for keyword in rkeywords.split(','):
            etree.SubElement(
                keyword_values,
                util.nspath_eval('rim:Value', self.namespaces)).text = keyword

    return node
def write_record(result, esn, context, url=None):
    ''' Return csw:SearchResults child as lxml.etree.Element

    Builds a gm03:TRANSFER element from the core-model columns of
    ``result``.  A record whose typename is already ``gm03:TRANSFER`` is
    parsed from its stored XML and returned unchanged.

    :param result: record object, read through ``util.getqattr``
    :param esn: element set name; not consulted by this function
    :param context: supplies ``md_core_model``, ``parser`` and ``namespaces``
    :param url: not used by this function
    :returns: the gm03:TRANSFER element (or the parsed stored record)
    :raises ValueError: if a link entry has fewer than four
        comma-separated fields
    '''

    mappings = context.md_core_model['mappings']

    typename = util.getqattr(result, mappings['pycsw:Typename'])

    if typename == 'gm03:TRANSFER':
        # dump record as is and exit
        # TODO: provide brief and summary elementsetname's
        return etree.fromstring(
            util.getqattr(result, mappings['pycsw:XML']), context.parser)

    node = etree.Element(util.nspath_eval('gm03:TRANSFER', NAMESPACES),
                         nsmap=NAMESPACES)

    header = etree.SubElement(
        node, util.nspath_eval('gm03:HEADERSECTION', NAMESPACES))
    header.attrib['version'] = '2.3'
    header.attrib['sender'] = 'pycsw'

    etree.SubElement(header, util.nspath_eval('gm03:MODELS', NAMESPACES))

    data = etree.SubElement(
        node, util.nspath_eval('gm03:DATASECTION', NAMESPACES))

    core = etree.SubElement(
        data, util.nspath_eval('gm03:GM03_2_1Core.Core', NAMESPACES))
    core_meta = etree.SubElement(
        core,
        util.nspath_eval('gm03:GM03_2_1Core.Core.MD_Metadata', NAMESPACES))

    val = util.getqattr(result, mappings['pycsw:Identifier'])
    etree.SubElement(
        core_meta,
        util.nspath_eval('gm03:fileIdentifier', NAMESPACES)).text = val

    # the metadata language is reused for every free-text element below
    language = util.getqattr(result, mappings['pycsw:Language'])
    etree.SubElement(
        core_meta,
        util.nspath_eval('gm03:language', NAMESPACES)).text = language

    val = util.getqattr(result, mappings['pycsw:Modified'])
    etree.SubElement(
        core_meta,
        util.nspath_eval('gm03:dateStamp', NAMESPACES)).text = val

    hierarchy_level_val = util.getqattr(result, mappings['pycsw:Type'])

    # metadata standard name
    etree.SubElement(
        core_meta,
        util.nspath_eval('gm03:metadataStandardName',
                         NAMESPACES)).text = 'GM03'

    # metadata standard version
    etree.SubElement(
        core_meta,
        util.nspath_eval('gm03:metadataStandardVersion',
                         NAMESPACES)).text = '2.3'

    # hierarchy level
    hierarchy_level = etree.SubElement(
        core_meta, util.nspath_eval('gm03:hierarchyLevel', NAMESPACES))
    scope_code = etree.SubElement(
        hierarchy_level,
        util.nspath_eval('gm03:GM03_2_1Core.Core.MD_ScopeCode_', NAMESPACES))
    etree.SubElement(
        scope_code,
        util.nspath_eval('gm03:value', NAMESPACES)).text = hierarchy_level_val

    # parent identifier
    val = util.getqattr(result, mappings['pycsw:ParentIdentifier'])
    parent_identifier = etree.SubElement(
        core_meta, util.nspath_eval('gm03:parentIdentifier', NAMESPACES))
    scope_code = etree.SubElement(
        parent_identifier,
        util.nspath_eval('gm03:GM03_2_1Core.Core.MD_ScopeCode_', NAMESPACES))
    etree.SubElement(
        scope_code, util.nspath_eval('gm03:value', NAMESPACES)).text = val

    # title
    val = util.getqattr(result, mappings['pycsw:Title'])
    citation = etree.SubElement(
        core,
        util.nspath_eval('gm03:GM03_2_1Core.Core.CI_Citation', NAMESPACES))
    title = etree.SubElement(
        citation, util.nspath_eval('gm03:title', NAMESPACES))
    title.append(_get_pt_freetext(val, language))

    # abstract
    val = util.getqattr(result, mappings['pycsw:Abstract'])
    data_ident = etree.SubElement(
        core,
        util.nspath_eval('gm03:GM03_2_1Core.Core.MD_DataIdentification',
                         NAMESPACES))
    abstract = etree.SubElement(
        data_ident, util.nspath_eval('gm03:abstract', NAMESPACES))
    abstract.append(_get_pt_freetext(val, language))

    # resource language
    val = util.getqattr(result, mappings['pycsw:ResourceLanguage'])
    if val:
        res_language = etree.SubElement(
            data_ident, util.nspath_eval('gm03:language', NAMESPACES))
        lang_code = etree.SubElement(
            res_language,
            util.nspath_eval('gm03:CodeISO.LanguageCodeISO_', NAMESPACES))
        etree.SubElement(
            lang_code, util.nspath_eval('gm03:value', NAMESPACES)).text = val

    # topic category
    val = util.getqattr(result, mappings['pycsw:TopicCategory'])
    if val:
        topicategory = etree.SubElement(
            data_ident, util.nspath_eval('gm03:topicCategory', NAMESPACES))
        cat_code = etree.SubElement(
            topicategory,
            util.nspath_eval('gm03:GM03_2_1Core.Core.MD_TopicCategoryCode_',
                             NAMESPACES))
        etree.SubElement(
            cat_code, util.nspath_eval('gm03:value', NAMESPACES)).text = val

    # keywords
    keywords_val = util.getqattr(result, mappings['pycsw:Keywords'])
    if keywords_val:
        md_keywords = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.MD_Keywords',
                             NAMESPACES))
        val = util.getqattr(result, mappings['pycsw:KeywordType'])
        if val:
            etree.SubElement(
                md_keywords,
                util.nspath_eval('gm03:type', NAMESPACES)).text = val
        # all keywords share a single gm03:keyword container
        keyword = etree.SubElement(
            md_keywords, util.nspath_eval('gm03:keyword', NAMESPACES))
        for kw in keywords_val.split(','):
            keyword.append(_get_pt_freetext(kw, language))

    # format
    val = util.getqattr(result, mappings['pycsw:Format'])
    if val:
        md_format = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.MD_Format', NAMESPACES))
        etree.SubElement(
            md_format, util.nspath_eval('gm03:name', NAMESPACES)).text = val

    # creation date
    val = util.getqattr(result, mappings['pycsw:CreationDate'])
    if val:
        ci_date = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.CI_Date', NAMESPACES))
        etree.SubElement(
            ci_date, util.nspath_eval('gm03:date', NAMESPACES)).text = val
        etree.SubElement(
            ci_date,
            util.nspath_eval('gm03:dateType', NAMESPACES)).text = 'creation'

    # revision date
    val = util.getqattr(result, mappings['pycsw:RevisionDate'])
    if val:
        ci_date = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.CI_Date', NAMESPACES))
        etree.SubElement(
            ci_date, util.nspath_eval('gm03:date', NAMESPACES)).text = val
        etree.SubElement(
            ci_date,
            util.nspath_eval('gm03:dateType', NAMESPACES)).text = 'revision'

    # publication date
    val = util.getqattr(result, mappings['pycsw:PublicationDate'])
    if val:
        ci_date = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.CI_Date', NAMESPACES))
        etree.SubElement(
            ci_date, util.nspath_eval('gm03:date', NAMESPACES)).text = val
        etree.SubElement(
            ci_date,
            util.nspath_eval('gm03:dateType',
                             NAMESPACES)).text = 'publication'

    # bbox extent
    val = util.getqattr(result, mappings['pycsw:BoundingBox'])
    bboxel = write_extent(val, context.namespaces)
    if bboxel is not None:
        core.append(bboxel)

    # geographic description
    val = util.getqattr(result, mappings['pycsw:GeographicDescriptionCode'])
    if val:
        geo_desc = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.EX_GeographicDescription',
                             NAMESPACES))
        etree.SubElement(
            geo_desc,
            util.nspath_eval('gm03:geographicIdentifier',
                             NAMESPACES)).text = val

    # crs
    val = util.getqattr(result, mappings['pycsw:CRS'])
    if val:
        rs_identifier = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.RS_Identifier',
                             NAMESPACES))
        rs_code = etree.SubElement(
            rs_identifier, util.nspath_eval('gm03:code', NAMESPACES))
        rs_code.append(_get_pt_freetext(val, language))

    # temporal extent (an end is only written when a begin exists)
    time_begin = util.getqattr(result, mappings['pycsw:TempExtent_begin'])
    time_end = util.getqattr(result, mappings['pycsw:TempExtent_end'])
    if time_begin:
        temp_ext = etree.SubElement(
            core,
            util.nspath_eval('gm03:GM03_2_1Core.Core.EX_TemporalExtent',
                             NAMESPACES))
        extent = etree.SubElement(
            temp_ext, util.nspath_eval('gm03:extent', NAMESPACES))
        tm_primitive = etree.SubElement(
            extent,
            util.nspath_eval('gm03:GM03_2_1Core.Core.TM_Primitive',
                             NAMESPACES))
        etree.SubElement(
            tm_primitive,
            util.nspath_eval('gm03:begin', NAMESPACES)).text = time_begin
        if time_end:
            etree.SubElement(
                tm_primitive,
                util.nspath_eval('gm03:end', NAMESPACES)).text = time_end

    # links: '^'-separated entries of 'name,description,protocol,url'
    rlinks = util.getqattr(result, mappings['pycsw:Links'])
    if rlinks:
        for link in rlinks.split('^'):
            # maxsplit=3 keeps any further commas inside the URL field, so a
            # URL such as '...&BBOX=1,2,3,4' no longer raises ValueError.
            # link_url is a separate name so the url parameter is not rebound.
            name, description, protocol, link_url = link.split(',', 3)
            online_resource = etree.SubElement(
                core,
                util.nspath_eval('gm03:GM03_2_1Core.Core.OnlineResource',
                                 NAMESPACES))
            if protocol:
                etree.SubElement(
                    online_resource,
                    util.nspath_eval('gm03:protocol',
                                     NAMESPACES)).text = protocol
            if description:
                desc = etree.SubElement(
                    online_resource,
                    util.nspath_eval('gm03:description', NAMESPACES))
                desc.append(_get_pt_freetext(description, language))
            if name:
                name_el = etree.SubElement(
                    online_resource,
                    util.nspath_eval('gm03:name', NAMESPACES))
                name_el.append(_get_pt_freetext(name, language))
            linkage = etree.SubElement(
                online_resource,
                util.nspath_eval('gm03:linkage', NAMESPACES))
            linkage.append(_get_pt_freeurl(link_url, language))

    return node
def write_record(result, esn, context, url=None):
    ''' Return csw:SearchResults child as lxml.etree.Element '''

    def column(key):
        ''' Read the record column mapped to the core queryable ``key`` '''
        return util.getqattr(result, context.md_core_model['mappings'][key])

    def add(parent, tag):
        ''' Append a new child named by the prefixed ``tag`` to ``parent`` '''
        return etree.SubElement(parent, util.nspath_eval(tag, NAMESPACES))

    if column('pycsw:Typename') == 'gm03:TRANSFER':
        # the stored record is returned parsed, without being rebuilt
        # TODO: provide brief and summary elementsetname's
        return etree.fromstring(column('pycsw:XML'), context.parser)

    root = etree.Element(util.nspath_eval('gm03:TRANSFER', NAMESPACES),
                         nsmap=NAMESPACES)

    # header section
    header = add(root, 'gm03:HEADERSECTION')
    header.attrib['version'] = '2.3'
    header.attrib['sender'] = 'pycsw'
    add(header, 'gm03:MODELS')

    # data section: everything else hangs off the Core element
    core = add(add(root, 'gm03:DATASECTION'), 'gm03:GM03_2_1Core.Core')
    core_meta = add(core, 'gm03:GM03_2_1Core.Core.MD_Metadata')

    identifier = column('pycsw:Identifier')
    add(core_meta, 'gm03:fileIdentifier').text = identifier

    # metadata language, also passed to every free-text helper call below
    language = column('pycsw:Language')
    add(core_meta, 'gm03:language').text = language

    modified = column('pycsw:Modified')
    add(core_meta, 'gm03:dateStamp').text = modified

    record_type = column('pycsw:Type')

    # metadata standard name and version
    add(core_meta, 'gm03:metadataStandardName').text = 'GM03'
    add(core_meta, 'gm03:metadataStandardVersion').text = '2.3'

    # hierarchy level
    level = add(core_meta, 'gm03:hierarchyLevel')
    level_code = add(level, 'gm03:GM03_2_1Core.Core.MD_ScopeCode_')
    add(level_code, 'gm03:value').text = record_type

    # parent identifier
    parent_id = column('pycsw:ParentIdentifier')
    parent = add(core_meta, 'gm03:parentIdentifier')
    parent_code = add(parent, 'gm03:GM03_2_1Core.Core.MD_ScopeCode_')
    add(parent_code, 'gm03:value').text = parent_id

    # title
    title_text = column('pycsw:Title')
    citation = add(core, 'gm03:GM03_2_1Core.Core.CI_Citation')
    add(citation, 'gm03:title').append(_get_pt_freetext(title_text, language))

    # abstract
    abstract_text = column('pycsw:Abstract')
    data_ident = add(core, 'gm03:GM03_2_1Core.Core.MD_DataIdentification')
    add(data_ident, 'gm03:abstract').append(
        _get_pt_freetext(abstract_text, language))

    # resource language
    resource_language = column('pycsw:ResourceLanguage')
    if resource_language:
        lang_wrapper = add(data_ident, 'gm03:language')
        lang_code = add(lang_wrapper, 'gm03:CodeISO.LanguageCodeISO_')
        add(lang_code, 'gm03:value').text = resource_language

    # topic category
    topic = column('pycsw:TopicCategory')
    if topic:
        topic_wrapper = add(data_ident, 'gm03:topicCategory')
        topic_code = add(topic_wrapper,
                         'gm03:GM03_2_1Core.Core.MD_TopicCategoryCode_')
        add(topic_code, 'gm03:value').text = topic

    # keywords: optional type, then one free-text entry per keyword
    keywords = column('pycsw:Keywords')
    if keywords:
        md_keywords = add(core, 'gm03:GM03_2_1Core.Core.MD_Keywords')
        keyword_type = column('pycsw:KeywordType')
        if keyword_type:
            add(md_keywords, 'gm03:type').text = keyword_type
        keyword_holder = add(md_keywords, 'gm03:keyword')
        for term in keywords.split(','):
            keyword_holder.append(_get_pt_freetext(term, language))

    # format
    format_name = column('pycsw:Format')
    if format_name:
        md_format = add(core, 'gm03:GM03_2_1Core.Core.MD_Format')
        add(md_format, 'gm03:name').text = format_name

    # creation, revision and publication dates, in that order
    for date_key, date_type in (('pycsw:CreationDate', 'creation'),
                                ('pycsw:RevisionDate', 'revision'),
                                ('pycsw:PublicationDate', 'publication')):
        stamp = column(date_key)
        if stamp:
            ci_date = add(core, 'gm03:GM03_2_1Core.Core.CI_Date')
            add(ci_date, 'gm03:date').text = stamp
            add(ci_date, 'gm03:dateType').text = date_type

    # bbox extent
    bbox = column('pycsw:BoundingBox')
    bbox_element = write_extent(bbox, context.namespaces)
    if bbox_element is not None:
        core.append(bbox_element)

    # geographic description
    geo_code = column('pycsw:GeographicDescriptionCode')
    if geo_code:
        geo_desc = add(core, 'gm03:GM03_2_1Core.Core.EX_GeographicDescription')
        add(geo_desc, 'gm03:geographicIdentifier').text = geo_code

    # crs
    crs = column('pycsw:CRS')
    if crs:
        rs_identifier = add(core, 'gm03:GM03_2_1Core.Core.RS_Identifier')
        add(rs_identifier, 'gm03:code').append(_get_pt_freetext(crs, language))

    # temporal extent: the end is only emitted together with a begin
    time_begin = column('pycsw:TempExtent_begin')
    time_end = column('pycsw:TempExtent_end')
    if time_begin:
        temporal = add(core, 'gm03:GM03_2_1Core.Core.EX_TemporalExtent')
        extent = add(temporal, 'gm03:extent')
        primitive = add(extent, 'gm03:GM03_2_1Core.Core.TM_Primitive')
        add(primitive, 'gm03:begin').text = time_begin
        if time_end:
            add(primitive, 'gm03:end').text = time_end

    # links
    links = column('pycsw:Links')
    if links:
        for entry in util.jsonify_links(links):
            resource = add(core, 'gm03:GM03_2_1Core.Core.OnlineResource')
            if entry['protocol']:
                add(resource, 'gm03:protocol').text = entry['protocol']
            if entry['description']:
                add(resource, 'gm03:description').append(
                    _get_pt_freetext(entry['description'], language))
            if entry['name']:
                add(resource, 'gm03:name').append(
                    _get_pt_freetext(entry['name'], language))
            add(resource, 'gm03:linkage').append(
                _get_pt_freeurl(entry['url'], language))

    return root
def write_record(result, esn, context, url=None):
    ''' Return csw:SearchResults child as lxml.etree.Element '''

    def column(key):
        ''' Read the record column mapped to the core queryable ``key`` '''
        return util.getqattr(result, context.md_core_model['mappings'][key])

    def add(parent, tag):
        ''' Append a new child named by the prefixed ``tag`` to ``parent`` '''
        return etree.SubElement(parent, util.nspath_eval(tag, NAMESPACES))

    typename = column('pycsw:Typename')

    if typename == 'dif:DIF' and esn == 'full':
        # full view of a dif:DIF record: return the stored XML, parsed
        return etree.fromstring(column('pycsw:XML'), context.parser)

    node = etree.Element(util.nspath_eval('dif:DIF', NAMESPACES))

    schema_location = \
        '%s http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif.xsd' % NAMESPACE
    node.attrib[util.nspath_eval('xsi:schemaLocation',
                                 context.namespaces)] = schema_location

    # identifier
    entry_id = column('pycsw:Identifier')
    add(node, 'dif:Entry_ID').text = entry_id

    # title (empty string when the record has none)
    add(node, 'dif:Entry_Title').text = column('pycsw:Title') or ''

    # citation: creator, release date, publisher, presentation form
    citation = add(node, 'dif:Data_Set_Citation')
    for core_key, tag in (
            ('pycsw:Creator', 'dif:Dataset_Creator'),
            ('pycsw:PublicationDate', 'dif:Dataset_Release_Date'),
            ('pycsw:Publisher', 'dif:Dataset_Publisher'),
            ('pycsw:Format', 'dif:Data_Presentation_Form')):
        value = column(core_key)
        add(citation, tag).text = value

    # iso topic category
    topic = column('pycsw:TopicCategory')
    add(node, 'dif:ISO_Topic_Category').text = topic

    # keywords, one element per comma-separated term
    keywords = column('pycsw:Keywords')
    if keywords:
        for term in keywords.split(','):
            add(node, 'dif:Keyword').text = term

    # temporal
    temporal = add(node, 'dif:Temporal_Coverage')
    start = column('pycsw:TempExtent_begin')
    end = column('pycsw:TempExtent_end')
    add(temporal, 'dif:Start_Date').text = start
    add(temporal, 'dif:End_Date').text = end

    # bbox extent
    bbox = column('pycsw:BoundingBox')
    bbox_element = write_extent(bbox, NAMESPACES)
    if bbox_element is not None:
        node.append(bbox_element)

    # access constraints, language, contributor
    for core_key, tag in (
            ('pycsw:AccessConstraints', 'dif:Access_Constraints'),
            ('pycsw:ResourceLanguage', 'dif:Data_Set_Language'),
            ('pycsw:OrganizationName', 'dif:Originating_Center')):
        value = column(core_key)
        add(node, tag).text = value

    # abstract (empty string when the record has none)
    add(node, 'dif:Summary').text = column('pycsw:Abstract') or ''

    # date
    created = column('pycsw:CreationDate')
    add(node, 'dif:DIF_Creation_Date').text = created

    # URL
    relation = column('pycsw:Relation')
    related = add(node, 'dif:Related_URL')
    add(related, 'dif:URL').text = relation

    # one further Related_URL per '^'-separated link entry
    links = column('pycsw:Links')
    if links:
        for entry in links.split('^'):
            fields = entry.split(',')

            link_el = add(node, 'dif:Related_URL')

            content_type = add(link_el, 'dif:URL_Content_Type')
            add(content_type, 'dif:Type').text = fields[2]

            add(link_el, 'dif:URL').text = fields[-1]
            add(link_el, 'dif:Description').text = fields[1]

    add(node, 'dif:Metadata_Name').text = 'CEOS IDN DIF'
    add(node, 'dif:Metadata_Version').text = '9.7'

    return node
def write_record(self, result, esn, outputschema, queryables, caps=None): ''' Return csw:SearchResults child as lxml.etree.Element ''' typename = util.getqattr(result, self.context.md_core_model['mappings']['pycsw:Typename']) is_iso_anyway = False xml_blob = util.getqattr(result, self.context.md_core_model['mappings']['pycsw:XML']) if caps is None and xml_blob is not None and xml_blob.startswith('<gmd:MD_Metadata'): is_iso_anyway = True if (esn == 'full' and (typename == 'gmd:MD_Metadata' or is_iso_anyway)): # dump record as is and exit return etree.fromstring(xml_blob, self.context.parser) if typename == 'csw:Record': # transform csw:Record -> gmd:MD_Metadata model mappings util.transform_mappings(queryables, self.repository['mappings']['csw:Record']) node = etree.Element(util.nspath_eval('gmd:MD_Metadata', self.namespaces)) node.attrib[util.nspath_eval('xsi:schemaLocation', self.context.namespaces)] = \ '%s %s/csw/2.0.2/profiles/apiso/1.0.0/apiso.xsd' % (self.namespace, self.ogc_schemas_base) # identifier idval = util.getqattr(result, self.context.md_core_model['mappings']['pycsw:Identifier']) identifier = etree.SubElement(node, util.nspath_eval('gmd:fileIdentifier', self.namespaces)) etree.SubElement(identifier, util.nspath_eval('gco:CharacterString', self.namespaces)).text = idval if esn in ['summary', 'full']: # language val = util.getqattr(result, queryables['apiso:Language']['dbcol']) lang = etree.SubElement(node, util.nspath_eval('gmd:language', self.namespaces)) etree.SubElement(lang, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val # hierarchyLevel mtype = util.getqattr(result, queryables['apiso:Type']['dbcol']) or None if mtype is not None: if mtype == 'http://purl.org/dc/dcmitype/Dataset': mtype = 'dataset' hierarchy = etree.SubElement(node, util.nspath_eval('gmd:hierarchyLevel', self.namespaces)) hierarchy.append(_write_codelist_element('gmd:MD_ScopeCode', mtype, self.namespaces)) if esn in ['summary', 'full']: # contact contact = 
etree.SubElement(node, util.nspath_eval('gmd:contact', self.namespaces)) if caps is not None: CI_resp = etree.SubElement(contact, util.nspath_eval('gmd:CI_ResponsibleParty', self.namespaces)) if hasattr(caps.provider.contact, 'name'): ind_name = etree.SubElement(CI_resp, util.nspath_eval('gmd:individualName', self.namespaces)) etree.SubElement(ind_name, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.name if hasattr(caps.provider.contact, 'organization'): if caps.provider.contact.organization is not None: org_val = caps.provider.contact.organization else: org_val = caps.provider.name org_name = etree.SubElement(CI_resp, util.nspath_eval('gmd:organisationName', self.namespaces)) etree.SubElement(org_name, util.nspath_eval('gco:CharacterString', self.namespaces)).text = org_val if hasattr(caps.provider.contact, 'position'): pos_name = etree.SubElement(CI_resp, util.nspath_eval('gmd:positionName', self.namespaces)) etree.SubElement(pos_name, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.position contact_info = etree.SubElement(CI_resp, util.nspath_eval('gmd:contactInfo', self.namespaces)) ci_contact = etree.SubElement(contact_info, util.nspath_eval('gmd:CI_Contact', self.namespaces)) if hasattr(caps.provider.contact, 'phone'): phone = etree.SubElement(ci_contact, util.nspath_eval('gmd:phone', self.namespaces)) ci_phone = etree.SubElement(phone, util.nspath_eval('gmd:CI_Telephone', self.namespaces)) voice = etree.SubElement(ci_phone, util.nspath_eval('gmd:voice', self.namespaces)) etree.SubElement(voice, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.phone if hasattr(caps.provider.contact, 'fax'): fax = etree.SubElement(ci_phone, util.nspath_eval('gmd:facsimile', self.namespaces)) etree.SubElement(fax, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.fax address = etree.SubElement(ci_contact, 
util.nspath_eval('gmd:address', self.namespaces)) ci_address = etree.SubElement(address, util.nspath_eval('gmd:CI_Address', self.namespaces)) if hasattr(caps.provider.contact, 'address'): delivery_point = etree.SubElement(ci_address, util.nspath_eval('gmd:deliveryPoint', self.namespaces)) etree.SubElement(delivery_point, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.address if hasattr(caps.provider.contact, 'city'): city = etree.SubElement(ci_address, util.nspath_eval('gmd:city', self.namespaces)) etree.SubElement(city, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.city if hasattr(caps.provider.contact, 'region'): admin_area = etree.SubElement(ci_address, util.nspath_eval('gmd:administrativeArea', self.namespaces)) etree.SubElement(admin_area, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.region if hasattr(caps.provider.contact, 'postcode'): postal_code = etree.SubElement(ci_address, util.nspath_eval('gmd:postalCode', self.namespaces)) etree.SubElement(postal_code, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.postcode if hasattr(caps.provider.contact, 'country'): country = etree.SubElement(ci_address, util.nspath_eval('gmd:country', self.namespaces)) etree.SubElement(country, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.country if hasattr(caps.provider.contact, 'email'): email = etree.SubElement(ci_address, util.nspath_eval('gmd:electronicMailAddress', self.namespaces)) etree.SubElement(email, util.nspath_eval('gco:CharacterString', self.namespaces)).text = caps.provider.contact.email contact_url = None if hasattr(caps.provider, 'url'): contact_url = caps.provider.url if hasattr(caps.provider.contact, 'url') and caps.provider.contact.url is not None: contact_url = caps.provider.contact.url if contact_url is not None: online_resource = 
etree.SubElement(ci_contact, util.nspath_eval('gmd:onlineResource', self.namespaces)) gmd_linkage = etree.SubElement(online_resource, util.nspath_eval('gmd:linkage', self.namespaces)) etree.SubElement(gmd_linkage, util.nspath_eval('gmd:URL', self.namespaces)).text = contact_url if hasattr(caps.provider.contact, 'role'): role = etree.SubElement(CI_resp, util.nspath_eval('gmd:role', self.namespaces)) role_val = caps.provider.contact.role if role_val is None: role_val = 'pointOfContact' etree.SubElement(role, util.nspath_eval('gmd:CI_RoleCode', self.namespaces), codeListValue=role_val, codeList='%s#CI_RoleCode' % CODELIST).text = role_val else: val = util.getqattr(result, queryables['apiso:OrganisationName']['dbcol']) if val: CI_resp = etree.SubElement(contact, util.nspath_eval('gmd:CI_ResponsibleParty', self.namespaces)) org_name = etree.SubElement(CI_resp, util.nspath_eval('gmd:organisationName', self.namespaces)) etree.SubElement(org_name, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val # date val = util.getqattr(result, queryables['apiso:Modified']['dbcol']) date = etree.SubElement(node, util.nspath_eval('gmd:dateStamp', self.namespaces)) if val and val.find('T') != -1: dateel = 'gco:DateTime' else: dateel = 'gco:Date' etree.SubElement(date, util.nspath_eval(dateel, self.namespaces)).text = val metadatastandardname = 'ISO19115' metadatastandardversion = '2003/Cor.1:2006' if mtype == 'service': metadatastandardname = 'ISO19119' metadatastandardversion = '2005/PDAM 1' # metadata standard name standard = etree.SubElement(node, util.nspath_eval('gmd:metadataStandardName', self.namespaces)) etree.SubElement(standard, util.nspath_eval('gco:CharacterString', self.namespaces)).text = metadatastandardname # metadata standard version standardver = etree.SubElement(node, util.nspath_eval('gmd:metadataStandardVersion', self.namespaces)) etree.SubElement(standardver, util.nspath_eval('gco:CharacterString', self.namespaces)).text = metadatastandardversion # 
title val = util.getqattr(result, queryables['apiso:Title']['dbcol']) or '' identification = etree.SubElement(node, util.nspath_eval('gmd:identificationInfo', self.namespaces)) if mtype == 'service': restagname = 'srv:SV_ServiceIdentification' else: restagname = 'gmd:MD_DataIdentification' resident = etree.SubElement(identification, util.nspath_eval(restagname, self.namespaces), id=idval) tmp2 = etree.SubElement(resident, util.nspath_eval('gmd:citation', self.namespaces)) tmp3 = etree.SubElement(tmp2, util.nspath_eval('gmd:CI_Citation', self.namespaces)) tmp4 = etree.SubElement(tmp3, util.nspath_eval('gmd:title', self.namespaces)) etree.SubElement(tmp4, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val # creation date val = util.getqattr(result, queryables['apiso:CreationDate']['dbcol']) if val is not None: tmp3.append(_write_date(val, 'creation', self.namespaces)) # publication date val = util.getqattr(result, queryables['apiso:PublicationDate']['dbcol']) if val is not None: tmp3.append(_write_date(val, 'publication', self.namespaces)) # revision date val = util.getqattr(result, queryables['apiso:RevisionDate']['dbcol']) if val is not None: tmp3.append(_write_date(val, 'revision', self.namespaces)) if esn in ['summary', 'full']: # abstract val = util.getqattr(result, queryables['apiso:Abstract']['dbcol']) or '' tmp = etree.SubElement(resident, util.nspath_eval('gmd:abstract', self.namespaces)) etree.SubElement(tmp, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val # keywords kw = util.getqattr(result, queryables['apiso:Subject']['dbcol']) if kw is not None: md_keywords = etree.SubElement(resident, util.nspath_eval('gmd:descriptiveKeywords', self.namespaces)) md_keywords.append(write_keywords(kw, self.namespaces)) # spatial resolution val = util.getqattr(result, queryables['apiso:Denominator']['dbcol']) if val: tmp = etree.SubElement(resident, util.nspath_eval('gmd:spatialResolution', self.namespaces)) tmp2 = 
etree.SubElement(tmp, util.nspath_eval('gmd:MD_Resolution', self.namespaces)) tmp3 = etree.SubElement(tmp2, util.nspath_eval('gmd:equivalentScale', self.namespaces)) tmp4 = etree.SubElement(tmp3, util.nspath_eval('gmd:MD_RepresentativeFraction', self.namespaces)) tmp5 = etree.SubElement(tmp4, util.nspath_eval('gmd:denominator', self.namespaces)) etree.SubElement(tmp5, util.nspath_eval('gco:Integer', self.namespaces)).text = str(val) # resource language val = util.getqattr(result, queryables['apiso:ResourceLanguage']['dbcol']) tmp = etree.SubElement(resident, util.nspath_eval('gmd:language', self.namespaces)) etree.SubElement(tmp, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val # topic category val = util.getqattr(result, queryables['apiso:TopicCategory']['dbcol']) if val: for v in val.split(','): tmp = etree.SubElement(resident, util.nspath_eval('gmd:topicCategory', self.namespaces)) etree.SubElement(tmp, util.nspath_eval('gmd:MD_TopicCategoryCode', self.namespaces)).text = val # bbox extent val = util.getqattr(result, queryables['apiso:BoundingBox']['dbcol']) bboxel = write_extent(val, self.namespaces) if bboxel is not None and mtype != 'service': resident.append(bboxel) # service identification if mtype == 'service': # service type # service type version val = util.getqattr(result, queryables['apiso:ServiceType']['dbcol']) val2 = util.getqattr(result, queryables['apiso:ServiceTypeVersion']['dbcol']) if val is not None: tmp = etree.SubElement(resident, util.nspath_eval('srv:serviceType', self.namespaces)) etree.SubElement(tmp, util.nspath_eval('gco:LocalName', self.namespaces)).text = val tmp = etree.SubElement(resident, util.nspath_eval('srv:serviceTypeVersion', self.namespaces)) etree.SubElement(tmp, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val2 kw = util.getqattr(result, queryables['apiso:Subject']['dbcol']) if kw is not None: srv_keywords = etree.SubElement(resident, util.nspath_eval('srv:keywords', 
self.namespaces)) srv_keywords.append(write_keywords(kw, self.namespaces)) if bboxel is not None: bboxel.tag = util.nspath_eval('srv:extent', self.namespaces) resident.append(bboxel) val = util.getqattr(result, queryables['apiso:CouplingType']['dbcol']) if val is not None: couplingtype = etree.SubElement(resident, util.nspath_eval('srv:couplingType', self.namespaces)) etree.SubElement(couplingtype, util.nspath_eval('srv:SV_CouplingType', self.namespaces), codeListValue=val, codeList='%s#SV_CouplingType' % CODELIST).text = val if esn in ['summary', 'full']: # all service resources as coupled resources coupledresources = util.getqattr(result, queryables['apiso:OperatesOn']['dbcol']) operations = util.getqattr(result, queryables['apiso:Operation']['dbcol']) if coupledresources: for val2 in coupledresources.split(','): coupledres = etree.SubElement(resident, util.nspath_eval('srv:coupledResource', self.namespaces)) svcoupledres = etree.SubElement(coupledres, util.nspath_eval('srv:SV_CoupledResource', self.namespaces)) opname = etree.SubElement(svcoupledres, util.nspath_eval('srv:operationName', self.namespaces)) etree.SubElement(opname, util.nspath_eval('gco:CharacterString', self.namespaces)).text = _get_resource_opname(operations) sid = etree.SubElement(svcoupledres, util.nspath_eval('srv:identifier', self.namespaces)) etree.SubElement(sid, util.nspath_eval('gco:CharacterString', self.namespaces)).text = val2 # service operations if operations: for i in operations.split(','): oper = etree.SubElement(resident, util.nspath_eval('srv:containsOperations', self.namespaces)) tmp = etree.SubElement(oper, util.nspath_eval('srv:SV_OperationMetadata', self.namespaces)) tmp2 = etree.SubElement(tmp, util.nspath_eval('srv:operationName', self.namespaces)) etree.SubElement(tmp2, util.nspath_eval('gco:CharacterString', self.namespaces)).text = i tmp3 = etree.SubElement(tmp, util.nspath_eval('srv:DCP', self.namespaces)) etree.SubElement(tmp3, util.nspath_eval('srv:DCPList', 
self.namespaces), codeList='%s#DCPList' % CODELIST, codeListValue='HTTPGet').text = 'HTTPGet' tmp4 = etree.SubElement(tmp, util.nspath_eval('srv:DCP', self.namespaces)) etree.SubElement(tmp4, util.nspath_eval('srv:DCPList', self.namespaces), codeList='%s#DCPList' % CODELIST, codeListValue='HTTPPost').text = 'HTTPPost' connectpoint = etree.SubElement(tmp, util.nspath_eval('srv:connectPoint', self.namespaces)) onlineres = etree.SubElement(connectpoint, util.nspath_eval('gmd:CI_OnlineResource', self.namespaces)) linkage = etree.SubElement(onlineres, util.nspath_eval('gmd:linkage', self.namespaces)) etree.SubElement(linkage, util.nspath_eval('gmd:URL', self.namespaces)).text = util.getqattr(result, self.context.md_core_model['mappings']['pycsw:Source']) # operates on resource(s) if coupledresources: for i in coupledresources.split(','): operates_on = etree.SubElement(resident, util.nspath_eval('srv:operatesOn', self.namespaces), uuidref=i) operates_on.attrib[util.nspath_eval('xlink:href', self.namespaces)] = '%sservice=CSW&version=2.0.2&request=GetRecordById&outputschema=http://www.isotc211.org/2005/gmd&id=%s-%s' % (util.bind_url(self.url), idval, i) rlinks = util.getqattr(result, self.context.md_core_model['mappings']['pycsw:Links']) if rlinks: distinfo = etree.SubElement(node, util.nspath_eval('gmd:distributionInfo', self.namespaces)) distinfo2 = etree.SubElement(distinfo, util.nspath_eval('gmd:MD_Distribution', self.namespaces)) transopts = etree.SubElement(distinfo2, util.nspath_eval('gmd:transferOptions', self.namespaces)) dtransopts = etree.SubElement(transopts, util.nspath_eval('gmd:MD_DigitalTransferOptions', self.namespaces)) for link in rlinks.split('^'): linkset = link.split(',') online = etree.SubElement(dtransopts, util.nspath_eval('gmd:onLine', self.namespaces)) online2 = etree.SubElement(online, util.nspath_eval('gmd:CI_OnlineResource', self.namespaces)) linkage = etree.SubElement(online2, util.nspath_eval('gmd:linkage', self.namespaces)) 
etree.SubElement(linkage, util.nspath_eval('gmd:URL', self.namespaces)).text = linkset[-1] protocol = etree.SubElement(online2, util.nspath_eval('gmd:protocol', self.namespaces)) etree.SubElement(protocol, util.nspath_eval('gco:CharacterString', self.namespaces)).text = linkset[2] name = etree.SubElement(online2, util.nspath_eval('gmd:name', self.namespaces)) etree.SubElement(name, util.nspath_eval('gco:CharacterString', self.namespaces)).text = linkset[0] desc = etree.SubElement(online2, util.nspath_eval('gmd:description', self.namespaces)) etree.SubElement(desc, util.nspath_eval('gco:CharacterString', self.namespaces)).text = linkset[1] return node
def write_record(result, esn, context, url=None):
    '''
    Return csw:SearchResults child as lxml.etree.Element

    Builds a dif:DIF element from the core-model queryables of ``result``.
    When ``esn`` is 'full' and the record's typename is already 'dif:DIF',
    the stored XML is parsed with ``context.parser`` and returned as is.

    :param result: record whose values are read through util.getqattr
    :param esn: element set name; only 'full' enables the pass-through
    :param context: supplies md_core_model['mappings'], namespaces, parser
    :param url: not read by this function

    :returns: lxml.etree.Element
    '''

    mappings = context.md_core_model['mappings']

    def _value(name):
        ''' read one core-model queryable from the record '''
        return util.getqattr(result, mappings[name])

    def _child(parent, tag, text=None):
        ''' append a child resolved against NAMESPACES and set its text '''
        element = etree.SubElement(parent, util.nspath_eval(tag, NAMESPACES))
        element.text = text
        return element

    if esn == 'full' and _value('pycsw:Typename') == 'dif:DIF':
        # dump record as is and exit
        return etree.fromstring(_value('pycsw:XML'), context.parser)

    node = etree.Element(util.nspath_eval('dif:DIF', NAMESPACES))
    node.attrib[util.nspath_eval('xsi:schemaLocation', context.namespaces)] = \
        '%s http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif.xsd' % NAMESPACE

    # identifier
    _child(node, 'dif:Entry_ID', _value('pycsw:Identifier'))

    # title
    _child(node, 'dif:Entry_Title', _value('pycsw:Title') or '')

    # citation
    citation = _child(node, 'dif:Data_Set_Citation')
    _child(citation, 'dif:Dataset_Creator', _value('pycsw:Creator'))
    _child(citation, 'dif:Dataset_Release_Date',
           _value('pycsw:PublicationDate'))
    _child(citation, 'dif:Dataset_Publisher', _value('pycsw:Publisher'))
    _child(citation, 'dif:Data_Presentation_Form', _value('pycsw:Format'))

    # keywords dif:Parameters
    # keywords that mention "Earth Science" and carry a '>'-separated
    # hierarchy are written as dif:Parameters; all remaining keywords are
    # collected and written as dif:Keyword after the topic category
    plain_keywords = []
    keywords = _value('pycsw:Keywords')
    if keywords:
        for kw in keywords.split(','):
            levels = kw.upper().split('>')
            # Category, Topic and Term are all read below, so at least
            # three levels are needed; a two-level keyword used to raise
            # IndexError on levels[2] and is now kept as a plain keyword
            if 'earth science' in kw.lower() and len(levels) >= 3:
                parameters = _child(node, 'dif:Parameters')
                _child(parameters, 'dif:Category', levels[0].strip())
                _child(parameters, 'dif:Topic', levels[1].strip())
                _child(parameters, 'dif:Term', levels[2].strip())
                for depth, level in enumerate(levels[3:], start=1):
                    _child(parameters, f'dif:Variable_Level_{depth}',
                           level.strip())
            else:
                plain_keywords.append(kw)

    # iso topic category
    _child(node, 'dif:ISO_Topic_Category', _value('pycsw:TopicCategory'))

    # keywords dif:keywords
    for kw in plain_keywords:
        _child(node, 'dif:Keyword', kw.strip())

    # temporal
    temporal = _child(node, 'dif:Temporal_Coverage')
    _child(temporal, 'dif:Start_Date', _value('pycsw:TempExtent_begin'))
    _child(temporal, 'dif:End_Date', _value('pycsw:TempExtent_end'))

    # bbox extent
    bboxel = write_extent(_value('pycsw:BoundingBox'), NAMESPACES)
    if bboxel is not None:
        node.append(bboxel)

    # access constraints
    _child(node, 'dif:Access_Constraints', _value('pycsw:AccessConstraints'))

    # language
    _child(node, 'dif:Data_Set_Language', _value('pycsw:ResourceLanguage'))

    # contributor
    _child(node, 'dif:Originating_Center', _value('pycsw:OrganizationName'))

    # abstract
    _child(node, 'dif:Summary', _value('pycsw:Abstract') or '')

    # URL
    relation = _value('pycsw:Relation')
    if relation:
        # local name chosen so the ``url`` parameter is not overwritten
        related_url = _child(node, 'dif:Related_URL')
        _child(related_url, 'dif:URL', relation)

    rlinks = _value('pycsw:Links')
    if rlinks:
        for link in util.jsonify_links(rlinks):
            related_url = _child(node, 'dif:Related_URL')
            urltype = _child(related_url, 'dif:URL_Content_Type')

            protocol = link['protocol']
            if protocol == 'OPENDAP:OPENDAP':
                _child(urltype, 'dif:Type', 'GET DATA')
                _child(urltype, 'dif:Subtype', 'OPENDAP DATA (DODS)')
            elif protocol == 'OGC:WMS':
                _child(urltype, 'dif:Type', 'GET SERVICE')
                _child(urltype, 'dif:Subtype', 'GET WEB MAP SERVICE (WMS)')
            else:
                # 'download' and every other protocol
                _child(urltype, 'dif:Type', 'GET DATA')

            _child(related_url, 'dif:URL', link['url'])
            if link['description']:
                _child(related_url, 'dif:Description', link['description'])

    source = _value('pycsw:Source')
    if source:
        related_url = _child(node, 'dif:Related_URL')
        urltype = _child(related_url, 'dif:URL_Content_Type')
        _child(urltype, 'dif:Type', 'DATASET LANDING PAGE')
        _child(related_url, 'dif:URL', source)

    _child(node, 'dif:Metadata_Name', 'CEOS IDN DIF')
    _child(node, 'dif:Metadata_Version', '9.7')

    # date
    _child(node, 'dif:DIF_Creation_Date', _value('pycsw:CreationDate'))

    return node
def parse_record(context, record, repos=None,
                 mtype='http://www.opengis.net/cat/csw/2.0.2',
                 identifier=None, pagesize=10):
    '''
    parse metadata

    Dispatches on ``mtype`` to the matching service parser; anything that
    is not handled as a service is handed to _parse_metadata.

    :param context: passed through to the parsers; its ``parser`` is used
                    for the Dublin Core fallback
    :param record: service URL or metadata record
    :param repos: passed through to the parsers
    :param mtype: resource type URI selecting the parser
    :param identifier: record identifier; a uuid4 URN is generated if None
    :param pagesize: forwarded to _parse_csw

    :returns: the selected parser's result; the WPS and Dublin Core
              fallback branches wrap a single parsed record in a list
    :raises RuntimeError: CSW harvesting reported an ExceptionReport, or
                          the Dublin Core fallback HTTP request failed
    '''

    if identifier is None:
        # UUID.urn is a property; the get_urn() accessor is Python 2 only
        identifier = uuid.uuid4().urn

    # only strings can be URLs; already-parsed lxml input has no startswith
    record_is_url = isinstance(record, str) and record.startswith('http')

    # parse web services
    if mtype == 'http://www.opengis.net/cat/csw/2.0.2' and record_is_url:
        LOGGER.debug('CSW service detected, fetching via HTTP')
        # CSW service, not csw:Record
        try:
            return _parse_csw(context, repos, record, identifier, pagesize)
        except Exception as err:
            # TODO: implement better exception handling
            if 'ExceptionReport' in str(err):
                msg = 'CSW harvesting error: %s' % str(err)
                LOGGER.debug(msg)
                raise RuntimeError(msg) from err

            LOGGER.debug('Not a CSW, attempting to fetch Dublin Core')
            try:
                content = util.http_request('GET', record)
            except Exception as http_err:
                raise RuntimeError(
                    'HTTP error: %s' % str(http_err)) from http_err
            return [_parse_dc(context, repos,
                              etree.fromstring(content, context.parser))]

    elif mtype == 'urn:geoss:waf':  # WAF
        LOGGER.debug('WAF detected, fetching via HTTP')
        return _parse_waf(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wms':  # WMS
        LOGGER.debug('WMS detected, fetching via OWSLib')
        return _parse_wms(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wps/1.0.0':  # WPS
        LOGGER.debug('WPS detected, fetching via OWSLib')
        return [_parse_wps(context, repos, record, identifier)]

    elif mtype == 'http://www.opengis.net/wfs':  # WFS
        LOGGER.debug('WFS detected, fetching via OWSLib')
        return _parse_wfs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/wcs':  # WCS
        LOGGER.debug('WCS detected, fetching via OWSLib')
        return _parse_wcs(context, repos, record, identifier)

    elif mtype == 'http://www.opengis.net/sos/1.0':  # SOS 1.0.0
        LOGGER.debug('SOS 1.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '1.0.0')

    elif mtype == 'http://www.opengis.net/sos/2.0':  # SOS 2.0.0
        LOGGER.debug('SOS 2.0.0 detected, fetching via OWSLib')
        return _parse_sos(context, repos, record, identifier, '2.0.0')

    elif (mtype == 'http://www.opengis.net/cat/csw/csdgm' and
          record_is_url):  # FGDC
        LOGGER.debug('FGDC detected, fetching via HTTP')
        record = util.http_request('GET', record)

    return _parse_metadata(context, repos, record)
def write_record(self, result, esn, outputschema, queryables):
    '''
    Return csw:SearchResults child as lxml.etree.Element

    A record whose typename equals self.typename is returned by parsing
    its stored XML; otherwise an element is assembled column by column.
    ``esn`` and ``outputschema`` are not read by this method.
    '''

    # columns that are not written through _build_xpath
    special_columns = [
        queryables[key]
        for key in (constants.PYCSW_BOUNDING_BOX,
                    constants.PYCSW_KEYWORDS,
                    constants.PYCSW_LINKS)
    ]

    stored_type = util.getqattr(
        result,
        self.context.md_core_model['mappings'][constants.PYCSW_TYPENAME])

    if stored_type == self.typename:
        # dump record as is and exit
        stored_xml = util.getqattr(result, queryables[constants.PYCSW_XML])
        return etree.fromstring(stored_xml, self.context.parser)

    xpath_by_column = _get_dbcol_to_xpath_dict(self.repository['queryables'])
    root = etree.Element(util.nspath_eval(self.typename, self.namespaces))

    # Sorted for consistency
    for column in sorted(vars(result)):
        column_value = util.getqattr(result, column)
        if column.startswith('_') or column_value is None:
            continue

        xpath = xpath_by_column.get(column)
        if xpath is None:
            continue

        if column not in special_columns:
            _build_xpath(root, xpath, self.context.namespaces, column_value)
        elif column == queryables[constants.PYCSW_KEYWORDS]:
            # one element per comma-separated keyword
            for term in column_value.split(','):
                child = etree.SubElement(
                    root, util.nspath_eval(xpath, self.context.namespaces))
                child.text = term
        elif column == queryables[constants.PYCSW_LINKS]:
            # links are '^'-separated, each link's fields ','-separated
            for entry in column_value.split('^'):
                parts = entry.split(',')
                scheme, uri = parts[2], parts[-1]
                child = etree.SubElement(
                    root,
                    util.nspath_eval(xpath, self.context.namespaces),
                    scheme=scheme)
                child.text = uri
        elif column == queryables[constants.PYCSW_BOUNDING_BOX]:
            root.append(
                write_boundingbox(column_value, self.context.namespaces))

    return root
def write_record(result, esn, context, url=None):
    '''
    Return csw:SearchResults child as lxml.etree.Element

    Produces an atom:entry for ``result``; a record already stored as
    atom:entry is parsed and returned unchanged when ``esn`` is 'full'.
    '''

    mappings = context.md_core_model['mappings']

    def queryable(name):
        ''' read one core-model queryable from the record '''
        return util.getqattr(result, mappings[name])

    def atom_tag(name):
        ''' resolve a prefixed name against this module's NAMESPACES '''
        return util.nspath_eval(name, NAMESPACES)

    stored_type = queryable('pycsw:Typename')
    if esn == 'full' and stored_type == 'atom:entry':
        # dump record as is and exit
        return etree.fromstring(queryable('pycsw:XML'), context.parser)

    entry = etree.Element(atom_tag('atom:entry'), nsmap=NAMESPACES)
    schema_location = util.nspath_eval('xsi:schemaLocation',
                                       context.namespaces)
    entry.attrib[schema_location] = \
        '%s http://www.kbcafe.com/rss/atom.xsd.xml' % NAMESPACES['atom']

    # author
    creator = queryable('pycsw:Creator')
    if creator:
        author = etree.SubElement(entry, atom_tag('atom:author'))
        name_el = etree.SubElement(author, atom_tag('atom:name'))
        name_el.text = creator

    # category
    keywords = queryable('pycsw:Keywords')
    if keywords:
        for term in keywords.split(','):
            etree.SubElement(entry, atom_tag('atom:category'), term=term)

    for key in ('pycsw:Contributor', 'pycsw:Identifier'):
        text = queryable(key)
        if not text:
            continue
        etree.SubElement(entry, atom_tag(XPATH_MAPPINGS[key])).text = text
        if key == 'pycsw:Identifier':
            # the identifier is additionally written as dc:identifier
            dc_tag = util.nspath_eval('dc:identifier', context.namespaces)
            etree.SubElement(entry, dc_tag).text = text

    # links are '^'-separated, each link's fields ','-separated
    rlinks = queryable('pycsw:Links')
    if rlinks:
        for raw_link in rlinks.split('^'):
            fields = raw_link.split(',')
            etree.SubElement(entry, atom_tag('atom:link'),
                             href=fields[-1], type=fields[2],
                             title=fields[1])

    # link back to the repository item itself
    etree.SubElement(
        entry, atom_tag('atom:link'),
        href='%s?service=CSW&version=2.0.2&request=GetRepositoryItem&id=%s'
        % (url, queryable('pycsw:Identifier')))

    # atom:title
    title_el = etree.SubElement(entry,
                                atom_tag(XPATH_MAPPINGS['pycsw:Title']))
    title = queryable('pycsw:Title')
    if title:
        title_el.text = title

    # atom:updated, using the insert date when there is no modified date
    updated_el = etree.SubElement(entry,
                                  atom_tag(XPATH_MAPPINGS['pycsw:Modified']))
    updated_el.text = (queryable('pycsw:Modified')
                       or queryable('pycsw:InsertDate'))

    for key in ('pycsw:PublicationDate', 'pycsw:AccessConstraints',
                'pycsw:Source', 'pycsw:Abstract'):
        text = queryable(key)
        if text:
            etree.SubElement(entry,
                             atom_tag(XPATH_MAPPINGS[key])).text = text

    # bbox extent
    extent = write_extent(queryable('pycsw:BoundingBox'), context.namespaces)
    if extent is not None:
        entry.append(extent)

    return entry
def write_record(result, esn, context, url=None):
    '''
    Return csw:SearchResults child as lxml.etree.Element

    Assembles an atom:entry from the record's queryables, unless the
    record is already an atom:entry and ``esn`` is 'full', in which case
    the stored XML is parsed and returned.
    '''

    core = context.md_core_model['mappings']

    def get(name):
        ''' fetch a core-model queryable value from the record '''
        return util.getqattr(result, core[name])

    def qname(prefixed):
        ''' expand a prefixed tag using this module's NAMESPACES '''
        return util.nspath_eval(prefixed, NAMESPACES)

    record_type = get('pycsw:Typename')
    if esn == 'full' and record_type == 'atom:entry':
        # dump record as is and exit
        return etree.fromstring(get('pycsw:XML'), context.parser)

    root = etree.Element(qname('atom:entry'), nsmap=NAMESPACES)
    root.set(util.nspath_eval('xsi:schemaLocation', context.namespaces),
             '%s http://www.kbcafe.com/rss/atom.xsd.xml' % NAMESPACES['atom'])

    # author
    creator = get('pycsw:Creator')
    if creator:
        author_el = etree.SubElement(root, qname('atom:author'))
        etree.SubElement(author_el, qname('atom:name')).text = creator

    # category
    subjects = get('pycsw:Keywords')
    if subjects:
        for subject in subjects.split(','):
            etree.SubElement(root, qname('atom:category'), term=subject)

    for field in ['pycsw:Contributor', 'pycsw:Identifier']:
        content = get(field)
        if content:
            etree.SubElement(
                root, qname(XPATH_MAPPINGS[field])).text = content
            if field == 'pycsw:Identifier':
                etree.SubElement(
                    root,
                    util.nspath_eval('dc:identifier',
                                     context.namespaces)).text = content

    rlinks = get('pycsw:Links')
    if rlinks:
        for link in util.jsonify_links(rlinks):
            link_el = etree.SubElement(root, qname('atom:link'),
                                       href=link['url'])

            if link['description']:
                link_el.set('title', link['description'])

            protocol = link['protocol']
            if protocol == 'enclosure':
                # enclosures carry a rel and a fixed media type
                link_el.set('rel', protocol)
                link_el.set('type', 'application/octet-stream')
            elif protocol:
                link_el.set('type', protocol)

    # link back to the repository item itself
    etree.SubElement(
        root, qname('atom:link'),
        href='%s?service=CSW&version=2.0.2&request=GetRepositoryItem&id=%s'
        % (url, get('pycsw:Identifier')))

    # atom:title
    title_el = etree.SubElement(root, qname(XPATH_MAPPINGS['pycsw:Title']))
    title = get('pycsw:Title')
    if title:
        title_el.text = title

    # atom:updated
    updated_el = etree.SubElement(root,
                                  qname(XPATH_MAPPINGS['pycsw:Modified']))
    modified = get('pycsw:Modified')
    if not modified:
        # no modified date: use the insert date instead
        modified = get('pycsw:InsertDate')
    updated_el.text = modified

    optional_fields = ('pycsw:PublicationDate', 'pycsw:AccessConstraints',
                       'pycsw:Source', 'pycsw:Abstract')
    for field in optional_fields:
        content = get(field)
        if content:
            etree.SubElement(
                root, qname(XPATH_MAPPINGS[field])).text = content

    # bbox extent
    bbox_el = write_extent(get('pycsw:BoundingBox'), context.namespaces)
    if bbox_el is not None:
        root.append(bbox_el)

    return root
def _parse_csw(context, repos, record, identifier, pagesize=10):
    '''
    Harvest a CSW endpoint

    Builds one service record from the CSW's own metadata, then pages
    through GetRecords and parses every returned record as ISO (when the
    server advertises gmd:MD_Metadata and the gmd output schema) or as
    Dublin Core otherwise. Records that fail to parse are logged and
    skipped.

    :param context: passed to _set and the record parsers; supplies parser
    :param repos: supplies dataset() for the service record
    :param record: CSW endpoint handed to CatalogueServiceWeb
    :param identifier: identifier assigned to the service record
    :param pagesize: maximum number of records requested per page

    :returns: list holding the service record followed by parsed records
    :raises RuntimeError: the server rejected a GetRecords request
    '''

    from owslib.csw import CatalogueServiceWeb

    recobjs = []  # records
    serviceobj = repos.dataset()

    # if init raises error, this might not be a CSW
    md = CatalogueServiceWeb(record, timeout=60)

    LOGGER.debug('Setting CSW service metadata')
    # generate record of service instance
    links = [
        '%s,OGC-CSW Catalogue Service for the Web,OGC:CSW,%s' %
        (identifier, md.url)
    ]
    service_properties = [
        ('pycsw:Identifier', identifier),
        ('pycsw:Typename', 'csw:Record'),
        ('pycsw:Schema', 'http://www.opengis.net/cat/csw/2.0.2'),
        ('pycsw:MdSource', record),
        ('pycsw:InsertDate', util.get_today_and_now()),
        ('pycsw:AnyText', util.get_anytext(md._exml)),
        ('pycsw:Type', 'service'),
        ('pycsw:Title', md.identification.title),
        ('pycsw:Abstract', md.identification.abstract),
        ('pycsw:Keywords', ','.join(md.identification.keywords)),
        ('pycsw:Creator', md.provider.contact.name),
        ('pycsw:Publisher', md.provider.name),
        ('pycsw:Contributor', md.provider.contact.name),
        ('pycsw:OrganizationName', md.provider.contact.name),
        ('pycsw:AccessConstraints', md.identification.accessconstraints),
        ('pycsw:OtherConstraints', md.identification.fees),
        ('pycsw:Source', record),
        ('pycsw:Format', md.identification.type),
        ('pycsw:ServiceType', 'OGC:CSW'),
        ('pycsw:ServiceTypeVersion', md.identification.version),
        ('pycsw:Operation', ','.join([d.name for d in md.operations])),
        ('pycsw:CouplingType', 'tight'),
        ('pycsw:Links', '^'.join(links)),
    ]
    for name, value in service_properties:
        _set(context, serviceobj, name, value)

    # set last: caps2iso receives the populated service record
    _set(context, serviceobj, 'pycsw:XML', caps2iso(serviceobj, md, context))

    recobjs.append(serviceobj)

    # get all supported typenames of metadata
    # so we can harvest the entire CSW
    # try for ISO, settle for Dublin Core
    csw_typenames = 'csw:Record'
    csw_outputschema = 'http://www.opengis.net/cat/csw/2.0.2'

    grop = md.get_operation_by_name('GetRecords')
    supports_iso = (
        'gmd:MD_Metadata' in grop.parameters['typeNames']['values'] and
        'http://www.isotc211.org/2005/gmd' in
        grop.parameters['outputSchema']['values'])
    if supports_iso:
        LOGGER.info('CSW supports ISO')
        csw_typenames = 'gmd:MD_Metadata'
        csw_outputschema = 'http://www.isotc211.org/2005/gmd'

    # now get all records
    # get total number of records to loop against
    try:
        md.getrecords2(typenames=csw_typenames, resulttype='hits',
                       outputschema=csw_outputschema)
        matches = md.results['matches']
    except Exception as err:  # this is a CSW, but server rejects query
        raise RuntimeError(md.response) from err

    # never request more than exists, but keep the step at least 1:
    # range() raises ValueError on a zero step, which is what an empty
    # catalogue (matches == 0) used to produce
    pagesize = max(1, min(pagesize, matches))

    LOGGER.debug('Harvesting %d CSW records', matches)

    if csw_typenames == 'gmd:MD_Metadata':
        parse_one = _parse_iso
    else:
        parse_one = _parse_dc

    # loop over all catalogue records incrementally
    for startposition in range(1, matches + 1, pagesize):
        try:
            md.getrecords2(typenames=csw_typenames,
                           startposition=startposition,
                           maxrecords=pagesize,
                           outputschema=csw_outputschema, esn='full')
        except Exception as err:  # this is a CSW, but server rejects query
            raise RuntimeError(md.response) from err

        # dict.iteritems() does not exist on Python 3; only values are used
        for rec in md.records.values():
            # try to parse metadata
            try:
                LOGGER.debug('Parsing metadata record: %s', rec.xml)
                recobjs.append(parse_one(
                    context, repos,
                    etree.fromstring(rec.xml, context.parser)))
            except Exception as err:  # parsing failed for some reason
                LOGGER.warning('Metadata parsing failed %s', err)

    return recobjs