def testSruRecordData(self):
    """SRU record data for a dc:subject query holds two annotations for one target.

    Checks the number of children of rdf:RDF, the dc:coverage value, the
    single hg:PlaceInTime, the annotation target and both annotation URIs.
    """
    response = self._doQuery('dc:subject=Zeeoorlog')
    recordRdf = xpathFirst(response, '/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF')
    self.assertEquals(2, len(recordRdf.getchildren()))

    coverage = xpathFirst(recordRdf, 'oa:Annotation/oa:hasBody/rdf:Description/dc:coverage/text()')
    self.assertEquals('Verenigde Staten', coverage)

    placesInTime = xpath(recordRdf, 'oa:Annotation/oa:hasBody/rdf:Description/dcterms:spatial/hg:PlaceInTime')
    self.assertEquals(1, len(placesInTime))

    targetUri = xpathFirst(recordRdf, 'oa:Annotation/oa:hasTarget/@rdf:resource')
    self.assertEquals('NIOD_BBWO2:niod:3366459', targetUri)

    expectedAnnotationUris = [
        'http://data.digitalecollectie.nl/annotation/summary#TklPRF9CQldPMjpuaW9kOjMzNjY0NTk=',
        'http://data.digitalecollectie.nl/annotation/erfGeoEnrichment#TklPRF9CQldPMjpuaW9kOjMzNjY0NTk=',
    ]
    self.assertEquals(expectedAnnotationUris, xpath(recordRdf, 'oa:Annotation/@rdf:about'))
def testNoQuery(self):
    """Without pit and without query the annotation only explains why it has no body."""
    converter = PitToAnnotation(searchApiBaseUrl="http://example.org/search")
    annotation = converter.toAnnotation(pit=None, targetUri='the:uri', query=None)

    # (expected value, xpath into the annotation), checked in this order
    expectations = [
        ('the:uri', '/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource'),
        ('http://data.digitalecollectie.nl/ns/oa#erfGeoEnriching', '/rdf:RDF/oa:Annotation/oa:motivatedBy/@rdf:resource'),
        ("http://data.digitalecollectie.nl/id/digitalecollectie", '/rdf:RDF/oa:Annotation/oa:annotatedBy/@rdf:resource'),
        (None, '/rdf:RDF/oa:Annotation/dcterms:source/@rdf:resource'),
        ('No ErfGeo search API query could be constructed from target record', '/rdf:RDF/oa:Annotation/dcterms:description/text()'),
        (None, '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description'),
    ]
    for expected, path in expectations:
        self.assertEquals(expected, xpathFirst(annotation, path))
def testOaiListRecords(self):
    """OAI-PMH ListRecords with metadataPrefix 'erfGeoEnrichment' lists one annotation per target.

    Expects four records, then inspects the annotation of a target without
    usable location information and the annotation of a target that was
    matched to a hg:PlaceInTime.
    """
    header, body = getRequest(self.erfGeoEnrichmentPort, '/oai', {'verb': 'ListRecords', 'metadataPrefix': 'erfGeoEnrichment'}, parse=False)
    bodyLxml = XML(body)
    self.assertEquals(4, len(xpath(bodyLxml, '/oai:OAI-PMH/oai:ListRecords/oai:record')))
    # Index the oa:Annotation elements by the target uri they refer to.
    d = dict(zip(
        xpath(bodyLxml, '/oai:OAI-PMH/oai:ListRecords/oai:record/oai:metadata/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource'),
        xpath(bodyLxml, '/oai:OAI-PMH/oai:ListRecords/oai:record/oai:metadata/rdf:RDF/oa:Annotation')))
    self.assertEquals(set(['NIOD_BBWO2:niod:3366459', 'geluidVanNl:geluid_van_nederland:47954146', 'NIOD_BBWO2:niod:3441263', 'limburgs_erfgoed:oai:le:RooyNet:37']), set(d.keys()))

    # contains no location information to even construct an ErfGeo search API query from
    annotation = d['NIOD_BBWO2:niod:3441263']
    self.assertEquals(None, xpathFirst(annotation, 'oa:hasBody'))
    self.assertEquals('No ErfGeo search API query could be constructed from target record', xpathFirst(annotation, 'dcterms:description/text()'))
    self.assertEquals(None, xpathFirst(annotation, 'dcterms:source/@rdf:resource'))

    annotation = d['NIOD_BBWO2:niod:3366459']
    self.assertEquals('http://data.digitalecollectie.nl/annotation/erfGeoEnrichment#TklPRF9CQldPMjpuaW9kOjMzNjY0NTk=', xpathFirst(annotation, '@rdf:about'))
    # NOTE(review): the next statement is not valid Python (unbalanced parenthesis);
    # it looks like part of the original assertion was replaced by a credential
    # redaction ('http://*****:*****@'). The expected URL and the xpath prefix cannot
    # be recovered from this file -- restore the line from version control.
    self.assertEquals('http://*****:*****@rdf:resource'))
    self.assertEquals('NIOD_BBWO2:niod:3366459', xpathFirst(annotation, 'oa:hasTarget/@rdf:resource'))
    annotationBody = xpathFirst(annotation, 'oa:hasBody/rdf:Description')
    placeInTime = xpathFirst(annotationBody, 'dcterms:spatial/hg:PlaceInTime')
    self.assertEquals('http://erfgeo.nl/hg/geonames/2747032', xpathFirst(placeInTime, '@rdf:about'))
    self.assertEquals('Soestdijk', xpathFirst(placeInTime, 'rdfs:label/text()'))
    geometryWKT = xpathFirst(placeInTime, 'geos:hasGeometry/rdf:Description/geos:asWKT/text()')
    self.assertEquals('POINT(5.28472 52.19083)', geometryWKT)
def annotationFromSummary(self, summary):
    """Generator that produces the (annotationUri, annotation) pair for a summary.

    When the summary already carries coordinates no query is derived from it;
    otherwise query and expected type come from queryFromSummary. The result
    is handed back through StopIteration, as the rest of this file does for
    generator return values.
    """
    targetUri = xpathFirst(summary, 'oa:Annotation/oa:hasTarget/@rdf:resource')
    annotationUri = ERFGEO_ENRICHMENT_PROFILE.uriFor(targetUri)
    geoCoordinates = self._geoCoordinatesPresent(summary)
    if geoCoordinates is None:
        query, expectedType = self.queryFromSummary(summary)
    else:
        # coordinates are known already, so there is nothing to search for
        query = expectedType = None
    annotation = yield self.annotationFromQuery(
        query,
        expectedType=expectedType,
        targetUri=targetUri,
        geoCoordinates=geoCoordinates)
    raise StopIteration((annotationUri, annotation))
def testNoQueryInCaseOfGeoLatLong(self):
    """A summary that already has geo:lat/geo:long must not trigger an ErfGeo API query."""
    queries = []

    def queryErfGeoApi(query, expectedType=None):
        # records every call; the trailing yield makes this a generator
        queries.append({'query': query, 'expectedType': expectedType})
        raise StopIteration(QUERY_RESULTS)
        yield

    def toAnnotation(pit, targetUri, query, geoCoordinates=None):
        converter = PitToAnnotation()
        return converter.toAnnotation(pit=pit, targetUri=targetUri, query=query, geoCoordinates=geoCoordinates)

    observer = CallTrace('observer', methods={'queryErfGeoApi': queryErfGeoApi, 'toAnnotation': toAnnotation})
    tree = (Observable(),
        (ErfGeoEnrichmentFromSummary(),
            (observer,)
        )
    )
    top = be(tree)
    summary = makeSummary([], geoLatLong=('51.8', '5.0'))

    result = retval(top.any.annotationFromSummary(summary))

    self.assertEquals([], queries)
    annotationUri, annotation = result
    self.assertEquals(ERFGEO_ENRICHMENT_PROFILE.uriFor('uri:target'), annotationUri)
    latitude = xpathFirst(annotation, '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description/geo:lat/text()')
    self.assertEquals('51.8', latitude)
def processResourceElement(d, element):
    """Fill dict d from a resource element: '@id', '@type' and all child relations.

    '@id' is set from rdf:about when present. '@type' is set from the element
    tag unless that is rdf:Description or listed in TYPES_TO_IGNORE. Each child
    element is handed to processRelationElement.
    """
    about = xpathFirst(element, '@rdf:about')
    if about:
        d['@id'] = about
    resourceCurie = tagToCurie(element.tag)
    if resourceCurie != 'rdf:Description' and resourceCurie not in TYPES_TO_IGNORE:
        d['@type'] = resourceCurie
    for relationElement in element.iterchildren(tag=Element):
        processRelationElement(d, relationElement)
def _geoCoordinatesPresent(self, summary):
    """Return coordinates that are already present in the summary, or None.

    Candidate nodes, in order: the annotation body itself, resources in the
    summary referred to by dcterms:spatial/@rdf:resource, and resources nested
    inside dcterms:spatial. For the first candidate that has them:
    - geo:lat and geo:long texts are returned as a (geoLat, geoLong) tuple;
    - otherwise the first item of
      Geometry.parseWkt(<geos:asWKT text>).pointCoordinates() is returned.
    """
    annotationBody = xpathFirst(summary, 'oa:Annotation/oa:hasBody/*')
    nodes = [annotationBody]
    for uri in xpath(annotationBody, 'dcterms:spatial/@rdf:resource'):
        # The uri has to be interpolated into the expression; without it the
        # literal text '%s' was searched for and no referenced resource was ever found.
        node = xpathFirst(summary, '*[@rdf:about="%s"]' % uri)
        if node is not None:
            nodes.append(node)
    nodes.extend(xpath(annotationBody, 'dcterms:spatial/*'))
    for node in nodes:
        geoLat = xpathFirst(node, 'geo:lat/text()')
        geoLong = xpathFirst(node, 'geo:long/text()')
        if geoLat and geoLong:
            return (geoLat, geoLong)
        asWkt = xpathFirst(node, 'geos:hasGeometry/*/geos:asWKT/text()')
        if asWkt is not None:
            return next(Geometry.parseWkt(asWkt).pointCoordinates())
    return None
def testAnnotationFromPit(self):
    """A street pit is converted into an annotation with source, PlaceInTime, geometry and parent place."""
    pitToAnnotation = PitToAnnotation(searchApiBaseUrl="http://example.org/search")
    streetGeometry = Geometry.fromGeoDict({
        'type': 'MultiLineString',
        'coordinates': [
            [[5.976364581846588,51.52243586973127],[5.977570822531698,51.521009542433255],[5.977641926636947,51.520937272278]],
            [[5.977641926636947,51.520937272278],[5.9779252893052455,51.52056729706881],[5.978463420127178,51.519845466966835]],
            [[5.978810297575312,51.51930414638479],[5.978780974683683,51.519300636494314],[5.978753517554276,51.51929103170512],[5.978725963940384,51.519272905985616],[5.978708102058019,51.51925108169847],[5.9786942063007675,51.51923287779468],[5.978688040122361,51.51920855828437],[5.9786858271487935,51.51918908170666],[5.97869714389736,51.519158579206554]],
            [[5.978463420127178,51.519845466966835],[5.978689959483037,51.51953869936622],[5.97876059153952,51.5194304755717],[5.978810297575312,51.51930414638479]],
            [[5.978942199072787,51.51912122045602],[5.97897402190174,51.519135261536135],[5.979001702402638,51.51916313044837],[5.979015786946229,51.51919471773031],[5.979020071895229,51.519223927694654],[5.979014461839677,51.51924343492792]],
            [[5.978942199072787,51.51912122045602],[5.979226543949531,51.518700018617565],[5.979439134138488,51.51842927555684],[5.979760764946663,51.517681570604],[5.979788757821533,51.517618506703975]],
        ],
    })
    pit = {
        '@id': 'nwb/venray-leunseweg',
        'data': {'woonplaatsnaam': 'Venray'},
        'type': 'hg:Street',
        'name': "Leunseweg",
        'geometry': streetGeometry,
    }

    annotation = pitToAnnotation.toAnnotation(pit=pit, targetUri='the:uri', query="Leunseweg, Leunen, Venray")

    # (expected value, xpath into the annotation), checked in this order
    annotationExpectations = [
        ('the:uri', '/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource'),
        ('http://data.digitalecollectie.nl/ns/oa#erfGeoEnriching', '/rdf:RDF/oa:Annotation/oa:motivatedBy/@rdf:resource'),
        ("http://data.digitalecollectie.nl/id/digitalecollectie", '/rdf:RDF/oa:Annotation/oa:annotatedBy/@rdf:resource'),
        ('http://example.org/search?q=Leunseweg%2C+Leunen%2C+Venray', '/rdf:RDF/oa:Annotation/dcterms:source/@rdf:resource'),
    ]
    for expected, path in annotationExpectations:
        self.assertEquals(expected, xpathFirst(annotation, path))

    annotationBody = xpathFirst(annotation, '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description')
    placeInTime = xpathFirst(annotationBody, 'dcterms:spatial/hg:PlaceInTime')
    placeExpectations = [
        ("nwb/venray-leunseweg", '@rdf:about'),
        ("http://schema.histograph.io/#Street", 'rdf:type/@rdf:resource'),
        ('Leunseweg', 'rdfs:label/text()'),
    ]
    for expected, path in placeExpectations:
        self.assertEquals(expected, xpathFirst(placeInTime, path))

    geometry = xpathFirst(placeInTime, 'geos:hasGeometry/rdf:Description/geos:asWKT/text()')
    self.assertTrue(geometry.startswith('MULTILINESTRING((5.97'), geometry)
    self.assertEquals('Venray', xpathFirst(placeInTime, 'dcterms:isPartOf/hg:Place/rdfs:label/text()'))
def processRelationElement(d, element):
    """Add the relation expressed by element to dict d, and register its prefix in the context.

    Elements whose tag has no known curie, and prov:wasDerivedFrom, are skipped.
    Literal text, an rdf:resource reference (inlined the first time the uri is
    seen and a matching rdf:about element exists) and nested resource elements
    are all collected as values of the relation. rdf:type only sets '@type'.
    """
    try:
        relationCurie = tagToCurie(element.tag)
    except KeyError:
        return
    if relationCurie == 'prov:wasDerivedFrom':
        return

    collected = []

    literal = (element.text or '').strip()
    if literal:
        if relationCurie in DECIMAL_VALUE_RELATIONS:
            literal = Decimal(literal)
        if relationCurie in SINGLE_LITERAL_VALUE_RELATIONS:
            # single valued relations are stored directly, not as a list
            d[relationCurie] = literal
            return
        collected.append(literal)

    uri = xpathFirst(element, '@rdf:resource')
    if uri:
        if relationCurie == 'rdf:type':
            typeCurie = uriToCurie(uri)
            if typeCurie not in TYPES_TO_IGNORE:
                d['@type'] = typeCurie
            return
        target = uri
        if uri not in urisResolved:
            # every uri is expanded at most once; later references stay plain uris
            urisResolved.add(uri)
            descriptionElement = xpathFirst(rdf, '//*[@rdf:about="%s"]' % uri)
            if descriptionElement is not None:
                target = {}
                processResourceElement(target, descriptionElement)
        collected.append(target)

    for nestedElement in element.iterchildren(tag=Element):
        nested = {}
        processResourceElement(nested, nestedElement)
        collected.append(nested)

    if collected:
        d.setdefault(relationCurie, []).extend(collected)
        prefix = relationCurie.partition(':')[0]
        context[prefix] = namespaces[prefix]
        if relationCurie in RESOURCE_RELATIONS:
            context[relationCurie] = {"@type": "@id"}
def assertSruQuery(self, expectedHits, query, path=None, additionalHeaders=None):
    """Run an SRU query and check the hits.

    expectedHits is either an int (compared with srw:numberOfRecords) or a
    collection of target uris (compared with the set of returned targets,
    its length with srw:numberOfRecords). A diag:diagnostic in the response
    raises RuntimeError with the serialized diagnostic.
    """
    if not path:
        path = '/sru'
    response = self._doQuery(query=query, path=path, additionalHeaders=additionalHeaders)

    diagnostic = xpathFirst(response, "//diag:diagnostic")
    if diagnostic is not None:
        raise RuntimeError(lxmltostring(diagnostic))

    targetUris = set(xpath(response, "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource"))
    numberOfRecords = xpath(response, "/srw:searchRetrieveResponse/srw:numberOfRecords/text()")
    count = int(numberOfRecords[0])

    if type(expectedHits) is not int:
        self.assertEquals(expectedHits, targetUris)
        self.assertEquals(len(expectedHits), count)
        return
    self.assertEquals(expectedHits, count)
def testAbout(self):
    """The /about endpoint returns the erfGeoEnrichment annotation for a given uri."""
    requestArguments = {'uri': 'NIOD_BBWO2:niod:3366459', 'profile': 'erfGeoEnrichment'}
    header, body = getRequest(self.erfGeoEnrichmentPort, '/about', requestArguments, parse=False)
    document = parse(StringIO(body))
    rdf = xpathFirst(document, '/rdf:RDF')

    annotationUri = xpathFirst(rdf, 'oa:Annotation/@rdf:about')
    self.assertEquals('http://data.digitalecollectie.nl/annotation/erfGeoEnrichment#TklPRF9CQldPMjpuaW9kOjMzNjY0NTk=', annotationUri)
    targetUri = xpathFirst(rdf, 'oa:Annotation/oa:hasTarget/@rdf:resource')
    self.assertEquals('NIOD_BBWO2:niod:3366459', targetUri)

    annotationBody = xpathFirst(rdf, 'oa:Annotation/oa:hasBody/rdf:Description')
    placeInTime = xpathFirst(annotationBody, 'dcterms:spatial/hg:PlaceInTime')
    placeExpectations = [
        ('http://erfgeo.nl/hg/geonames/2747032', '@rdf:about'),
        ('Soestdijk', 'rdfs:label/text()'),
        ('POINT(5.28472 52.19083)', 'geos:hasGeometry/rdf:Description/geos:asWKT/text()'),
    ]
    for expected, path in placeExpectations:
        self.assertEquals(expected, xpathFirst(placeInTime, path))
def _uploadUpdateRequest(self, filename, uploadPath, uploadPorts):
    """Post the update request in filename to one randomly chosen upload port.

    The (record identifier, request text) pair is appended to self.uploaded
    before the request is posted. The process exits with status 123 when the
    first line of the response header does not contain '200', and with status
    1234 when the response body contains "srw:diagnostics".
    """
    aPort = choice(uploadPorts)
    recordName = basename(filename)[:-len('.updateRequest')]
    print('http://localhost:%s%s <- %s' % (aPort, uploadPath, recordName))
    # read through a context manager so the file handle is closed right away
    with open(filename) as requestFile:
        updateRequest = requestFile.read()
    requestLxml = parse(StringIO(updateRequest))
    uploadIdentifier = xpathFirst(requestLxml, '//ucp:recordIdentifier/text()')
    self.uploaded.append((uploadIdentifier, updateRequest))
    header, body = postRequest(aPort, uploadPath, updateRequest, parse=False, timeOutInSeconds=18.0)
    statusLine = header.split('\r\n', 1)[0]
    if '200' not in statusLine:
        print('No 200 response, but:\n%s' % header)
        exit(123)
    if "srw:diagnostics" in body:
        print(body)
        exit(1234)
def testNoResultPit(self):
    """A query without matching pit gives an annotation with a source and explanation but no body."""
    converter = PitToAnnotation(searchApiBaseUrl="http://example.org/search")
    annotation = converter.toAnnotation(pit=None, targetUri='the:uri', query="No match to be found")

    # (expected value, xpath into the annotation), checked in this order
    expectations = [
        ('the:uri', '/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource'),
        ('http://data.digitalecollectie.nl/ns/oa#erfGeoEnriching', '/rdf:RDF/oa:Annotation/oa:motivatedBy/@rdf:resource'),
        ("http://data.digitalecollectie.nl/id/digitalecollectie", '/rdf:RDF/oa:Annotation/oa:annotatedBy/@rdf:resource'),
        ('http://example.org/search?q=No+match+to+be+found', '/rdf:RDF/oa:Annotation/dcterms:source/@rdf:resource'),
        ('No PlaceInTime could be found for target record', '/rdf:RDF/oa:Annotation/dcterms:description/text()'),
        (None, '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description'),
    ]
    for expected, path in expectations:
        self.assertEquals(expected, xpathFirst(annotation, path))
def testGeoCoordinatesInsteadOfPit(self):
    """Given coordinates end up as geo:lat/geo:long in the body; no dcterms:source is written."""
    converter = PitToAnnotation(searchApiBaseUrl="http://example.org/search")
    annotation = converter.toAnnotation(pit=None, targetUri='the:uri', query=None, geoCoordinates=('5.0', '51.23'))

    # (expected value, xpath into the annotation), checked in this order
    expectations = [
        ('the:uri', '/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource'),
        ('http://data.digitalecollectie.nl/ns/oa#erfGeoEnriching', '/rdf:RDF/oa:Annotation/oa:motivatedBy/@rdf:resource'),
        ("http://data.digitalecollectie.nl/id/digitalecollectie", '/rdf:RDF/oa:Annotation/oa:annotatedBy/@rdf:resource'),
        (None, '/rdf:RDF/oa:Annotation/dcterms:source'),
        ('Geographical coordinates were already provided in original record', '/rdf:RDF/oa:Annotation/dcterms:description/text()'),
        ('5.0', '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description/geo:lat/text()'),
        ('51.23', '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description/geo:long/text()'),
    ]
    for expected, path in expectations:
        self.assertEquals(expected, xpathFirst(annotation, path))
def queryFromSummary(self, summary):
    """Derive the ErfGeo query from the place names in a summary.

    dcterms:spatial values are preferred (Dutch ones first, otherwise all),
    extended with skos:prefLabel texts of resources referred to by
    dcterms:spatial/@rdf:resource. Only when none are found dc:coverage is
    used, again preferring Dutch values. Returns whatever
    _queryFromCoverageValues returns for the chosen values.
    """
    annotationBody = xpathFirst(summary, 'oa:Annotation/oa:hasBody/*')

    def strippedTexts(path):
        # non-blank text values for path below the annotation body, whitespace stripped
        return [text.strip() for text in xpath(annotationBody, path) if text.strip()]

    spatialValues = strippedTexts('dcterms:spatial[@xml:lang="nl"]/text()')
    if not spatialValues:
        spatialValues = strippedTexts('dcterms:spatial/text()')
    for uri in xpath(annotationBody, 'dcterms:spatial/@rdf:resource'):
        for label in xpath(summary, '*[@rdf:about="%s"]/skos:prefLabel/text()' % uri):
            label = label.strip()
            if label and label not in spatialValues:
                spatialValues.append(label)

    if spatialValues:
        # prefer the more specific relation; helps to ignore uses of dc:coverage for time related values
        return self._queryFromCoverageValues(spatialValues)

    coverageValues = strippedTexts('dc:coverage[@xml:lang="nl"]/text()')
    if not coverageValues:
        coverageValues = strippedTexts('dc:coverage/text()')
    return self._queryFromCoverageValues(coverageValues)
def makeSummary(dcCoverageValues, geoLatLong=None):
    """Build a minimal summary rdf:RDF for target 'uri:target'.

    Every value in dcCoverageValues becomes a dc:coverage element in the
    annotation body; geoLatLong, when given, is a (lat, long) pair that is
    added as geo:lat and geo:long.
    """
    summary = XML("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:oa="http://www.w3.org/ns/oa#" xmlns:dc="http://purl.org/dc/elements/1.1/"> <oa:Annotation> <oa:hasTarget rdf:resource="uri:target"/> <oa:hasBody> <rdf:Description> </rdf:Description> </oa:hasBody> </oa:Annotation> </rdf:RDF>""")
    description = xpathFirst(summary, '//oa:hasBody/rdf:Description')
    for coverage in dcCoverageValues:
        createSubElement(description, 'dc:coverage', text=coverage)
    if geoLatLong is None:
        return summary
    latitude, longitude = geoLatLong
    createSubElement(description, 'geo:lat', text=latitude)
    createSubElement(description, 'geo:long', text=longitude)
    return summary
def main(sets=('rijksmuseum',), outputDirectory='/home/natag'):
    """Count dc:coverage and dcterms:spatial values per OAI-PMH set and dump them as JSON.

    For every set name the 'summary' records are harvested from
    http://data.digitalecollectie.nl/oai, each value is printed as it is seen,
    the 20 most frequent values are printed, and all counts are written to
    <outputDirectory>/digitalecollectie_<set>_coverage_values.json.

    sets: names of the OAI-PMH sets to process.
    outputDirectory: directory the JSON files are written to.
    """
    from os.path import join
    from sys import stdout

    # Set names that were listed (commented out) next to 'rijksmuseum':
    # 'NIOD', 'zeeuwse_bibliotheek', 'limburgs_erfgoed', 'geluidVanNl'
    for setName in sets:
        print('set %s' % setName)
        stdout.flush()
        setValues = defaultdict(int)
        records = iterateOaiPmh(baseurl="http://data.digitalecollectie.nl/oai", metadataPrefix='summary', set=setName)
        for i, item in enumerate(records):
            annotationBody = xpathFirst(item.metadata, 'oa:Annotation/oa:hasBody/*')
            coverageValues = xpath(annotationBody, 'dc:coverage/text()') + xpath(annotationBody, 'dcterms:spatial/text()')
            for value in coverageValues:
                print('[%s %s]: %s' % (setName, i, value))
                stdout.flush()
                setValues[value] += 1
        print('set %s' % setName)
        print('number of different values %s' % len(setValues))
        print('highest counts:')
        print(sorted(setValues.items(), key=lambda item: item[1], reverse=True)[:20])
        stdout.flush()
        outputPath = join(outputDirectory, "digitalecollectie_%s_coverage_values.json" % setName)
        with open(outputPath, "w") as f:
            # NOTE(review): item_sort_key is not a parameter of the stdlib json.dump;
            # presumably dump is simplejson's -- confirm against the file's imports.
            # Items are ordered by descending count (by key for non-int values).
            dump(setValues, f, indent=4, item_sort_key=lambda item: -item[1] if isinstance(item[1], int) else item[0])
def testAnnotationFromSummary(self):
    """dc:coverage values are turned into one exact street query and its result into an annotation."""
    queries = []

    def queryErfGeoApi(query, expectedType=None, exact=None):
        # records every call; the trailing yield makes this a generator
        queries.append({'query': query, 'expectedType': expectedType, 'exact': exact})
        raise StopIteration(QUERY_RESULTS)
        yield

    def toAnnotation(pit, targetUri, query, **kwargs):
        converter = PitToAnnotation()
        return converter.toAnnotation(pit=pit, targetUri=targetUri, query=query)

    observer = CallTrace('observer', methods={'queryErfGeoApi': queryErfGeoApi, 'toAnnotation': toAnnotation})
    tree = (Observable(),
        (ErfGeoEnrichmentFromSummary(),
            (observer,)
        )
    )
    top = be(tree)
    summary = makeSummary(['straat: Leunseweg', 'dorp: Leunen', 'gemeente: Venray'])

    result = retval(top.any.annotationFromSummary(summary))

    expectedQueries = [{'query': '"Leunseweg", "Leunen", "Venray"', 'expectedType': 'hg:Street', 'exact': True}]
    self.assertEquals(expectedQueries, queries)
    annotationUri, annotation = result
    self.assertEquals(ERFGEO_ENRICHMENT_PROFILE.uriFor('uri:target'), annotationUri)
    placeUri = xpathFirst(annotation, '/rdf:RDF/oa:Annotation/oa:hasBody/rdf:Description/dcterms:spatial/hg:PlaceInTime/@rdf:about')
    self.assertEquals('nwb/venray-leunseweg', placeUri)
def _sruResponseToJson(self, arguments, sruResponseLxml, sruRequest):
    """Translate an SRU response into the JSON search result.

    The result always has 'request' and 'sruRequest'. With diagnostics in the
    response only 'errors' is added. Otherwise 'total' and 'items' are added,
    'facets' when drilldown navigators are present (entries whose count differs
    from the total get an 'href' that narrows the query), and 'nextPage' when
    the response has a nextRecordPosition.
    """
    def searchLink(linkArguments):
        return '/search?' + urlencode(linkArguments, doseq=True)

    result = {'request': searchLink(arguments), 'sruRequest': sruRequest}

    diagnostics = xpath(sruResponseLxml, '/srw:searchRetrieveResponse/srw:diagnostics/diag:diagnostic')
    if diagnostics:
        result['errors'] = [
            {
                'message': xpathFirst(diagnostic, 'diag:message/text()'),
                'details': xpathFirst(diagnostic, 'diag:details/text()'),
            }
            for diagnostic in diagnostics
        ]
        return self._resultAsJson(result)

    total = int(xpathFirst(sruResponseLxml, '/srw:searchRetrieveResponse/srw:numberOfRecords/text()'))
    result['total'] = total
    recordRdfs = xpath(sruResponseLxml, '/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF')
    result['items'] = [summaryWithEnrichmentToJsonLd(rdf) for rdf in recordRdfs]

    facets = {}
    navigators = xpath(sruResponseLxml, '/srw:searchRetrieveResponse/srw:extraResponseData/drilldown:drilldown/drilldown:term-drilldown/drilldown:navigator')
    for navigator in navigators:
        name = xpathFirst(navigator, '@name')
        entries = []
        for ddItem in xpath(navigator, 'drilldown:item'):
            value = xpathFirst(ddItem, 'text()')
            count = int(xpathFirst(ddItem, '@count'))
            entry = {'value': value, 'count': count}
            if count != total:
                # selecting this value would narrow the result, so offer a link for it
                narrowedQuery = arguments['query'][0] + ' AND %s exact "%s"' % (name, value)
                entry['href'] = searchLink(dict(arguments, query=narrowedQuery))
            entries.append(entry)
        facets[name] = entries
    if facets:
        result['facets'] = facets

    nextRecordPosition = xpathFirst(sruResponseLxml, '/srw:searchRetrieveResponse/srw:nextRecordPosition/text()')
    if nextRecordPosition:
        result['nextPage'] = searchLink(dict(arguments, startRecord=nextRecordPosition))
    return self._resultAsJson(result)
def testOaiIdentify(self):
    """OAI-PMH Identify reports the repository name of the ErfGeo enrichment service."""
    header, body = getRequest(self.erfGeoEnrichmentPort, '/oai', {'verb': 'Identify'})
    repositoryName = xpathFirst(body, '/oai:OAI-PMH/oai:Identify/oai:repositoryName/text()')
    self.assertEquals("Digitale Collectie ErfGeo enrichments", repositoryName)
def summaryWithEnrichmentToJsonLd(rdf):
    """Convert an rdf:RDF element with summary and enrichment annotations into a JSON-LD dict.

    '@id' is the annotation target. The relations of every
    oa:Annotation/oa:hasBody/rdf:Description are merged into the top level
    dict. '@context' maps each prefix that is used to its namespace and marks
    relations in RESOURCE_RELATIONS as '@id' typed. A resource referred to by
    rdf:resource is inlined at its first reference only; later references stay
    plain uris.
    """
    urisResolved = set()
    context = {'oa': namespaces.oa}

    def processResourceElement(resource, element):
        # fills resource with '@id', '@type' and the relations of element
        about = xpathFirst(element, '@rdf:about')
        if about:
            resource['@id'] = about
        resourceCurie = tagToCurie(element.tag)
        if resourceCurie != 'rdf:Description' and resourceCurie not in TYPES_TO_IGNORE:
            resource['@type'] = resourceCurie
        for relationElement in element.iterchildren(tag=Element):
            processRelationElement(resource, relationElement)

    def processRelationElement(resource, element):
        # adds the values of one relation element to resource and updates the context
        try:
            relationCurie = tagToCurie(element.tag)
        except KeyError:
            return
        if relationCurie == 'prov:wasDerivedFrom':
            return

        collected = []

        literal = (element.text or '').strip()
        if literal:
            if relationCurie in DECIMAL_VALUE_RELATIONS:
                literal = Decimal(literal)
            if relationCurie in SINGLE_LITERAL_VALUE_RELATIONS:
                # single valued relations are stored directly, not as a list
                resource[relationCurie] = literal
                return
            collected.append(literal)

        uri = xpathFirst(element, '@rdf:resource')
        if uri:
            if relationCurie == 'rdf:type':
                typeCurie = uriToCurie(uri)
                if typeCurie not in TYPES_TO_IGNORE:
                    resource['@type'] = typeCurie
                return
            target = uri
            if uri not in urisResolved:
                urisResolved.add(uri)
                descriptionElement = xpathFirst(rdf, '//*[@rdf:about="%s"]' % uri)
                if descriptionElement is not None:
                    target = {}
                    processResourceElement(target, descriptionElement)
            collected.append(target)

        for nestedElement in element.iterchildren(tag=Element):
            nested = {}
            processResourceElement(nested, nestedElement)
            collected.append(nested)

        if collected:
            resource.setdefault(relationCurie, []).extend(collected)
            prefix = relationCurie.partition(':')[0]
            context[prefix] = namespaces[prefix]
            if relationCurie in RESOURCE_RELATIONS:
                context[relationCurie] = {"@type": "@id"}

    jsonLd = {
        '@context': context,
        '@id': xpathFirst(rdf, 'oa:Annotation/oa:hasTarget/@rdf:resource'),
    }
    for annotationBody in xpath(rdf, 'oa:Annotation/oa:hasBody/rdf:Description'):
        for relationElement in annotationBody.iterchildren(tag=Element):
            processRelationElement(jsonLd, relationElement)
    return jsonLd