def testTagToCurie(self):
    """Exercise namespaces.tagToCurie on known, unknown and malformed tags.

    Covers: a tag in a known namespace, a tag with an unknown URI
    (KeyError), a tag without a '{uri}' part (ValueError), a round trip
    through expandNsTag, and a prefix added through copyUpdate.
    """
    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists in unittest from Python 3.12 onwards.
    self.assertEqual('dc:title', namespaces.tagToCurie('{http://purl.org/dc/elements/1.1/}title'))
    self.assertRaises(KeyError, namespaces.tagToCurie, '{unknown}tag')
    self.assertRaises(ValueError, namespaces.tagToCurie, 'no-uri-in-tag')
    self.assertEqual('srw:records', namespaces.tagToCurie(namespaces.expandNsTag('srw:records')))
    ns2 = namespaces.copyUpdate({'new': 'uri:new'})
    self.assertEqual('new:tag', ns2.tagToCurie('{uri:new}tag'))
def testTagToCurie(self):
    """Check tagToCurie for known, unknown and malformed tags.

    Also checks the round trip through expandNsTag and a prefix that only
    exists on the object returned by copyUpdate.
    """
    toCurie = namespaces.tagToCurie

    dcTitle = toCurie('{http://purl.org/dc/elements/1.1/}title')
    self.assertEqual('dc:title', dcTitle)

    # A URI that is not registered is reported as KeyError.
    with self.assertRaises(KeyError):
        toCurie('{unknown}tag')
    # A tag without a '{uri}' part is reported as ValueError.
    with self.assertRaises(ValueError):
        toCurie('no-uri-in-tag')

    expandedTag = namespaces.expandNsTag('srw:records')
    self.assertEqual('srw:records', toCurie(expandedTag))

    extended = namespaces.copyUpdate({'new': 'uri:new'})
    self.assertEqual('new:tag', extended.tagToCurie('{uri:new}tag'))
class FieldHier(Observable):
    """Observable that forwards the values of each x:fieldHier element as a field."""

    # Namespaces with the 'x' prefix registered for the example namespace.
    ns = namespaces.copyUpdate({'x': 'http://meresco.org/namespace/example'})

    def add(self, lxmlNode, **kwargs):
        """Call addField on the observers once per /x:document/x:fieldHier element.

        Each call passes the list of x:value text nodes of that element as
        ``value``. Extra keyword arguments are accepted and not used.
        """
        for hierNode in self.ns.xpath(lxmlNode, '/x:document/x:fieldHier'):
            texts = self.ns.xpath(hierNode, 'x:value/text()')
            fieldName = UNTOKENIZED_PREFIX + 'fieldHier'
            self.do.addField(name=fieldName, value=texts)
        # The unreachable yield keeps this method a generator that yields nothing.
        return
        yield
raise SruException(UNSUPPORTED_OPERATION, operation) if 'stylesheet' in arguments: raise SruException(UNSUPPORTED_PARAMETER, 'stylesheet') class SoapException(Exception): def __init__(self, faultCode, faultString): self._faultCode = faultCode self._faultString = faultString def asSoap(self): return """<SOAP:Fault><faultcode>%s</faultcode><faultstring>%s</faultstring></SOAP:Fault>""" % ( xmlEscape(self._faultCode), xmlEscape(self._faultString)) namespaces = _namespaces.copyUpdate( dict(soap="http://schemas.xmlsoap.org/soap/envelope/", )) xpath = namespaces.xpath xpathFirst = namespaces.xpathFirst def localname(tag): _, _, name = tag.rpartition('}') return str(name) SOAP_HEADER = """<SOAP:Envelope xmlns:SOAP="%(soap)s"><SOAP:Body>""" % namespaces SOAP_FOOTER = """</SOAP:Body></SOAP:Envelope>""" SOAP = SOAP_HEADER + "%s" + SOAP_FOOTER
# ## end license ## from seecr.test import IntegrationTestCase from seecr.test.utils import getRequest, sleepWheel, htmlXPath from meresco.xml import xpathFirst, xpath, namespaces from lxml import etree # TODO: create UnitTestCase for o.a. writeDelete / unDelete # TODO: SRU-throttle mogelijkheden uitzoeken. testNamespaces = namespaces.copyUpdate({'oaibrand':'http://www.openarchives.org/OAI/2.0/branding/', 'prs' : 'http://www.onderzoekinformatie.nl/nod/prs', 'proj' : 'http://www.onderzoekinformatie.nl/nod/act', 'org' : 'http://www.onderzoekinformatie.nl/nod/org', 'long' : 'http://www.knaw.nl/narcis/1.0/long/', 'short' : 'http://www.knaw.nl/narcis/1.0/short/', 'mods' : 'http://www.loc.gov/mods/v3', 'didl' : 'urn:mpeg:mpeg21:2002:02-DIDL-NS', 'norm' : 'http://dans.knaw.nl/narcis/normalized', }) class SruSlaveTest(IntegrationTestCase): def testSruQuery(self): response = self.doSruQuery(query='*', recordSchema='knaw_short') # print "doSruQuery(query='*'):", etree.tostring(response) self.assertEqual('13', xpathFirst(response, '//srw:numberOfRecords/text()')) self.assertEqual(set([ 'Example Program 1', 'Example Program 2', 'RAIN: Pan-European gridded data sets of extreme weather probability of occurrence under present and future climate',
from meresco.dans.logger import Logger # Normalisation Logger. from meresco.seecr.oai import OaiAddDeleteRecordWithPrefixesAndSetSpecs, OaiAddRecord from meresco.dans.xlsserver import XlsServer NL_DIDL_NORMALISED_PREFIX = 'nl_didl_norm' NL_DIDL_COMBINED_PREFIX = 'nl_didl_combined' NAMESPACEMAP = namespaces.copyUpdate({ 'dip': 'urn:mpeg:mpeg21:2005:01-DIP-NS', 'gal': 'info:eu-repo/grantAgreement', 'hbo': 'info:eu-repo/xmlns/hboMODSextension', 'wmp': 'http://www.surfgroepen.nl/werkgroepmetadataplus', 'gmhnorm': 'http://gh.kb-dans.nl/normalised/v0.9/', 'gmhcombined': 'http://gh.kb-dans.nl/combined/v0.9/', 'meta': 'http://meresco.org/namespace/harvester/meta', 'oai': 'http://www.openarchives.org/OAI/2.0/' }) myPath = dirname(abspath(__file__)) # dynamicHtmlPath = join(myPath, 'controlpanel', 'html', 'dynamic') # staticHtmlPath = join(myPath, 'controlpanel', 'html', 'static') def createDownloadHelix(reactor, periodicDownload, oaiDownload,
# You should have received a copy of the GNU General Public License
# along with "Digitale Collectie ErfGeo Enrichment"; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
## end license ##

from functools import partial

from meresco.xml import namespaces as _namespaces
from meresco.xml.utils import createElement, createSubElement

# Result of copyUpdate on the meresco.xml namespaces with the prefixes below.
namespaces = _namespaces.copyUpdate({
    'dcoa': "http://data.digitalecollectie.nl/ns/oa#",
    'geo': "http://www.w3.org/2003/01/geo/wgs84_pos#",
    'geos': "http://www.opengis.net/ont/geosparql#",
    'hg': "http://schema.histograph.io/#",
    'oa': "http://www.w3.org/ns/oa#",
    'vcard': "http://www.w3.org/2006/vcard/ns#",
})


def uriFromTag(tag):
    """Return namespaces.expandNsUri applied to namespaces.prefixedTag(tag)."""
    prefixed = namespaces.prefixedTag(tag)
    return namespaces.expandNsUri(prefixed)


# Also expose the helper as an attribute of the namespaces object.
namespaces.uriFromTag = uriFromTag

# Module-level shortcuts to the bound methods of the extended namespaces.
xpath = namespaces.xpath
xpathFirst = namespaces.xpathFirst
expandNsUri = namespaces.expandNsUri
expandNsTag = namespaces.expandNsTag
curieToUri = namespaces.curieToUri
uriToCurie = namespaces.uriToCurie
curieToTag = namespaces.curieToTag
from time import sleep from urllib import urlencode from meresco.core import Observable from meresco.components import lxmltostring, RetrieveToGetDataAdapter from meresco.components.http.utils import CRLF from meresco.sequentialstore import MultiSequentialStorage from meresco.xml import namespaces from meresco.oai import OaiPmh, OaiJazz, OaiBranding, SuspendRegister from weightless.core import be, compose namespaces = namespaces.copyUpdate({ 'toolkit': 'http://oai.dlib.vt.edu/OAI/metadata/toolkit', 'branding': 'http://www.openarchives.org/OAI/2.0/branding/', 'identifier': 'http://www.openarchives.org/OAI/2.0/oai-identifier', }) xpath = namespaces.xpath xpathFirst = namespaces.xpathFirst BATCHSIZE = 10 HOSTNAME = gethostname() class _OaiPmhTest(SeecrTestCase): def setUp(self): SeecrTestCase.setUp(self) self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz')) self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store')) self.oaipmh = self.getOaiPmh()
#!/usr/bin/env python from os import listdir from os.path import join from lxml.etree import parse from collections import defaultdict from lucene import initVM initVM() from meresco.xml import namespaces namespaces = namespaces.copyUpdate(dict(oa='http://www.w3.org/ns/oa#')) xpath = namespaces.xpath xpathFirst = namespaces.xpathFirst def open_writer(path): from java.io import File from org.apache.lucene.analysis.core import WhitespaceAnalyzer from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import IndexWriter, IndexWriterConfig from org.apache.lucene.store import FSDirectory from org.apache.lucene.util import Version directory = FSDirectory.open(File(path)) analyzer = StandardAnalyzer(Version.LUCENE_43) config = IndexWriterConfig(Version.LUCENE_43, analyzer) writer = IndexWriter(directory, config) return writer def open_searcher(writer): from org.apache.lucene.search import IndexSearcher reader = writer.getReader()
# (at your option) any later version.
#
# "Meresco Harvester" is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with "Meresco Harvester"; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
## end license ##

from meresco.xml import namespaces as _namespaces

# Result of copyUpdate on the meresco.xml namespaces with the prefixes below.
# NOTE(review): 'sahara' and 'status' map to the same URI - confirm this is intended.
namespaces = _namespaces.copyUpdate({
    'dc': "http://purl.org/dc/elements/1.1/",
    'oai_dc': "http://www.openarchives.org/OAI/2.0/oai_dc/",
    'oai': "http://www.openarchives.org/OAI/2.0/",
    'srw': "http://www.loc.gov/zing/srw/",
    'diag': "http://www.loc.gov/zing/srw/diagnostic/",
    'ucp': "info:lc/xmlns/update-v1",
    'sahara': "http://sahara.cq2.org/xsd/saharaget.xsd",
    'status': "http://sahara.cq2.org/xsd/saharaget.xsd",
})

# Module-level shortcuts to the bound xpath helpers.
xpath = namespaces.xpath
xpathFirst = namespaces.xpathFirst
## end license ## from seecr.test import SeecrTestCase from meresco.xml import namespaces from meresco.xml.subtreestreebuilder import SubTreesTreeBuilder, SimpleSaxFileParser from lxml.etree import parse, XMLParser, tostring from math import ceil from StringIO import StringIO namespaces = namespaces.copyUpdate({ 'def_': 'u:ri/default#', 'newdef_': 'u:ri/newdefault#', 'other_': 'u:ri/other#', 'pre_': 'u:ri/prefixed#', }) xpath = namespaces.xpath xpathFirst = namespaces.xpathFirst # lxml / LibXML implementation detail; data feed()'ed can be buffered or otherwise unprocessed, until close() is called on the feedparsing interface. # This will result in 0...n (start|comment|data|pi|end)-calls and # then a close-call on the TreeBuilder-interface. # # Therefor, getSubtrees() *must* be called after a close() on the XMLParser. class SubTreesTreeBuilderTest(SeecrTestCase): def testParseAndProcessSimpleFile(self): builder = SubTreesTreeBuilder(elementPath=['records', 'record'])
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# "Metastreams Harvester" is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with "Metastreams Harvester"; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
## end license ##

from meresco.xml import namespaces as _namespaces

# Result of copyUpdate on the meresco.xml namespaces with the prefixes below.
# NOTE(review): 'sahara' and 'status' map to the same URI - confirm this is intended.
namespaces = _namespaces.copyUpdate({
    'dc': "http://purl.org/dc/elements/1.1/",
    'oai_dc': "http://www.openarchives.org/OAI/2.0/oai_dc/",
    'oai': "http://www.openarchives.org/OAI/2.0/",
    'srw': 'http://www.loc.gov/zing/srw/',
    'diag': 'http://www.loc.gov/zing/srw/diagnostic/',
    'ucp': "info:lc/xmlns/update-v1",
    'sahara': "http://sahara.cq2.org/xsd/saharaget.xsd",
    'status': "http://sahara.cq2.org/xsd/saharaget.xsd",
})

# Module-level shortcuts to the bound xpath helpers.
xpath = namespaces.xpath
xpathFirst = namespaces.xpathFirst
# NOTE(review): this fragment was flattened; the class that owns
# _srwSpecificValidation is not visible here - keep the method at that
# class's method indentation when applying this edit.
def _srwSpecificValidation(self, operation, arguments):
    """Reject SRW requests that use an unsupported operation or parameter.

    Raises:
        SruException: with UNSUPPORTED_OPERATION when operation is not
            'searchRetrieve', or with UNSUPPORTED_PARAMETER when
            'stylesheet' is present in arguments.
    """
    if operation != 'searchRetrieve':
        raise SruException(UNSUPPORTED_OPERATION, operation)
    if 'stylesheet' in arguments:
        raise SruException(UNSUPPORTED_PARAMETER, 'stylesheet')


class SoapException(Exception):
    """Exception that can render itself as a SOAP Fault element."""

    def __init__(self, faultCode, faultString):
        # Initialise the base class so args, str() and tracebacks carry the
        # fault details; previously they were empty.
        Exception.__init__(self, faultCode, faultString)
        self._faultCode = faultCode
        self._faultString = faultString

    def asSoap(self):
        """Return a <SOAP:Fault> fragment with the XML-escaped code and string."""
        return """<SOAP:Fault><faultcode>%s</faultcode><faultstring>%s</faultstring></SOAP:Fault>""" % (xmlEscape(self._faultCode), xmlEscape(self._faultString))


# Result of copyUpdate on the imported namespaces with the 'soap' prefix added.
namespaces = _namespaces.copyUpdate(dict(
    soap="http://schemas.xmlsoap.org/soap/envelope/",
))
xpath = namespaces.xpath
xpathFirst = namespaces.xpathFirst


def localname(tag):
    """Return, as str, the part of tag after its last '}' (all of tag if none)."""
    _, _, name = tag.rpartition('}')
    return str(name)


# Envelope wrapper; the header's %(soap)s is filled from the namespaces mapping.
SOAP_HEADER = """<SOAP:Envelope xmlns:SOAP="%(soap)s"><SOAP:Body>""" % namespaces
SOAP_FOOTER = """</SOAP:Body></SOAP:Envelope>"""
# Template with a single %s slot for the body content.
SOAP = SOAP_HEADER + "%s" + SOAP_FOOTER
from meresco.servers.gateway.gatewayserver import NORMALISED_DOC_NAME from meresco.components.http.utils import ContentTypePlainText, okPlainText, ContentTypeJson # from meresco.dans.loggerrss import LoggerRSS # from meresco.dans.logger import Logger # Normalisation Logger. # NL_DIDL_NORMALISED_PREFIX = 'nl_didl_norm' # NL_DIDL_COMBINED_PREFIX = 'nl_didl_combined' NAMESPACEMAP = namespaces.copyUpdate({ 'dip': 'urn:mpeg:mpeg21:2005:01-DIP-NS', 'gal': "info:eu-repo/grantAgreement", 'hbo': "info:eu-repo/xmlns/hboMODSextension", 'wmp': "http://www.surfgroepen.nl/werkgroepmetadataplus", 'norm': 'http://dans.knaw.nl/narcis/normalized', }) def createDownloadHelix(reactor, periodicDownload, oaiDownload, dbStorageComponent): return \ (periodicDownload, # Scheduled connection to a remote (response / request)... (XmlParseLxml(fromKwarg="data", toKwarg="lxmlNode", parseOptions=dict(huge_tree=True, remove_blank_text=True)), # Convert from plain text to lxml-object. (oaiDownload, # Implementation/Protocol of a PeriodicDownload... (UpdateAdapterFromOaiDownloadProcessor(), # Maakt van een SRU update/delete bericht (lxmlNode) een relevante message: 'delete' of 'add' message. # (FilterMessages(['delete']), # Filtert delete messages
from copy import deepcopy from weightless.core import NoneOfTheObserversRespond, DeclineMessage from meresco.core import Observable from meresco.components import lxmltostring, Converter from meresco.dans.nameidentifier import Orcid, Dai, Isni, Rid, NameIdentifierFactory from meresco.xml import namespaces namespacesmap = namespaces.copyUpdate({ # See: https://github.com/seecr/meresco-xml/blob/master/meresco/xml/namespaces.py 'dip' : 'urn:mpeg:mpeg21:2005:01-DIP-NS', 'dii' : 'urn:mpeg:mpeg21:2002:01-DII-NS', 'dai' : 'info:eu-repo/dai', 'gal' : 'info:eu-repo/grantAgreement', 'wmp' : 'http://www.surfgroepen.nl/werkgroepmetadataplus', 'prs' : 'http://www.onderzoekinformatie.nl/nod/prs', 'proj' : 'http://www.onderzoekinformatie.nl/nod/act', 'org' : 'http://www.onderzoekinformatie.nl/nod/org', 'long' : 'http://www.knaw.nl/narcis/1.0/long/', 'short' : 'http://www.knaw.nl/narcis/1.0/short/', 'mods' : 'http://www.loc.gov/mods/v3', 'didl' : 'urn:mpeg:mpeg21:2002:02-DIDL-NS', 'norm' : 'http://dans.knaw.nl/narcis/normalized', }) class ShortConverter(Converter): def __init__(self, fromKwarg, toKwarg=None, name=None, truncate_chars=300): Converter.__init__(self, name=name, fromKwarg=fromKwarg, toKwarg=toKwarg) self._truncate_chars = truncate_chars
Format.DATACITE: xmlDatacite } methods = {Item.GENRE: '_getGenre', Item.ACCESS_RIGHTS: '_getAccessRights'} testEmpty = etree.fromstring('<test/>') long = NormaliseOaiRecord(UiaConverter) namespacesmap = namespaces.copyUpdate( { # See: https://github.com/seecr/meresco-xml/blob/master/meresco/xml/namespaces.py 'dip': 'urn:mpeg:mpeg21:2005:01-DIP-NS', 'dii': 'urn:mpeg:mpeg21:2002:01-DII-NS', 'xlink': 'http://www.w3.org/1999/xlink', 'dai': 'info:eu-repo/dai', 'gal': 'info:eu-repo/grantAgreement', 'wmp': 'http://www.surfgroepen.nl/werkgroepmetadataplus', 'prs': 'http://www.onderzoekinformatie.nl/nod/prs', 'proj': 'http://www.onderzoekinformatie.nl/nod/act', 'org': 'http://www.onderzoekinformatie.nl/nod/org', 'long': 'http://www.knaw.nl/narcis/1.0/long/', 'short': 'http://www.knaw.nl/narcis/1.0/short/', 'mods': 'http://www.loc.gov/mods/v3', 'didl': 'urn:mpeg:mpeg21:2002:02-DIDL-NS', 'norm': 'http://dans.knaw.nl/narcis/normalized', 'datacite': 'http://datacite.org/schema/kernel-4' }) class LongConverterTest(unittest.TestCase): def _reset(self, xmlBase): self.xml = copy.deepcopy(xmlBase) self.test = copy.deepcopy(testEmpty)
from meresco.xml import xpathFirst, xpath, namespaces from lxml import etree # TODO: create UnitTestCase for o.a. writeDelete / unDelete # TODO: SRU-throttle mogelijkheden uitzoeken. testNamespaces = namespaces.copyUpdate({ 'oaibrand': 'http://www.openarchives.org/OAI/2.0/branding/', 'prs': 'http://www.onderzoekinformatie.nl/nod/prs', 'proj': 'http://www.onderzoekinformatie.nl/nod/act', 'org': 'http://www.onderzoekinformatie.nl/nod/org', 'long': 'http://www.knaw.nl/narcis/1.0/long/', 'short': 'http://www.knaw.nl/narcis/1.0/short/', 'mods': 'http://www.loc.gov/mods/v3', 'didl': 'urn:mpeg:mpeg21:2002:02-DIDL-NS', 'norm': 'http://dans.knaw.nl/narcis/normalized', }) class SruSlaveTest(IntegrationTestCase): def testSruQuery(self): response = self.doSruQuery(query='*', recordSchema='knaw_short',
from meresco.core import Observable
from meresco.components import lxmltostring, Converter
from meresco.dans.metadataformats import MetadataFormat
from meresco.xml import namespaces
import time

# '{uri}' prefixes for the harvester meta and document namespaces.
HVSTR_NS = '{http://meresco.org/namespace/harvester/meta}'
DOCUMENT_NS = '{http://meresco.org/namespace/harvester/document}'

# Result of copyUpdate on the meresco.xml namespaces with the prefixes below.
namespaceMap = namespaces.copyUpdate(dict(
    prs='http://www.onderzoekinformatie.nl/nod/prs',
    ond='http://www.onderzoekinformatie.nl/nod/act',
    org='http://www.onderzoekinformatie.nl/nod/org',
    long='http://www.knaw.nl/narcis/1.0/long/',
    short='http://www.knaw.nl/narcis/1.0/short/',
    mods='http://www.loc.gov/mods/v3',
    didl='urn:mpeg:mpeg21:2002:02-DIDL-NS',
    norm='http://dans.knaw.nl/narcis/normalized',
))

MODS_VERSION = '3.6'
MODS_NAMESPACE = "http://www.loc.gov/mods/v3"
# The MODS namespace wrapped in braces, i.e. '{http://www.loc.gov/mods/v3}'.
MODS = "{" + MODS_NAMESPACE + "}"
# Prefix-to-URI map; the None key holds the MODS namespace.
NSMAP = {
    None: MODS_NAMESPACE,
    'xlink': 'http://www.w3.org/1999/xlink',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}
from time import sleep from urllib.parse import urlencode from meresco.core import Observable from meresco.components import lxmltostring, RetrieveToGetDataAdapter from meresco.components.http.utils import parseResponse, CRLF from meresco.sequentialstore import MultiSequentialStorage from meresco.xml import namespaces from meresco.oai import OaiPmh, OaiJazz, OaiBranding, SuspendRegister from weightless.core import be, compose, asBytes namespaces = namespaces.copyUpdate({ 'toolkit': 'http://oai.dlib.vt.edu/OAI/metadata/toolkit', 'branding': 'http://www.openarchives.org/OAI/2.0/branding/', 'identifier': 'http://www.openarchives.org/OAI/2.0/oai-identifier', }) xpath = namespaces.xpath xpathFirst = namespaces.xpathFirst BATCHSIZE = 10 HOSTNAME = gethostname() class _OaiPmhTest(SeecrTestCase): def setUp(self): SeecrTestCase.setUp(self) self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz')) self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store')) self.oaipmh = self.getOaiPmh()