def testUpdateRecordWhileSendingData(self):
    """ListIdentifiers still ends with a resumptionToken when 'id1' is re-added mid-response.

    batchSize + 10 records are loaded, the response is consumed chunk by
    chunk, and as soon as the chunk containing id0 has been seen 'id1' is
    added again. The final XML body must still carry a resumptionToken.
    """
    pageSize = 3
    jazz = OaiJazz(join(self.tempdir, 'oai'))
    storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storage, jazz, count=pageSize + 10)
    dna = be(
        (Observable(),
            (OaiPmh(repositoryName='test', adminEmail='*****@*****.**', batchSize=pageSize),
                (storage,),
                (jazz,),
            )
        )
    )
    requestKwargs = {
        'Method': 'GET',
        'Headers': {'Host': 'myserver'},
        'port': 1234,
        'path': '/oaipmh.pl',
        'arguments': {'verb': ['ListIdentifiers'], 'metadataPrefix': ['prefix']},
    }
    collected = StringIO()
    for chunk in compose(dna.all.handleRequest(**requestKwargs)):
        collected.write(chunk)
        if 'identifier>id0<' in chunk:
            # Touch id1 while the remainder of the batch is still to be sent.
            jazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
    # Everything after the last blank line is the XML body.
    xmlBody = collected.getvalue().split(CRLF*2)[-1]
    token = xpathFirst(XML(xmlBody), '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
    self.assertFalse(token is None)
def testUpdateRecordWhileSendingData(self):
    """ListIdentifiers still ends with a resumptionToken when 'id1' is re-added mid-response.

    The metadata format 'prefix' is registered up front, batchSize + 10
    records are loaded, and 'id1' is added again as soon as the chunk
    containing id0 has been seen in the streamed response.
    """
    pageSize = 3
    jazz = OaiJazz(join(self.tempdir, 'oai'))
    jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storage, jazz, count=pageSize + 10)
    dna = be(
        (Observable(),
            (OaiPmh(repositoryName='test', adminEmail='*****@*****.**', batchSize=pageSize),
                (storage,),
                (jazz,),
            )
        )
    )
    requestKwargs = {
        'Method': 'GET',
        'Headers': {'Host': 'myserver'},
        'port': 1234,
        'path': '/oaipmh.pl',
        'arguments': {'verb': ['ListIdentifiers'], 'metadataPrefix': ['prefix']},
    }
    collected = StringIO()
    for chunk in compose(dna.all.handleRequest(**requestKwargs)):
        collected.write(chunk)
        if 'identifier>id0<' in chunk:
            # Touch id1 while the remainder of the batch is still to be sent.
            jazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])
    # Everything after the last blank line is the XML body; XML() gets bytes.
    xmlBody = collected.getvalue().split(CRLF * 2)[-1]
    token = xpathFirst(
        XML(xmlBody.encode()),
        '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
    self.assertFalse(token is None)
def testNearRealtimeOai(self):
    """A running harvester picks up a record added after it has caught up.

    Three records are served first (expected as a batch of two adds followed
    by a batch of one add), after which one suspended request is expected in
    the register. Adding 'id3' must then produce exactly one more batch
    containing that record, and the harvester must end up suspended again.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    suspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 3)

    def servePmh():
        return self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister)

    def harvest():
        return self.startOaiHarvester(portNumber, observer)

    oaiPmhThread = Thread(None, servePmh)
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    harvestThread = Thread(None, harvest)
    oaiPmhThread.start()
    harvestThread.start()

    def calledNames():
        return [m.name for m in observer.calledMethods]

    try:
        requests = 3
        sleepWheel(1.0 + 1.0 * requests)
        expectedFirstRound = [
            'startOaiBatch', 'add', 'add', 'stopOaiBatch',
            'startOaiBatch', 'add', 'stopOaiBatch',
        ]
        self.assertEqual(expectedFirstRound, calledNames())
        ids = []
        for m in observer.calledMethods:
            if m.name == 'add':
                ids.append(xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()'))
        self.assertEqual([['id0'], ['id1'], ['id2']], ids)
        self.assertEqual(1, len(suspendRegister))

        observer.calledMethods.reset()
        requests += 1
        storageComponent.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
        oaiJazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
        sleepWheel(1)
        self.assertEqual(0, len(suspendRegister))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames())
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in kwarg, kwarg)
        sleepWheel(1.0)
        self.assertEqual(1, len(suspendRegister))
    finally:
        # Always stop both threads and release the index, even on failure.
        self.run = False
        oaiPmhThread.join()
        harvestThread.join()
        oaiJazz.close()
def testRemoveSet(self):
    """removeSetsFromOai strips set 'a:b' from every record and from the set list.

    Three records are stored in sets a:b and/or a:c. After removing 'a:b'
    (batchSize=1) the index is re-opened and the records are expected in the
    order id:2, id:0, id:1, none of them deleted, with 'a:b' gone.

    Both OaiJazz instances are closed in ``finally`` blocks so a failing
    assertion does not leave the index directory open.
    """
    def selected(jazz):
        return [(r.identifier, r.sets, r.isDeleted) for r in jazz.oaiSelect(prefix='prefix').records]

    oaiJazz = OaiJazz(self.tempdir)
    try:
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataFormats=[('prefix', '', '')])
        self.assertEqual([
                ('id:0', set([u'a', u'a:b', u'a:c']), False),
                ('id:1', set([u'a', u'a:b']), False),
                ('id:2', set([u'a', u'a:c']), False),
            ], selected(oaiJazz))
        self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # Closed before removeSetsFromOai operates on the same directory.
        oaiJazz.close()

    removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

    oaiJazz = OaiJazz(self.tempdir)
    try:
        self.assertEqual([
                ('id:2', set([u'a', u'a:c']), False),
                ('id:0', set([u'a', u'a:c']), False),
                ('id:1', set([]), False),  # remove hierarchical sets! if possible
            ], selected(oaiJazz))
        self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # The re-opened index was previously never closed.
        oaiJazz.close()
def testNearRealtimeOaiSavesState(self):
    """A restarted harvester continues after the record it already received.

    Round one serves the single record 'id0'. Server and harvester are then
    stopped, 'id1' is added, and both are started again on a new port. Round
    two must deliver only 'id1'.

    The thread handles are locals of this test shared with ``start``/``stop``
    through ``nonlocal`` (they used to be written to module globals), and
    each round stops its threads in ``finally`` so a failing assertion does
    not leave them running.
    """
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 1)
    oaiPmhThread = None
    harvestThread = None

    def start():
        """Start the OAI-PMH server and the harvester on a fresh random port."""
        nonlocal oaiPmhThread, harvestThread
        self.run = True
        portNumber = randint(50000, 60000)
        oaiPmhThread = Thread(
            None,
            lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread = Thread(
            None,
            lambda: self.startOaiHarvester(portNumber, observer))
        oaiPmhThread.start()
        harvestThread.start()

    def stop():
        """Clear the run flag and wait for both threads to finish."""
        nonlocal oaiPmhThread, harvestThread
        self.run = False
        oaiPmhThread.join()
        oaiPmhThread = None
        harvestThread.join()
        harvestThread = None

    start()
    try:
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
    finally:
        stop()

    observer.calledMethods.reset()
    storageComponent.addData(identifier="id1", name="prefix", data=b"<a>a1</a>")
    oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

    start()
    try:
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
    finally:
        stop()
def testListRecordsWithMultiSequentialStorage(self):
    """ListRecords returns the payload stored in a MultiSequentialStorage observer."""
    jazz = OaiJazz(join(self.tempdir, '1'))
    lister = OaiList(batchSize=2, repository=OaiRepository())
    store = MultiSequentialStorage(join(self.tempdir, "2"))
    recordWriter = OaiRecord()
    # Observer order is kept: index, storage, record renderer.
    for component in (jazz, store, recordWriter):
        lister.addObserver(component)

    recordId = "id0"
    jazz.addOaiRecord(recordId, (), metadataFormats=[('oai_dc', '', '')])
    store.addData(identifier=recordId, name="oai_dc", data="data01")

    requestArguments = dict(verb=['ListRecords'], metadataPrefix=['oai_dc'])
    response = oailistResponse = lister.listRecords(arguments=requestArguments, **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    metadataNodes = xpath(parse(StringIO(body)), '//oai:metadata')
    self.assertEquals("data01", metadataNodes[0].text)
def testListRecordsWithALotOfDeletedRecords(self):
    """ListRecords returns each identifier once even though 'id1' was added twice."""
    jazz = OaiJazz(join(self.tempdir, '1'))
    lister = OaiList(batchSize=2, repository=OaiRepository())
    store = MultiSequentialStorage(join(self.tempdir, "2"))
    recordWriter = OaiRecord()
    # Observer order is kept: index, storage, record renderer.
    for component in (jazz, store, recordWriter):
        lister.addObserver(component)

    # 'id1' is written twice on purpose.
    for recordId in ('id0', 'id1', 'id1'):
        jazz.addOaiRecord(recordId, (), metadataFormats=[('oai_dc', '', '')])
        store.addData(identifier=recordId, name="oai_dc", data="data_%s" % recordId)

    requestArguments = dict(verb=['ListRecords'], metadataPrefix=['oai_dc'])
    response = lister.listRecords(arguments=requestArguments, **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    metadataTexts = xpath(parse(StringIO(body)), '//oai:metadata/text()')
    self.assertEquals(["data_id0", "data_id1"], metadataTexts)
def testNearRealtimeOaiSavesState(self):
    """A restarted harvester continues after the record it already received.

    Round one serves the single record 'id0'. Server and harvester are then
    stopped, 'id1' is added, and both are started again on a new port. Round
    two must deliver only 'id1'.

    The thread handles live in a dict shared with ``start``/``stop`` by
    closure (they used to be written to module globals through ``global``),
    and each round stops its threads in ``finally`` so a failing assertion
    does not leave them running.
    """
    observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 1)
    # Mutable holder instead of rebinding names: works without `nonlocal`.
    threads = {'oaiPmh': None, 'harvest': None}

    def start():
        """Start the OAI-PMH server and the harvester on a fresh random port."""
        self.run = True
        portNumber = randint(50000, 60000)
        threads['oaiPmh'] = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        threads['harvest'] = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))
        threads['oaiPmh'].start()
        threads['harvest'].start()

    def stop():
        """Clear the run flag and wait for both threads to finish."""
        self.run = False
        threads['oaiPmh'].join()
        threads['oaiPmh'] = None
        threads['harvest'].join()
        threads['harvest'] = None

    start()
    try:
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
    finally:
        stop()

    observer.calledMethods.reset()
    storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
    oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

    start()
    try:
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
    finally:
        stop()
def testShouldRaiseExceptionOnSameRequestTwice(self):
    """A second x-wait request from the same client aborts the first one with a 500.

    Two harvester threads issue the same suspended ListRecords request using
    one client identifier. Once the second request has replaced the first in
    the suspend register, exactly one response must have come back: a 500
    whose body starts with 'Aborting suspended request'.

    Changes from the earlier version: ``doOaiListRecord`` now uses its
    ``port`` argument, and cleanup only joins the second harvester thread if
    it was actually started, so an early failure is not masked by
    ``RuntimeError`` from ``Thread.join``.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    requests = []

    def doOaiListRecord(port):
        """Issue one suspended ListRecords request and record its (header, body)."""
        header, body = getRequest(
            port=port,
            path="/",
            arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"},
            additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
            parse=False)
        requests.append((header, body))

    oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()

        try:
            # Wait until the first request is parked in the register.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)
            harvestThread2.start()

            # Wait until the second request has replaced the first one.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)
            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

            self.assertEqual(1, len(requests))
            header, body = requests[0]
            self.assertTrue('500' in header, header)
            self.assertTrue(body.startswith('Aborting suspended request'), body)

            # Adding a record here precedes shutdown; presumably it releases
            # the still-suspended second request -- TODO confirm.
            storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
            sleep(0.1)

        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # Thread.ident stays None until start(); joining an unstarted
            # thread raises RuntimeError.
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()
def testGetRecordDeletedInRequestedPrefix(self):
    """GetRecord reports 'deleted' only for the prefix in which the record was deleted.

    Record 'id:0' exists in prefixes A and B and is deleted in A only:
    A must answer with header status 'deleted', B must return the data, and
    the unknown prefix C must give a cannotDisseminateFormat error.
    """
    oaijazz = OaiJazz(self.tempdir + '/jazz')
    # Constructed as before; it is not wired into the component tree below.
    storage = MultiSequentialStorage(self.tempdir + "/seq-store")
    oairecord = OaiRecord()

    class MyStorage(object):
        def getData(self, identifier, name):
            return 'data'

    oaigetrecord = be(
        (OaiGetRecord(repository=OaiRepository()),
            (oaijazz,),
            (oairecord,
                (MyStorage(),)
            )
        )
    )
    oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
    oaijazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

    def bodyFor(metadataPrefix):
        """Return the response body of GetRecord for id:0 in the given prefix."""
        arguments = {
            'verb': ['GetRecord'],
            'metadataPrefix': [metadataPrefix],
            'identifier': ['id:0'],
        }
        response = oaigetrecord.getRecord(arguments=arguments, **self.httpkwargs)
        _, responseBody = asString(response).split("\r\n\r\n")
        return responseBody

    body = bodyFor('A')
    status = xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
    self.assertEqual('deleted', status, body)

    body = bodyFor('B')
    self.assertEqual("data", xpathFirst(XML(body.encode()), '//oai:metadata/text()'))

    body = bodyFor('C')
    self.assertEqual('cannotDisseminateFormat', xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:error/@code'))
def main(reactor, port, directory):
    """Build the OAI test server tree and fill it with fifteen oai_dc records.

    Endpoints wired up: /dump, /control, /oai, /log and /ready. Records are
    named oai:record:01 .. oai:record:15, except number 2 which gets an
    identifier containing '/' and '&'. Records 3 and 6 are deleted again
    right after being added.

    reactor and port are handed to ObservableHttpServer; directory is the
    parent of the 'dump', 'storage' and 'oai' sub-directories.
    """
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))
    server = be(
        (Observable(),
            (ObservableHttpServer(reactor, port),
                (PathFilter("/dump"),
                    (dump,),
                ),
                (PathFilter("/control"),
                    (Control(),
                        (dump,),
                        (Log(),),
                    ),
                ),
                (PathFilter('/oai'),
                    (Log(),
                        (OaiPmh(repositoryName="Oai Test Server", adminEmail="*****@*****.**", batchSize=10),
                            (oaiStorage,),
                            (oaiJazz,),
                        ),
                    ),
                ),
                (PathFilter("/log"),
                    (RetrieveLog(),
                        (Log(),),
                    ),
                ),
                (PathFilter("/ready"),
                    (StringServer('yes', ContentTypePlainText),),
                ),
            ),
        )
    )
    list(compose(server.once.observer_init()))

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier></oai_dc:dc>'''
    oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
    for number in range(1, 16):
        # Record 2 gets an identifier with characters that need escaping.
        identifier = 'oai:record:02/&gkn' if number == 2 else 'oai:record:%02d' % number
        oaiStorage.addData(identifier=identifier, name='oai_dc', data=recordTemplate % escapeXml(identifier))
        oaiJazz.addOaiRecord(identifier=identifier, metadataFormats=[oaiDcFormat])
        if number in (3, 6):
            list(compose(oaiJazz.delete(identifier=identifier)))
def main(reactor, port, directory):
    """Build the OAI test server tree and fill it with fifteen oai_dc records.

    Endpoints wired up: /dump, /control, /oai, /badoai, /log and /ready.
    The oai_dc metadata format is registered once, then records
    oai:record:01 .. oai:record:15 are stored as UTF-8 bytes (number 2 gets
    an identifier containing '/' and '&'). Records 3 and 6 are deleted again
    right after being added.

    reactor and port are handed to ObservableHttpServer; directory is the
    parent of the 'dump', 'storage' and 'oai' sub-directories.
    """
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))
    server = be(
        (Observable(),
            (ObservableHttpServer(reactor, port),
                (PathFilter("/dump"),
                    (dump,),
                ),
                (PathFilter("/control"),
                    (Control(),
                        (dump,),
                        (Log(),),
                    ),
                ),
                (PathFilter('/oai'),
                    (Log(),
                        (OaiPmh(repositoryName="Oai Test Server", adminEmail="*****@*****.**", batchSize=10),
                            (oaiStorage,),
                            (oaiJazz,),
                        ),
                    ),
                ),
                (PathFilter('/badoai'),
                    (Log(),
                        (BadOai(),),
                    ),
                ),
                (PathFilter("/log"),
                    (RetrieveLog(),
                        (Log(),),
                    ),
                ),
                (PathFilter("/ready"),
                    (StringServer('yes', ContentTypePlainText),),
                ),
            ),
        )
    )
    list(compose(server.once.observer_init()))
    oaiJazz.updateMetadataFormat(
        prefix="oai_dc",
        schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier><dc:title>Title is √</dc:title></oai_dc:dc>'''
    for number in range(1, 16):
        # Record 2 gets an identifier with characters that need escaping.
        identifier = 'oai:record:02/&gkn' if number == 2 else 'oai:record:%02d' % number
        recordXml = recordTemplate % escapeXml(identifier)
        oaiStorage.addData(identifier=identifier, name='oai_dc', data=recordXml.encode('utf-8'))
        oaiJazz.addOaiRecord(identifier=identifier, metadataPrefixes=['oai_dc'])
        if number in (3, 6):
            list(compose(oaiJazz.delete(identifier=identifier)))
def testNearRealtimeOai(self):
    """A running harvester picks up a record added after it has caught up.

    Three records are served first (expected as a batch of two adds followed
    by a batch of one add), after which one suspended request is expected in
    the register. Adding 'id3' must then produce exactly one more batch
    containing that record, and the harvester must end up suspended again.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    suspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 3)

    def servePmh():
        return self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister)

    def harvest():
        return self.startOaiHarvester(portNumber, observer)

    oaiPmhThread = Thread(None, servePmh)
    observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
    harvestThread = Thread(None, harvest)
    oaiPmhThread.start()
    harvestThread.start()

    def calledNames():
        return [m.name for m in observer.calledMethods]

    try:
        requests = 3
        sleepWheel(1.0 + 1.0 * requests)
        expectedFirstRound = [
            'startOaiBatch', 'add', 'add', 'stopOaiBatch',
            'startOaiBatch', 'add', 'stopOaiBatch',
        ]
        self.assertEquals(expectedFirstRound, calledNames())
        ids = []
        for m in observer.calledMethods:
            if m.name == 'add':
                ids.append(xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()'))
        self.assertEquals([['id0'], ['id1'], ['id2']], ids)
        self.assertEquals(1, len(suspendRegister))

        observer.calledMethods.reset()
        requests += 1
        storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
        oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
        sleepWheel(1)
        self.assertEquals(0, len(suspendRegister))
        self.assertEquals(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames())
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in kwarg, kwarg)
        sleepWheel(1.0)
        self.assertEquals(1, len(suspendRegister))
    finally:
        # Always stop both threads and release the index, even on failure.
        self.run = False
        oaiPmhThread.join()
        harvestThread.join()
        oaiJazz.close()
def testGetRecordWithMultiSequentialStorage(self):
    """GetRecord returns data read from MultiSequentialStorage via RetrieveToGetDataAdapter."""
    jazz = OaiJazz(self.tempdir + "/jazz")
    store = MultiSequentialStorage(self.tempdir + "/seq-store")
    recordWriter = OaiRecord()
    getRecordTree = be(
        (OaiGetRecord(repository=OaiRepository()),
            (jazz,),
            (recordWriter,
                (RetrieveToGetDataAdapter(),
                    (store,)
                )
            ),
        )
    )
    jazz.addOaiRecord(identifier="id0", sets=(), metadataFormats=[("oai_dc", "", "")])
    store.addData(identifier="id0", name="oai_dc", data="data01")

    requestArguments = {
        "verb": ["GetRecord"],
        "metadataPrefix": ["oai_dc"],
        "identifier": ["id0"],
    }
    response = getRecordTree.getRecord(arguments=requestArguments, **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    metadataNodes = xpath(parse(StringIO(body)), "//oai:metadata")
    self.assertEquals("data01", metadataNodes[0].text)
def testGetRecordDeletedInRequestedPrefix(self):
    """GetRecord reports 'deleted' only for the prefix in which the record was deleted.

    Record 'id:0' exists in prefixes A and B and is deleted in A only:
    A must answer with header status 'deleted', B must return the data, and
    the unknown prefix C must give a cannotDisseminateFormat error.
    """
    oaijazz = OaiJazz(self.tempdir + '/jazz')
    # Constructed as before; it is not wired into the component tree below.
    storage = MultiSequentialStorage(self.tempdir + "/seq-store")
    oairecord = OaiRecord()

    class MyStorage(object):
        def getData(self, identifier, name):
            return 'data'

    oaigetrecord = be(
        (OaiGetRecord(repository=OaiRepository()),
            (oaijazz,),
            (oairecord,
                (MyStorage(),)
            )
        )
    )
    oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
    oaijazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

    def bodyFor(metadataPrefix):
        """Return the response body of GetRecord for id:0 in the given prefix."""
        arguments = {
            'verb': ['GetRecord'],
            'metadataPrefix': [metadataPrefix],
            'identifier': ['id:0'],
        }
        response = oaigetrecord.getRecord(arguments=arguments, **self.httpkwargs)
        _, responseBody = asString(response).split("\r\n\r\n")
        return responseBody

    body = bodyFor('A')
    status = xpathFirst(XML(body), '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
    self.assertEqual('deleted', status, body)

    body = bodyFor('B')
    self.assertEqual("data", xpathFirst(XML(body), '//oai:metadata/text()'))

    body = bodyFor('C')
    self.assertEqual('cannotDisseminateFormat', xpathFirst(XML(body), '/oai:OAI-PMH/oai:error/@code'))
def testGetRecordWithMultiSequentialStorage(self):
    """GetRecord returns bytes stored in MultiSequentialStorage via RetrieveToGetDataAdapter."""
    jazz = OaiJazz(self.tempdir + '/jazz')
    jazz.updateMetadataFormat(prefix="oai_dc", schema="", namespace="")
    store = MultiSequentialStorage(self.tempdir + "/seq-store")
    recordWriter = OaiRecord()
    getRecordTree = be(
        (OaiGetRecord(repository=OaiRepository()),
            (jazz,),
            (recordWriter,
                (RetrieveToGetDataAdapter(),
                    (store,)
                )
            ),
        )
    )
    jazz.addOaiRecord(identifier="id0", metadataPrefixes=['oai_dc'])
    store.addData(identifier="id0", name="oai_dc", data=b"data01")

    requestArguments = {
        'verb': ['GetRecord'],
        'metadataPrefix': ['oai_dc'],
        'identifier': ['id0'],
    }
    response = getRecordTree.getRecord(arguments=requestArguments, **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    # The body is re-encoded so it can be parsed from a byte stream.
    metadataNodes = xpath(parse(BytesIO(body.encode())), '//oai:metadata')
    self.assertEqual("data01", metadataNodes[0].text)
def testRemoveSet(self):
    """removeSetsFromOai strips set 'a:b' from every record and from the set list.

    Three records are stored in sets a:b and/or a:c. After removing 'a:b'
    (batchSize=1) the index is re-opened and the records are expected in the
    order id:2, id:0, id:1, none of them deleted, with 'a:b' gone.

    Both OaiJazz instances are closed in ``finally`` blocks so a failing
    assertion does not leave the index directory open.
    """
    def selected(jazz):
        return [(r.identifier, r.sets, r.isDeleted) for r in jazz.oaiSelect(prefix='prefix').records]

    oaiJazz = OaiJazz(self.tempdir)
    try:
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataPrefixes=['prefix'])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataPrefixes=['prefix'])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataPrefixes=['prefix'])
        self.assertEqual([
                ('id:0', set(['a', 'a:b', 'a:c']), False),
                ('id:1', set(['a', 'a:b']), False),
                ('id:2', set(['a', 'a:c']), False),
            ], selected(oaiJazz))
        self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # Closed before removeSetsFromOai operates on the same directory.
        oaiJazz.close()

    removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

    oaiJazz = OaiJazz(self.tempdir)
    try:
        self.assertEqual([
                ('id:2', set(['a', 'a:c']), False),
                ('id:0', set(['a', 'a:c']), False),
                ('id:1', set([]), False),
            ], selected(oaiJazz))
        self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # The re-opened index was previously never closed.
        oaiJazz.close()
class OaiListTest(SeecrTestCase): def setUp(self): SeecrTestCase.setUp(self) self.oaiJazz = OaiJazz(self.tempdir) self.oaiList = OaiList(batchSize=2, repository=OaiRepository()) self.observer = CallTrace('observer', emptyGeneratorMethods=['suspendBeforeSelect']) self.observer.methods['suspendAfterNoResult'] = lambda **kwargs: (s for s in ['SUSPEND']) self.observer.methods['oaiWatermark'] = lambda o=None: (x for x in ["Crafted By Seecr"]) def oaiRecord(record, metadataPrefix, fetchedRecords=None): yield '<mock:record xmlns:mock="uri:mock">%s/%s</mock:record>' % (escapeXml(record.identifier), escapeXml(metadataPrefix)) self.observer.methods['oaiRecord'] = oaiRecord self.observer.methods['oaiRecordHeader'] = oaiRecord self.observer.methods['getAllPrefixes'] = self.oaiJazz.getAllPrefixes self.observer.methods['oaiSelect'] = self.oaiJazz.oaiSelect self.getMultipleDataIdentifiers = [] def getMultipleData(**kwargs): self.getMultipleDataIdentifiers.append(list(kwargs.get('identifiers'))) raise NoneOfTheObserversRespond('No one', 0) self.observer.methods['getMultipleData'] = getMultipleData self.oaiList.addObserver(self.observer) self.clientId = str(uuid4()) self.httpkwargs = { 'path': '/path/to/oai', 'Headers': {'Host':'server', 'X-Meresco-Oai-Client-Identifier': self.clientId}, 'port': 9000, } def testListRecords(self): self._addRecords(['id:0&0', 'id:1&1']) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) self.assertEquals(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken'))) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], [m.name for m in self.observer.calledMethods]) selectMethod = self.observer.calledMethods[1] self.assertEquals(dict(continueAfter='0', oaiUntil=None, 
prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None), selectMethod.kwargs) recordMethods = self.observer.calledMethods[4:] self.assertEquals({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs)) self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs)) self.assertEquals([['id:0&0', 'id:1&1']], self.getMultipleDataIdentifiers) def testListRecordsUsesFetchedRecords(self): self._addRecords(['id:0&0', 'id:1']) self.observer.methods['getMultipleData'] = lambda name, identifiers, ignoreMissing=False: [('id:0&0', 'data1'), ('id:1', 'data2'), ('id:2', 'data3')] consume(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs)) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], self.observer.calledMethodNames()) self.assertEquals({'id:0&0': 'data1', 'id:1': 'data2', 'id:2': 'data3'}, self.observer.calledMethods[4].kwargs['fetchedRecords']) self.assertEquals({'id:0&0': 'data1', 'id:1': 'data2', 'id:2': 'data3'}, self.observer.calledMethods[4].kwargs['fetchedRecords']) def testListRecordsWithDeletes(self): self._addRecords(['id:0&0', 'id:1&1']) consume(self.oaiJazz.delete(identifier='id:1&1')) consume(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs)) self.assertEquals([['id:0&0']], self.getMultipleDataIdentifiers) def testListRecordsWithMultiSequentialStorage(self): oaijazz = OaiJazz(join(self.tempdir, '1')) oailist = OaiList(batchSize=2, repository=OaiRepository()) storage = MultiSequentialStorage(join(self.tempdir, "2")) oailist.addObserver(oaijazz) oairecord = OaiRecord() oailist.addObserver(storage) oailist.addObserver(oairecord) identifier = "id0" oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')]) storage.addData(identifier=identifier, name="oai_dc", data="data01") response = 
oailist.listRecords(arguments=dict( verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs) _, body = asString(response).split("\r\n\r\n") self.assertEquals("data01", xpath(parse(StringIO(body)), '//oai:metadata')[0].text) def testListRecordsWithALotOfDeletedRecords(self): oaijazz = OaiJazz(join(self.tempdir, '1')) oailist = OaiList(batchSize=2, repository=OaiRepository()) storage = MultiSequentialStorage(join(self.tempdir, "2")) oailist.addObserver(oaijazz) oairecord = OaiRecord() oailist.addObserver(storage) oailist.addObserver(oairecord) for id in ['id0', 'id1', 'id1']: oaijazz.addOaiRecord(id, (), metadataFormats=[('oai_dc', '', '')]) storage.addData(identifier=id, name="oai_dc", data="data_%s" % id) response = oailist.listRecords(arguments=dict( verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs) _, body = asString(response).split("\r\n\r\n") self.assertEquals(["data_id0", "data_id1"], xpath(parse(StringIO(body)), '//oai:metadata/text()')) def testListIdentifiers(self): self._addRecords(['id:0&0', 'id:1&1']) header, body = ''.join(compose(self.oaiList.listIdentifiers(arguments={'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/mock:record'))) self.assertEquals(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken'))) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecordHeader', 'oaiRecordHeader'], [m.name for m in self.observer.calledMethods]) selectMethod = self.observer.calledMethods[1] self.assertEquals(dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None), selectMethod.kwargs) headerMethods = self.observer.calledMethods[4:] self.assertEquals({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(headerMethods[0].kwargs)) 
self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(headerMethods[1].kwargs)) def testListRecordsProducesResumptionToken(self): self._addRecords(['id:0&0', 'id:1&1', 'id:2&2'], sets=[('set0', 'setName')]) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'from': ['2000-01-01T00:00:00Z'], 'until': ['4012-01-01T00:00:00Z'], 'set': ['set0']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) resumptionToken = ResumptionToken.fromString(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]) self.assertEquals('4012-01-01T00:00:00Z', resumptionToken.until) self.assertEquals('2000-01-01T00:00:00Z', resumptionToken.from_) self.assertEquals('set0', resumptionToken.set_) self.assertEquals('oai_dc', resumptionToken.metadataPrefix) continueAfter = self.oaiJazz.getRecord('id:1&1').stamp self.assertEquals(str(continueAfter), resumptionToken.continueAfter) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], [m.name for m in self.observer.calledMethods]) selectMethod = self.observer.calledMethods[1] self.assertEquals(dict(continueAfter='0', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc', oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2, shouldCountHits=False, partition=None), selectMethod.kwargs) recordMethods = self.observer.calledMethods[4:] self.assertEquals({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs)) self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs)) def testListRecordsUsesGivenResumptionToken(self): self._addRecords(['id:2&2'], sets=[('set0', 'setName')]) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 
'resumptionToken':['u4012-01-01T00:00:00Z|c1000|moai_dc|sset0|f2000-01-01T00:00:00Z']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'], [m.name for m in self.observer.calledMethods]) selectMethod = self.observer.calledMethods[1] self.assertEquals(dict(continueAfter='1000', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc', oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2, shouldCountHits=False, partition=None), selectMethod.kwargs) recordMethods = self.observer.calledMethods[4:] self.assertEquals({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs)) def testListRecordsEmptyFinalResumptionToken(self): self._addRecords(['id:2&2', 'id:3&3']) resumptionToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=0)) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'resumptionToken':[resumptionToken]}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) resumptionTokens = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken') self.assertEquals(1, len(resumptionTokens)) self.assertEquals(None, resumptionTokens[0].text) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], [m.name for m in self.observer.calledMethods]) selectMethod = self.observer.calledMethods[1] self.assertEquals(dict(continueAfter='0', oaiUntil='', prefix='oai_dc', oaiFrom='', sets=[], batchSize=2, shouldCountHits=False, partition=None), selectMethod.kwargs) recordMethods = self.observer.calledMethods[-2:] self.assertEquals({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs)) self.assertEquals({'recordId':'id:3&3', 'metadataPrefix':'oai_dc'}, 
_m(recordMethods[1].kwargs)) def testNoRecordsMatch(self): self._addRecords(['id:0']) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix':['oai_dc'], 'set': ['does_not_exist']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['noRecordsMatch'], xpath(oai, "/oai:OAI-PMH/oai:error/@code")) def testListRecordsUsingXWait(self): self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository()) self.oaiList.addObserver(self.observer) result = compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}, **self.httpkwargs)) result.next() self.assertEquals(['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'], [m.name for m in self.observer.calledMethods]) self.assertEquals({"clientIdentifier": self.clientId, "prefix": 'oai_dc', 'sets': [], 'oaiFrom': None, 'oaiUntil':None, 'shouldCountHits': False, 'x-wait':True, 'continueAfter': '0', 'partition': None}, self.observer.calledMethods[-1].kwargs) self._addRecords(['id:1&1']) self.observer.calledMethods.reset() header, body = ''.join(compose(result)).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) self.assertEquals(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()'))) self.assertEquals(['suspendBeforeSelect', 'getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'], [m.name for m in self.observer.calledMethods]) selectMethod = self.observer.calledMethods[2] self.assertEquals(dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None), selectMethod.kwargs) recordMethods = self.observer.calledMethods[-1:] self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs)) def testListRecordsWithoutClientIdentifierGeneratesOne(self): 
self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository()) self.oaiList.addObserver(self.observer) self.httpkwargs = { 'path': '/path/to/oai', 'Headers':{'Host':'server'}, 'port':9000, 'Client': ('127.0.0.1', 1234) } with stderr_replaced() as s: result = compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}, **self.httpkwargs)) result.next() self.assertEquals(['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'], [m.name for m in self.observer.calledMethods]) self.assertTrue('clientIdentifier' in self.observer.calledMethods[-1].kwargs) self.assertEquals(len(str(uuid4())), len(self.observer.calledMethods[-1].kwargs['clientIdentifier'])) self.assertEquals("X-Meresco-Oai-Client-Identifier not found in HTTP Headers. Generated a uuid for OAI client from 127.0.0.1\n", s.getvalue()) def testNotSupportedXWait(self): self._addRecords(['id:1', 'id:2']) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['badArgument'], xpath(oai, "/oai:OAI-PMH/oai:error/@code")) def testNotSupportedValueXWait(self): self._addRecords(['id:1', 'id:2']) self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository()) self.oaiList.addObserver(self.observer) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['YesPlease']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['badArgument'], xpath(oai, "/oai:OAI-PMH/oai:error/@code")) self.assertTrue("only supports 'True' as valid value" in xpath(oai, "/oai:OAI-PMH/oai:error/text()")[0]) def testListRecordsWithPartition(self): self._addRecords(['id:1', 'id:2']) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 
'metadataPrefix': ['oai_dc'], 'x-partition': ['2/2']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['id:1/oai_dc'], xpath(oai, '//mock:record/text()')) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition': ['1/2']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['id:2/oai_dc'], xpath(oai, '//mock:record/text()')) @stderr_replaced def testListRecordsWithOldPartitionParameter(self): self._addRecords(['id:1', 'id:2']) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-parthash': ['2/2']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['id:1/oai_dc'], xpath(oai, '//mock:record/text()')) def testListRecordsProducesResumptionTokenWithPartition(self): self._addRecords(['id:%s' % i for i in xrange(10)]) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition':['1/2']}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) resumptionToken = ResumptionToken.fromString(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]) self.assertEquals(['id:2/oai_dc', 'id:3/oai_dc'], xpath(oai, '//mock:record/text()')) self.assertEquals('1/2', str(resumptionToken.partition)) header, body = ''.join(compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'resumptionToken': [str(resumptionToken)]}, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['id:5/oai_dc', 'id:6/oai_dc'], xpath(oai, '//mock:record/text()')) def testFromAndUntil(self): self._addRecords(['id:3&3']) def selectArguments(oaiFrom, oaiUntil): self.observer.calledMethods.reset() arguments = {'verb':['ListRecords'], 'metadataPrefix': 
['oai_dc']} if oaiFrom: arguments['from'] = [oaiFrom] if oaiUntil: arguments['until'] = [oaiUntil] header, body = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(['getAllPrefixes', 'oaiSelect'], [m.name for m in self.observer.calledMethods][:2]) selectKwargs = self.observer.calledMethods[1].kwargs return selectKwargs['oaiFrom'], selectKwargs['oaiUntil'] self.assertEquals((None, None), selectArguments(None, None)) self.assertEquals(('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z'), selectArguments('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z')) self.assertEquals(('2000-01-01T00:00:00Z', '2000-01-01T23:59:59Z'), selectArguments('2000-01-01', '2000-01-01')) self.assertEquals((None, '2000-01-01T00:00:00Z'), selectArguments(None, '2000-01-01T00:00:00Z')) self.assertEquals(('2000-01-01T00:00:00Z', None), selectArguments('2000-01-01T00:00:00Z', None)) def testFromAndUntilErrors(self): def getError(oaiFrom, oaiUntil): self._addRecords(['id:3&3']) self.observer.calledMethods.reset() arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']} if oaiFrom: arguments['from'] = [oaiFrom] if oaiUntil: arguments['until'] = [oaiUntil] header, body = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(1, len(xpath(oai, '//oai:error')), body) error = xpath(oai, '//oai:error')[0] return error.attrib['code'] self.assertEquals('badArgument', getError('thisIsNotEvenADateStamp', 'thisIsNotEvenADateStamp')) self.assertEquals('badArgument', getError('2000-01-01T00:00:00Z', '2000-01-01')) self.assertEquals('badArgument', getError('2000-01-01T00:00:00Z', '1999-01-01T00:00:00Z')) def testConcurrentListRequestsDontInterfere(self): self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository()) self.oaiList.addObserver(self.observer) # ListRecords request resultListRecords = 
compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}, **self.httpkwargs)) resultListRecords.next() # ListIdentifiers request resultListIdentifiers = compose(self.oaiList.listRecords(arguments={'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs)) resultListIdentifiers.next() # resume ListRecords self._addRecords(['id:1&1']) header, body = ''.join(compose(resultListRecords)).split(CRLF*2) self.assertFalse('</ListIdentifiers>' in body, body) self.assertTrue('</ListRecords>' in body, body) def testXCount(self): self._addRecords(['id%s' % i for i in xrange(99)]) header, body = ''.join(s for s in compose(self.oaiList.listRecords(arguments={'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-count': ['True']}, **self.httpkwargs)) if not s is Yield).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record'))) recordsRemaining = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0] self.assertEquals('97', recordsRemaining) continueAfter = self.oaiJazz.getRecord('id97').stamp resumptionToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=continueAfter)) header, body = ''.join(s for s in compose(self.oaiList.listRecords(arguments={'verb': ['ListRecords'], 'resumptionToken': [resumptionToken], 'x-count': ['True']}, **self.httpkwargs)) if not s is Yield).split(CRLF*2) oai = parse(StringIO(body)) self.assertEquals(1, len(xpath(oai, '//mock:record'))) self.assertEquals(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining'))) selectMethod = self.observer.calledMethods[1] self.assertEquals(dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=True, partition=None), selectMethod.kwargs) def testGetMultipleDataWithOtherBatchSize(self): self._addRecords(['id%s' % i for i in xrange(99)]) self.oaiList 
= OaiList(batchSize=10, dataBatchSize=2, repository=OaiRepository()) self.oaiList.addObserver(self.observer) def getMultipleData(identifiers, **kwargs): return [(id, '<data id="%s"/>' % id) for id in identifiers] self.observer.methods['getMultipleData'] = getMultipleData def oaiRecord(record, metadataPrefix, fetchedRecords=None): yield fetchedRecords[record.identifier] self.observer.methods['oaiRecord'] = oaiRecord body = asString(self.oaiList.listRecords(arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs)).split(CRLF*2,1)[-1] oai = parse(StringIO(body)) self.assertEquals(['id0', 'id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8', 'id9'], xpath(oai, '//oai:ListRecords/oai:data/@id')) self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord', 'getMultipleData', 'oaiRecord', 'oaiRecord', 'getMultipleData', 'oaiRecord', 'oaiRecord', 'getMultipleData', 'oaiRecord', 'oaiRecord', 'getMultipleData', 'oaiRecord', 'oaiRecord' ], self.observer.calledMethodNames()) def _addRecords(self, identifiers, sets=None): for identifier in identifiers: self.oaiJazz.addOaiRecord(identifier=identifier, sets=sets, metadataFormats=[('oai_dc', '', '')])
class OaiInfoTest(SeecrTestCase):
    """Tests for the JSON info endpoints served by OaiInfo on top of an OaiJazz.

    The fixture registers two sets and two metadata formats, adds three
    records and deletes 'record3' again before committing. All three records
    are in 'set1' with prefix 'prefix1'; 'record2' also has prefix 'oai' and
    'record3' is also in 'set2'.
    """

    def setUp(self):
        """Build the OaiInfo/OaiJazz observer tree and fill it with the fixture records."""
        # Zero-argument super() does not depend on the module-level name
        # 'OaiInfoTest' still pointing at this class.
        super().setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(), (self.oaiInfo, (self.jazz, ))))
        self.jazz.updateSet(setSpec="set1", setName="set1")
        self.jazz.updateSet(setSpec="set2", setName="set name 2")
        self.jazz.updateMetadataFormat(prefix="prefix1", schema="", namespace="")
        self.jazz.updateMetadataFormat(
            prefix="oai", schema="oai-schema", namespace="oai-namespace")
        self.jazz.addOaiRecord(
            identifier='record1', setSpecs=['set1'], metadataPrefixes=['prefix1'])
        self.jazz.addOaiRecord(
            identifier='record2', setSpecs=['set1'], metadataPrefixes=['prefix1', 'oai'])
        self.jazz.addOaiRecord(
            identifier='record3', setSpecs=['set1', 'set2'], metadataPrefixes=['prefix1'])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def _getJson(self, path, arguments=None):
        """Request `path` from the observer tree and return the decoded JSON body.

        :param path: request path handed to handleRequest.
        :param arguments: request arguments dict; an empty dict when omitted.
        :return: the response body decoded with `loads`.
        """
        response = asString(
            self.top.all.handleRequest(path=path, arguments=arguments or {}))
        # Split on the first blank line only, so a body that happens to contain
        # '\r\n\r\n' is still handed to the JSON decoder in full.
        _header, body = response.split('\r\n\r\n', 1)
        return loads(body)

    def testInfo(self):
        """'/info/json' reports the record totals and the last stamp."""
        info = self._getJson('/info/json')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            {'totalRecords': {'total': 3, 'deletes': 1}, 'lastStamp': lastStamp},
            info)

    def testGetAllSets(self):
        """'/info/json/sets' lists the set specs."""
        self.assertEqual(['set1', 'set2'], self._getJson('/info/json/sets'))

    def testGetAllPrefixes(self):
        """'/info/json/prefixes' lists the metadata prefixes."""
        self.assertEqual(['oai', 'prefix1'], self._getJson('/info/json/prefixes'))

    def testPrefixInfo(self):
        """'/info/json/prefix' reports schema, namespace, counts and last stamp per prefix."""
        prefix1Info = self._getJson('/info/json/prefix', dict(prefix=['prefix1']))
        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(prefix='prefix1', schema='', namespace='',
                 nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp),
            prefix1Info)

        oaiInfo = self._getJson('/info/json/prefix', dict(prefix=['oai']))
        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(
            dict(prefix='oai', schema='oai-schema', namespace='oai-namespace',
                 nrOfRecords=dict(total=1, deletes=0), lastStamp=oaiLastStamp),
            oaiInfo)

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        self.assertEqual(
            {}, self._getJson('/info/json/prefix', dict(prefix=['unknown'])))

    def testSetInfo(self):
        """'/info/json/set' reports name, counts and last stamp per set."""
        set1Info = self._getJson('/info/json/set', dict(set=['set1']))
        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(setSpec='set1', name='set1',
                 nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp),
            set1Info)

        set2Info = self._getJson('/info/json/set', dict(set=['set2']))
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # Both sets are expected to report the same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(
            dict(setSpec='set2', name='set name 2',
                 nrOfRecords=dict(total=1, deletes=1), lastStamp=set2LastStamp),
            set2Info)

    def testResumptionTokenInfo(self):
        """'/info/json/resumptiontoken' describes what a resumption token still covers."""
        firstRecord = next(
            self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken = ResumptionToken(
            metadataPrefix='prefix1', continueAfter=firstRecord.stamp)
        tokenInfo = self._getJson(
            '/info/json/resumptiontoken',
            dict(resumptionToken=[str(resumptionToken)]))
        self.assertEqual(
            {
                'prefix': 'prefix1',
                'set': None,
                'from': None,
                'until': None,
                'nrOfRecords': {'total': 3, 'deletes': 1},
                'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
                'timestamp': firstRecord.stamp,
            },
            tokenInfo)
def testExport(self):
    """Export a populated OaiJazz to a dump file and check the dump line by line.

    The jazz is filled with two metadata formats, two named sets and records
    'id:0' .. 'id:2999'. Afterwards 'id:2' is deleted in prefix 'someprefix',
    'id:3' is deleted in set 'd:e:f' and 'id:7' is deleted completely. The
    assertions expect those three records on the last three dump lines.
    """
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'), deleteInSets=True)
    oaiJazz.updateMetadataFormat(
        prefix='someprefix',
        schema='https://example.org/schema.xsd',
        namespace='urn:ns')
    oaiJazz.updateMetadataFormat(
        prefix='prefix', schema='schema', namespace='namespace')
    oaiJazz.updateSet(setSpec='a', setName='A')
    oaiJazz.updateSet(setSpec='setSpec', setName='setName')

    oaiJazz.addOaiRecord(identifier='id:0', metadataPrefixes=['prefix'])
    oaiJazz.addOaiRecord(
        identifier='id:1',
        metadataPrefixes=['prefix'],
        setSpecs=['a', 'a:b', 'd:e:f'])
    # 'id:2' and 'id:3' share the same prefixes and sets; fresh lists are
    # passed on every call.
    for identifier in ('id:2', 'id:3'):
        oaiJazz.addOaiRecord(
            identifier=identifier,
            metadataPrefixes=['prefix', 'someprefix'],
            setSpecs=['a', 'a:b', 'd:e:f'])
    for number in range(4, 3000):
        oaiJazz.addOaiRecord(
            identifier='id:{}'.format(number), metadataPrefixes=['prefix'])

    oaiJazz.deleteOaiRecordInPrefixes(
        identifier='id:2', metadataPrefixes=['someprefix'])
    oaiJazz.deleteOaiRecordInSets(identifier='id:3', setSpecs=['d:e:f'])
    oaiJazz.deleteOaiRecord(identifier='id:7')

    dumpfile = join(self.tempdir, 'dump')
    oaiJazz.export(dumpfile)
    with open(dumpfile) as dumpHandle:
        dumpLines = dumpHandle.readlines()

    # 'META:' + one metadata line + 'RECORDS:' + 3000 record lines.
    self.assertEqual(3003, len(dumpLines))
    self.assertEqual('META:\n', dumpLines[0])
    self.assertEqual('RECORDS:\n', dumpLines[2])

    expectedSets = {
        'a': {'setName': 'A'},
        'setSpec': {'setName': 'setName'},
    }
    # Sets that were never given a name are expected with an empty setName.
    for unnamedSpec in ('a:b', 'd', 'd:e', 'd:e:f'):
        expectedSets[unnamedSpec] = {'setName': ''}
    expectedMeta = {
        'export_version': 1,
        'metadataPrefixes': {
            'someprefix': {
                'schema': 'https://example.org/schema.xsd',
                'namespace': 'urn:ns',
            },
            'prefix': {'schema': 'schema', 'namespace': 'namespace'},
        },
        'sets': expectedSets,
    }
    self.assertEqual(expectedMeta, loads(dumpLines[1].strip()))

    def maskedRecord(line):
        # Decode one record line and replace its timestamp with a fixed marker
        # so the whole record can be compared with a literal.
        record = loads(line.strip())
        record['timestamp'] = 'TIMESTAMP'
        return record

    firstRecord = loads(dumpLines[3].strip())
    # The exported timestamp divided by 10**6 must be within 3 of time().
    exportedSeconds = firstRecord['timestamp'] / 10.0**6
    self.assertAlmostEqual(time(), exportedSeconds, delta=3)
    firstRecord['timestamp'] = 'TIMESTAMP'
    self.assertEqual(
        {
            'identifier': 'id:0',
            'timestamp': 'TIMESTAMP',
            'tombstone': False,
            'deletedPrefixes': [],
            'prefixes': ['prefix'],
            'deletedSets': [],
            'sets': [],
        },
        firstRecord)

    self.assertEqual(
        {
            'identifier': 'id:2',
            'timestamp': 'TIMESTAMP',
            'tombstone': False,
            'deletedPrefixes': ['someprefix'],
            'prefixes': ['prefix', 'someprefix'],
            'deletedSets': [],
            'sets': ['a', 'a:b', 'd', 'd:e', 'd:e:f'],
        },
        maskedRecord(dumpLines[-3]))

    self.assertEqual(
        {
            'identifier': 'id:3',
            'timestamp': 'TIMESTAMP',
            'tombstone': False,
            'deletedPrefixes': [],
            'prefixes': ['prefix', 'someprefix'],
            'deletedSets': ['d:e:f'],
            'sets': ['a', 'a:b', 'd', 'd:e', 'd:e:f'],
        },
        maskedRecord(dumpLines[-2]))

    self.assertEqual(
        {
            'identifier': 'id:7',
            'timestamp': 'TIMESTAMP',
            'tombstone': True,
            'deletedPrefixes': ['prefix'],
            'prefixes': ['prefix'],
            'deletedSets': [],
            'sets': [],
        },
        maskedRecord(dumpLines[-1]))
class OaiInfoTest(SeecrTestCase):
    """Tests for the JSON info endpoints served by OaiInfo on top of an OaiJazz.

    The fixture adds three records and deletes 'record3' again before
    committing. All three records are in 'set1' with prefix 'prefix1';
    'record2' also has prefix 'oai' and 'record3' is also in 'set2'.
    """

    def setUp(self):
        """Build the OaiInfo/OaiJazz observer tree and fill it with the fixture records."""
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(), (self.oaiInfo, (self.jazz,))))
        self.jazz.addOaiRecord(
            identifier='record1',
            sets=[('set1', 'set1')],
            metadataFormats=[('prefix1', '', '')])
        self.jazz.addOaiRecord(
            identifier='record2',
            sets=[('set1', 'set1')],
            metadataFormats=[('prefix1', '', ''), ('oai', 'oai-schema', 'oai-namespace')])
        self.jazz.addOaiRecord(
            identifier='record3',
            sets=[('set1', 'set1'), ('set2', 'set name 2')],
            metadataFormats=[('prefix1', '', '')])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def _getJson(self, path, arguments=None):
        """Request `path` from the observer tree and return the decoded JSON body.

        :param path: request path handed to handleRequest.
        :param arguments: request arguments dict; an empty dict when omitted.
        :return: the response body decoded with `loads`.
        """
        response = asString(
            self.top.all.handleRequest(path=path, arguments=arguments or {}))
        # Split on the first blank line only, so a body that happens to contain
        # '\r\n\r\n' is still handed to the JSON decoder in full.
        _header, body = response.split('\r\n\r\n', 1)
        return loads(body)

    def testInfo(self):
        """'/info/json' reports the record totals and the last stamp."""
        info = self._getJson('/info/json')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            {'totalRecords': {'total': 3, 'deletes': 1}, 'lastStamp': lastStamp},
            info)

    def testGetAllSets(self):
        """'/info/json/sets' lists the set specs."""
        self.assertEqual(['set1', 'set2'], self._getJson('/info/json/sets'))

    def testGetAllPrefixes(self):
        """'/info/json/prefixes' lists the metadata prefixes."""
        self.assertEqual(['oai', 'prefix1'], self._getJson('/info/json/prefixes'))

    def testPrefixInfo(self):
        """'/info/json/prefix' reports schema, namespace, counts and last stamp per prefix."""
        prefix1Info = self._getJson('/info/json/prefix', dict(prefix=['prefix1']))
        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(prefix='prefix1', schema='', namespace='',
                 nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp),
            prefix1Info)

        oaiInfo = self._getJson('/info/json/prefix', dict(prefix=['oai']))
        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(
            dict(prefix='oai', schema='oai-schema', namespace='oai-namespace',
                 nrOfRecords=dict(total=1, deletes=0), lastStamp=oaiLastStamp),
            oaiInfo)

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        self.assertEqual(
            {}, self._getJson('/info/json/prefix', dict(prefix=['unknown'])))

    def testSetInfo(self):
        """'/info/json/set' reports name, counts and last stamp per set."""
        set1Info = self._getJson('/info/json/set', dict(set=['set1']))
        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(setSpec='set1', name='set1',
                 nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp),
            set1Info)

        set2Info = self._getJson('/info/json/set', dict(set=['set2']))
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # Both sets are expected to report the same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(
            dict(setSpec='set2', name='set name 2',
                 nrOfRecords=dict(total=1, deletes=1), lastStamp=set2LastStamp),
            set2Info)

    def testResumptionTokenInfo(self):
        """'/info/json/resumptiontoken' describes what a resumption token still covers."""
        # The next() builtin works on both Python 2.6+ and Python 3 iterators,
        # unlike the Python-2-only .next() method.
        firstRecord = next(
            self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken = ResumptionToken(
            metadataPrefix='prefix1', continueAfter=firstRecord.stamp)
        tokenInfo = self._getJson(
            '/info/json/resumptiontoken',
            dict(resumptionToken=[str(resumptionToken)]))
        self.assertEqual(
            {
                'prefix': 'prefix1',
                'set': None,
                'from': None,
                'until': None,
                'nrOfRecords': {'total': 3, 'deletes': 1},
                'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
                'timestamp': firstRecord.stamp,
            },
            tokenInfo)
def testShouldRaiseExceptionOnSameRequestTwice(self):
    """A second x-wait request with the same client identifier aborts the first one.

    Two harvest threads send the same ListRecords request with 'x-wait' and
    the same X-Meresco-Oai-Client-Identifier header. Once the second request
    has replaced the first one's suspend object in the SuspendRegister, the
    first request must have been answered with status 204 and a body that
    starts with 'Aborting suspended request'.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    responses = []

    def doOaiListRecord(port):
        # Send one waiting ListRecords request and record the raw response.
        header, body = getRequest(
            port=port,
            path="/",
            arguments={
                "verb": "ListRecords",
                "metadataPrefix": "prefix",
                "x-wait": "True"
            },
            additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
            parse=False)
        responses.append((header, body))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(
            portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()
        try:
            # Wait until the first request has been suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertIsNotNone(harvest1Suspend)

            harvestThread2.start()
            # Wait until the second request has replaced the suspend object.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            # NOTE(review): presumably gives the first harvest thread time to
            # store its response before it is inspected - confirm.
            sleep(0.01)

            self.assertIn(clientId, suspendRegister)
            self.assertNotEqual(
                harvest1Suspend, suspendRegister._suspendObject(clientId))
            self.assertEqual(1, len(responses))
            statusAndHeader, body = responses[0]
            self.assertEqual("204", statusAndHeader['StatusCode'])
            self.assertTrue(
                body.startswith(b'Aborting suspended request'), body)

            # Adding a record; presumably this resumes the still-suspended
            # second request so its thread can finish - confirm.
            storageComponent.addData(
                identifier="id1", name="prefix", data=b"<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # Joining a thread that was never started raises RuntimeError,
            # which would hide the original failure and skip the close()
            # below; 'ident' is None until the thread has been started.
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()