def testIncrementalHarvestScheduleSetToNone(self):
    # With a period-0 schedule a completed batch leaves a next-request time
    # set; after the schedule is replaced by None, handling the same response
    # again leaves _earliestNextRequestTime at None.
    expectedCalls = [
        'startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone']
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        incrementalHarvestSchedule=Schedule(period=0),
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    processor.addObserver(observer)

    # NOTE(review): the response template is parsed without %-formatting
    # here, whereas other tests in this file apply `% ''` -- confirm that
    # this is intentional.
    firstResponse = parse(StringIO(LISTRECORDS_RESPONSE))
    consume(processor.handle(firstResponse))
    self.assertEqual('2002-06-01T19:20:30Z', processor._from)
    self.assertNotEqual(None, processor._earliestNextRequestTime)
    self.assertEqual(expectedCalls, observer.calledMethodNames())

    # Second round: clear the recorded calls, drop 'from' and the schedule.
    observer.calledMethods.reset()
    processor.setFrom(from_=None)
    processor.setIncrementalHarvestSchedule(schedule=None)
    secondResponse = parse(StringIO(LISTRECORDS_RESPONSE))
    consume(processor.handle(secondResponse))
    self.assertEqual('2002-06-01T19:20:30Z', processor._from)
    self.assertEqual(None, processor._earliestNextRequestTime)
    self.assertEqual(expectedCalls, observer.calledMethodNames())
def testHandleWithTwoRecords(self):
    # A response with two records followed by a resumption token must lead
    # to two 'add' calls, each carrying the record node, its datestamp and
    # its identifier as keyword arguments only.
    observer = CallTrace(methods={'add': lambda **kwargs: (x for x in [])})
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    processor.addObserver(observer)
    secondRecord = (
        '<record xmlns="http://www.openarchives.org/OAI/2.0/">'
        '<header>'
        '<identifier>oai:identifier:2</identifier>'
        '<datestamp>2011-08-22T07:41:00Z</datestamp>'
        '</header>'
        '<metadata>ignored</metadata>'
        '</record>')
    responseXml = LISTRECORDS_RESPONSE % (secondRecord + RESUMPTION_TOKEN)

    list(compose(processor.handle(parse(StringIO(responseXml)))))

    calledNames = [m.name for m in observer.calledMethods]
    self.assertEqual(
        ['startOaiBatch', 'add', 'add', 'stopOaiBatch'], calledNames)
    firstAdd, secondAdd = observer.calledMethods[1:3]

    self.assertEqual(0, len(firstAdd.args))
    self.assertEqualsWS(ONE_RECORD, lxmltostring(firstAdd.kwargs['lxmlNode']))
    self.assertEqual('2011-08-22T07:34:00Z', firstAdd.kwargs['datestamp'])
    self.assertEqual('oai:identifier:1', firstAdd.kwargs['identifier'])

    self.assertEqualsWS(
        secondRecord, lxmltostring(secondAdd.kwargs['lxmlNode']))
    self.assertEqual('2011-08-22T07:41:00Z', secondAdd.kwargs['datestamp'])
    self.assertEqual('oai:identifier:2', secondAdd.kwargs['identifier'])
def testRequest(self):
    # buildRequest() on a newly constructed processor (xWait=True) must be a
    # ListRecords GET with metadataPrefix, x-wait and the identifier header.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())
def testSetInRequest(self):
    # Checks three request shapes: a plain set name, a set name whose
    # special characters appear percent-encoded in the request, and a
    # request built from a resumption token stored in harvester.state
    # (which then carries neither set nor metadataPrefix).
    plainSetProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        set="setName",
        workingDirectory=self.tempdir,
        xWait=True)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=setName&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % plainSetProcessor._identifier
    self.assertEqual(expected, plainSetProcessor.buildRequest())

    specialSetProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        set="set-_.!~*'()",
        workingDirectory=self.tempdir,
        xWait=True)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=set-_.%%21~%%2A%%27%%28%%29&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % specialSetProcessor._identifier
    self.assertEqual(expected, specialSetProcessor.buildRequest())

    # Store a resumption token in the state file before constructing the
    # third processor.
    resumptionToken = "u|c1286437597991025|mprefix|s|f"
    stateFile = join(self.tempdir, 'harvester.state')
    with open(stateFile, 'w') as f:
        f.write("Resumptiontoken: %s\n" % resumptionToken)
    resumedProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        set="setName",
        workingDirectory=self.tempdir,
        xWait=True)
    expected = (
        "GET /oai?verb=ListRecords&resumptionToken=u%%7Cc1286437597991025%%7Cmprefix%%7Cs%%7Cf&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % resumedProcessor._identifier
    self.assertEqual(expected, resumedProcessor.buildRequest())
def testRequestWithAdditionalHeaders(self):
    # Headers passed through additionalHeaders must appear in the request;
    # here 'Host' is expected between the identifier and User-Agent headers.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    extraHeaders = {'Host': 'example.org'}
    request = processor.buildRequest(additionalHeaders=extraHeaders)
    expected = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "Host: example.org\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, request)
def testRaiseErrorOnBadResponse(self):
    # A <record> without a header must make handle() raise IndexError.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    badRecord = '<record>No Header</record>'
    # Parse up front so that only the handling itself is guarded: an
    # IndexError from anywhere else must not make this test pass.
    response = parse(StringIO(LISTRECORDS_RESPONSE % badRecord))
    with self.assertRaises(IndexError):
        list(compose(processor.handle(response)))
def testSignalHarvestingDone(self):
    # Handling a response without a resumption token must end with a
    # 'signalHarvestingDone' call on the observer.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path='/p',
        metadataPrefix='p',
        workingDirectory=self.tempdir,
        incrementalHarvestSchedule=None)
    processor.addObserver(observer)
    response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    consume(processor.handle(response))
    expectedCalls = [
        'startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone']
    self.assertEqual(expectedCalls, observer.calledMethodNames())
def testYieldSuspendFromAdd(self):
    # A Suspend object yielded by the observer's 'add' must come out of
    # handle() unchanged, followed by a single None.
    observer = CallTrace()
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False)
    processor.addObserver(observer)
    suspend = Suspend()
    observer.returnValues['add'] = (x for x in [suspend])
    response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    yields = list(compose(processor.handle(response)))
    self.assertEqual([suspend, None], yields)
def testIncrementalHarvestScheduleNone(self):
    # Without an incremental harvest schedule a finished harvest keeps the
    # response date as 'from' but leaves no next-request time.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=None)
    processor.addObserver(observer)
    response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    consume(processor.handle(response))
    self.assertEqual(None, processor._resumptionToken)
    self.assertEqual('2002-06-01T19:20:30Z', processor._from)
    self.assertEqual(None, processor._earliestNextRequestTime)
def testReadResumptionTokenWhenNoState(self):
    # A processor constructed without this test writing any state file
    # must start with no resumption token.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    self.assertEqual(None, processor._resumptionToken)
def testIncrementalHarvestWithFromWithDefaultScheduleMidnight(self):
    # With the default schedule and the clock faked to 01:00 (3600 s), the
    # next request is expected at 24 * 60 * 60 seconds.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())

    def oneHourInSeconds():
        return 60 * 60

    # The same fake clock is installed on the processor and its schedule.
    processor._time = oneHourInSeconds
    processor._incrementalHarvestSchedule._time = oneHourInSeconds
    processor._incrementalHarvestSchedule._utcnow = (
        lambda: datetime.strptime("01:00", "%H:%M"))
    processor.addObserver(observer)

    # NOTE(review): the response template is parsed without %-formatting
    # here, whereas other tests in this file apply `% ''` -- confirm that
    # this is intentional.
    consume(processor.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
    self.assertEqual(None, processor._resumptionToken)
    self.assertEqual(24 * 60 * 60.0, processor._earliestNextRequestTime)
def testListRecordsRequestError(self):
    # After an OAI error response the stored resumption token must no
    # longer be used: the error is written to the err stream, no observer
    # method is called, and the next request uses metadataPrefix again.
    resumptionToken = "u|c1286437597991025|mprefix|s|f"
    stateFile = join(self.tempdir, 'harvester.state')
    with open(stateFile, 'w') as f:
        f.write("Resumptiontoken: %s\n" % resumptionToken)
    observer = CallTrace()
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    processor.addObserver(observer)
    requestTemplate = (
        'GET /oai?%s HTTP/1.0\r\n'
        'X-Meresco-Oai-Client-Identifier: %s\r\n'
        'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n')

    # Before the error: the request resumes with the stored token.
    resumeQuery = urlencode([
        ('verb', 'ListRecords'),
        ('resumptionToken', resumptionToken),
        ('x-wait', 'True'),
    ])
    self.assertEqual(
        requestTemplate % (resumeQuery, processor._identifier),
        processor.buildRequest())

    consume(processor.handle(parse(StringIO(ERROR_RESPONSE))))
    self.assertEqual(0, len(observer.calledMethods))
    self.assertEqual(
        "someError: Some error occurred.\n", processor._err.getvalue())

    # After the error: a fresh ListRecords request is built.
    freshQuery = urlencode([
        ('verb', 'ListRecords'),
        ('metadataPrefix', 'oai_dc'),
        ('x-wait', 'True'),
    ])
    self.assertEqual(
        requestTemplate % (freshQuery, processor._identifier),
        processor.buildRequest())
def testUpdateRequestAfterSetResumptionToken(self):
    # After setPath/setFrom/setResumptionToken the request must use the new
    # path and only the resumption token (no set, from or metadataPrefix).
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        set="aSet",
        workingDirectory=self.tempdir,
        xWait=False)
    processor.setPath('/otherOai')
    processor.setFrom('2014')
    processor.setResumptionToken('ReSumptionToken')
    expected = (
        "GET /otherOai?verb=ListRecords&resumptionToken=ReSumptionToken HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())
def testReadInvalidState(self):
    # A state file whose content is just "invalid" must result in no
    # resumption token on the newly constructed processor.
    stateFile = join(self.tempdir, 'harvester.state')
    with open(stateFile, 'w') as f:
        f.write("invalid")
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    self.assertEqual(None, processor._resumptionToken)
def startOaiHarvester(self, portNumber, observer):
    # Helper: wires Observable -> PeriodicDownload -> XmlParseLxml ->
    # OaiDownloadProcessor -> observer on a new Reactor, fires
    # observer_init once and hands the reactor to self._loopReactor.
    #
    # portNumber: port on 'localhost' that PeriodicDownload connects to.
    # observer:   object placed at the leaf of the observable tree.
    #
    # NOTE(review): initialisation and looping are kept inside the
    # `with Reactor()` block -- confirm against the original layout.
    with Reactor() as reactor:
        root = Observable()
        downloader = PeriodicDownload(reactor, 'localhost', portNumber)
        xmlParser = XmlParseLxml(fromKwarg="data", toKwarg="lxmlNode")
        processor = OaiDownloadProcessor('/', 'prefix', self.tempdir)
        tree = (root, (downloader, (xmlParser, (processor, (observer, )))))
        server = be(tree)
        list(compose(server.once.observer_init()))
        self._loopReactor(reactor)
def testSetIncrementalHarvestScheduleNotAllowedInCaseOfRestartAfterFinish(
        self):
    # Setting a schedule on a processor built with restartAfterFinish=True
    # must raise ValueError.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        restartAfterFinish=True)
    with self.assertRaises(ValueError):
        processor.setIncrementalHarvestSchedule(schedule=Schedule(period=3))
def testHandle(self):
    # A single-record response must produce one 'add' call, framed by batch
    # start/stop and followed by signalHarvestingDone, with the record
    # passed purely as keyword arguments.
    observer = CallTrace(methods={'add': lambda **kwargs: (x for x in [])})
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False)
    processor.addObserver(observer)
    response = parse(StringIO(LISTRECORDS_RESPONSE % ''))

    list(compose(processor.handle(response)))

    calledNames = [m.name for m in observer.calledMethods]
    self.assertEqual(
        ['startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'],
        calledNames)
    addCall = observer.calledMethods[1]
    self.assertEqual(0, len(addCall.args))
    self.assertEqualsWS(ONE_RECORD, lxmltostring(addCall.kwargs['lxmlNode']))
    self.assertEqual('2011-08-22T07:34:00Z', addCall.kwargs['datestamp'])
    self.assertEqual('oai:identifier:1', addCall.kwargs['identifier'])
def testReadResumptionTokenFromStateWithNewline(self):
    # The token read from a state file line ending in a newline must not
    # include that newline.
    resumptionToken = "u|c1286437597991025|mprefix|s|f"
    stateFile = join(self.tempdir, 'harvester.state')
    with open(stateFile, 'w') as f:
        f.write("Resumptiontoken: %s\n" % resumptionToken)
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    self.assertEqual(resumptionToken, processor._resumptionToken)
def testOaiListRequestOnCallstack(self):
    # While 'add' runs, the callstack variable
    # '__callstack_var_oaiListRequest__' must hold the set and
    # metadataPrefix of the processor that is handling the response.
    seenRequests = []

    def recordListRequest(**kwargs):
        # Capture the callstack variable; the unreachable `yield` makes
        # this function a generator.
        seenRequests.append(local('__callstack_var_oaiListRequest__'))
        return
        yield

    observer = CallTrace(methods={'add': recordListRequest})

    # First processor: no set, finished harvest.
    top = be(
        (Observable(),
            (OaiDownloadProcessor(
                path="/oai",
                metadataPrefix="oai_dc",
                workingDirectory=self.tempdir,
                xWait=False),
                (observer, ))))
    consume(top.all.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    calledNames = [m.name for m in observer.calledMethods]
    self.assertEqual(
        ['startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'],
        calledNames)
    self.assertEqual(
        [{'set': None, 'metadataPrefix': 'oai_dc'}], seenRequests)

    # Second processor: with a set, response carries a resumption token.
    seenRequests = []
    observer.calledMethods.reset()
    top = be(
        (Observable(),
            (OaiDownloadProcessor(
                path="/oai",
                metadataPrefix="other",
                set='aSet',
                workingDirectory=self.tempdir,
                xWait=True),
                (observer, ))))
    responseXml = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
    consume(top.all.handle(parse(StringIO(responseXml))))
    calledNames = [m.name for m in observer.calledMethods]
    self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames)
    self.assertEqual(
        [{'set': 'aSet', 'metadataPrefix': 'other'}], seenRequests)
def testHarvesterStateWithError(self):
    # When 'add' raises, getState() must keep the previous resumption token
    # and expose an error message; a second processor on the same working
    # directory must report the same token and error.
    resumptionToken = "u|c1286437597991025|mprefix|s|f"
    stateFile = join(self.tempdir, 'harvester.state')
    with open(stateFile, 'w') as f:
        f.write("Resumptiontoken: %s\n" % resumptionToken)
    observer = CallTrace()
    observer.exceptions = {'add': Exception("Could be anything")}
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO(),
        name="Name")
    processor.addObserver(observer)

    with self.assertRaises(Exception):
        list(
            compose(
                processor.handle(
                    parse(
                        StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

    expectedError = (
        "ERROR while processing 'oai:identifier:1': Could be anything")
    state = processor.getState()
    self.assertEqual(resumptionToken, state.resumptionToken)
    self.assertEqual(None, state.from_)
    self.assertEqual(expectedError, state.errorState)
    self.assertEqual("Name", state.name)

    # A new processor reads the state back from the working directory.
    reloadedProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    reloadedState = reloadedProcessor.getState()
    self.assertEqual(resumptionToken, reloadedState.resumptionToken)
    self.assertEqual(expectedError, reloadedState.errorState)
def testBuildRequestNoneWhenNoResumptionToken(self):
    # After a response has been handled and no resumption token is left,
    # buildRequest() must return None.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    processor.addObserver(observer)
    # NOTE(review): the response template is parsed without %-formatting
    # here, whereas other tests in this file apply `% ''` -- confirm that
    # this is intentional.
    response = parse(StringIO(LISTRECORDS_RESPONSE))
    consume(processor.handle(response))
    self.assertEqual(None, processor._resumptionToken)
    self.assertEqual(None, processor.buildRequest())
def testShutdownPersistsStateOnAutocommit(self):
    # With autoCommit=False no state file exists after handling a response;
    # handleShutdown() must then write errorState, from and resumptionToken
    # to harvester.state.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        autoCommit=False)
    processor.addObserver(observer)
    responseXml = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
    consume(processor.handle(parse(StringIO(responseXml))))
    state = processor.getState()
    self.assertFalse(isfile(join(self.tempdir, 'harvester.state')))

    processor.handleShutdown()

    expectedState = {
        "errorState": None,
        'from': '2002-06-01T19:20:30Z',
        "resumptionToken": state.resumptionToken,
    }
    self.assertEqual(
        expectedState, JsonDict.load(join(self.tempdir, 'harvester.state')))
def testSetIncrementalHarvestSchedule(self):
    # Installing a period-3 schedule leaves the next-request time at 0 at
    # first; after a finished harvest at fake time 10 it must become 13.
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=None)
    processor._time = lambda: 10
    processor.setIncrementalHarvestSchedule(schedule=Schedule(period=3))
    self.assertEqual(0, processor._earliestNextRequestTime)

    response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    consume(processor.handle(response))
    self.assertEqual(13, processor._earliestNextRequestTime)
def testUseResumptionToken(self):
    # The resumption token from a response must be kept, appear
    # percent-encoded in the next request, and still be there for a new
    # processor on the same working directory.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    processor.addObserver(observer)
    responseXml = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
    consume(processor.handle(parse(StringIO(responseXml))))
    self.assertEqual('x?y&z', processor._resumptionToken)
    expected = (
        'GET /oai?verb=ListRecords&resumptionToken=x%%3Fy%%26z&x-wait=True HTTP/1.0\r\n'
        'X-Meresco-Oai-Client-Identifier: %s\r\n'
        'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
    ) % processor._identifier
    self.assertEqual(expected, processor.buildRequest())

    reloadedProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    self.assertEqual('x?y&z', reloadedProcessor._resumptionToken)
def testResponseDateAsFrom(self):
    # The date '2002-06-01T19:20:30Z' must be stored as 'from' after
    # handling the response, and must still be there for a new processor
    # on the same working directory.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    processor.addObserver(observer)
    responseXml = LISTRECORDS_RESPONSE % RESUMPTION_TOKEN
    consume(processor.handle(parse(StringIO(responseXml))))
    self.assertEqual('2002-06-01T19:20:30Z', processor._from)

    reloadedProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    self.assertEqual('2002-06-01T19:20:30Z', reloadedProcessor._from)
def testKeepResumptionTokenOnFailingAddCall(self):
    # If 'add' raises while a response is handled, the batch is still
    # stopped, a traceback naming the record goes to the err stream, and
    # the next request still uses the original resumption token.
    resumptionToken = "u|c1286437597991025|mprefix|s|f"
    stateFile = join(self.tempdir, 'harvester.state')
    with open(stateFile, 'w') as f:
        f.write("Resumptiontoken: %s\n" % resumptionToken)
    observer = CallTrace()
    observer.exceptions = {'add': Exception("Could be anything")}
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True,
        err=StringIO())
    processor.addObserver(observer)
    requestTemplate = (
        'GET /oai?%s HTTP/1.0\r\n'
        'X-Meresco-Oai-Client-Identifier: %s\r\n'
        'User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n')

    # Before the failure: the request resumes with the stored token.
    queryBefore = urlencode([
        ('verb', 'ListRecords'),
        ('resumptionToken', resumptionToken),
        ('x-wait', 'True'),
    ])
    self.assertEqual(
        requestTemplate % (queryBefore, processor._identifier),
        processor.buildRequest())

    with self.assertRaises(Exception):
        list(
            compose(
                processor.handle(
                    parse(
                        StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

    calledNames = [m.name for m in observer.calledMethods]
    self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames)
    errorOutput = processor._err.getvalue()
    self.assertTrue(errorOutput.startswith('Traceback'), errorOutput)
    expectedFragment = (
        'Exception: Could be anything\n'
        'While processing:\n'
        '<record xmlns="http://www.openarchives.org/OAI/2.0/">'
        '<header><identifier>oai:identifier:1')
    self.assertTrue(expectedFragment in errorOutput, errorOutput)

    # After the failure: the same token is requested again.
    queryAfter = urlencode([
        ('verb', 'ListRecords'),
        ('resumptionToken', resumptionToken),
        ('x-wait', 'True'),
    ])
    self.assertEqual(
        requestTemplate % (queryAfter, processor._identifier),
        processor.buildRequest())
def testRestartAfterFinish(self):
    # With restartAfterFinish=True a finished harvest must be followed by a
    # plain ListRecords request (no resumption token in the query).
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        restartAfterFinish=True)
    processor.addObserver(observer)
    # NOTE(review): the response template is parsed without %-formatting
    # here, whereas other tests in this file apply `% ''` -- confirm that
    # this is intentional.
    response = parse(StringIO(LISTRECORDS_RESPONSE))
    consume(processor.handle(response))
    self.assertEqual(None, processor._resumptionToken)
    request = processor.buildRequest()
    expectedStart = (
        'GET /oai?verb=ListRecords&metadataPrefix=oai_dc HTTP/1.0\r\n'
        'X-Meresco-Oai-Client-Identifier: ')
    self.assertTrue(request.startswith(expectedStart), request)
def testIncrementalHarvestReScheduleIfNoRecordsMatch(self):
    # After handling NO_RECORDS_MATCH_RESPONSE there must be no error
    # state, and 'from' must have moved to '2012-06-01T19:20:30Z'.
    observer = CallTrace(emptyGeneratorMethods=['add'])
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        incrementalHarvestSchedule=Schedule(period=0),
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    processor.addObserver(observer)

    recordsResponse = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    consume(processor.handle(recordsResponse))
    self.assertEqual('2002-06-01T19:20:30Z', processor._from)

    noMatchResponse = parse(StringIO(NO_RECORDS_MATCH_RESPONSE))
    consume(processor.handle(noMatchResponse))
    self.assertEqual(None, processor._errorState)
    self.assertEqual('2012-06-01T19:20:30Z', processor._from)
def testPersistentIdentifier(self):
    # Constructing a processor creates 'harvester.identifier' holding its
    # client identifier; a second processor on the same working directory
    # must send that same identifier.
    identifierFilepath = join(self.tempdir, 'harvester.identifier')
    self.assertFalse(isfile(identifierFilepath))
    requestTemplate = (
        "GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\n"
        "X-Meresco-Oai-Client-Identifier: %s\r\n"
        "User-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n")

    firstProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    currentIdentifier = firstProcessor._identifier
    self.assertTrue(isfile(identifierFilepath))
    with open(identifierFilepath) as f:
        self.assertEqual(currentIdentifier, f.read())
    self.assertEqual(
        requestTemplate % currentIdentifier, firstProcessor.buildRequest())

    secondProcessor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=True)
    self.assertEqual(
        requestTemplate % currentIdentifier, secondProcessor.buildRequest())
def testHandleYieldsAtLeastOnceAfterEachRecord(self):
    # Even when 'add' itself yields nothing, handle() must yield once per
    # record: one yield for one record, two yields for two records.
    def add(**kwargs):
        # Empty generator: the unreachable `yield` makes this a generator.
        return
        yield

    observer = CallTrace(methods={'add': add})
    processor = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False)
    processor.addObserver(observer)

    singleRecordResponse = parse(StringIO(LISTRECORDS_RESPONSE % ''))
    yields = list(compose(processor.handle(singleRecordResponse)))
    self.assertEqual(1, len(yields))

    secondRecord = (
        '<record xmlns="http://www.openarchives.org/OAI/2.0/">'
        '<header>'
        '<identifier>oai:identifier:2</identifier>'
        '<datestamp>2011-08-22T07:41:00Z</datestamp>'
        '</header>'
        '<metadata>ignored</metadata>'
        '</record>')
    twoRecordResponse = parse(StringIO(LISTRECORDS_RESPONSE % secondRecord))
    yields = list(compose(processor.handle(twoRecordResponse)))
    self.assertEqual(2, len(yields))