Exemplo n.º 1
0
    def testIncrementalHarvestWithFromAfterSomePeriod(self):
        """Check when an incremental harvest issues its follow-up request.

        A response is handled with the clock fixed at 1.0 and a schedule of
        period 10. buildRequest() is expected to return None at clock values
        1.0, 6.0 and 10.0, and to return a ListRecords request carrying the
        remembered 'from' value once the clock reads 11.1.
        """
        callTrace = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO(),
            incrementalHarvestSchedule=Schedule(period=10))
        processor._time = lambda: 1.0
        processor.addObserver(callTrace)

        response = parse(StringIO(LISTRECORDS_RESPONSE))
        consume(processor.handle(response))
        self.assertEqual(None, processor._resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)

        # Clock still at 1.0: no request yet.
        self.assertEqual(None, processor.buildRequest())
        # Advancing the clock to 6.0 and 10.0 still yields no request.
        for now in (6.0, 10.0):
            processor._time = lambda now=now: now
            self.assertEqual(None, processor.buildRequest())

        processor._time = lambda: 11.1
        request = processor.buildRequest()
        expectedStart = 'GET /oai?verb=ListRecords&from=2002-06-01T19%3A20%3A30Z&metadataPrefix=oai_dc'
        self.assertTrue(request.startswith(expectedStart), request)
Exemplo n.º 2
0
    def testIncrementalHarvestScheduleNoneOverruledWithSetIncrementalHarvestSchedule(
            self):
        """Check that a schedule set afterwards replaces an initial None schedule.

        The processor is created with incrementalHarvestSchedule=None and the
        clock fixed at 10. After the first response no next request time is
        set. Once a schedule with period 3 is installed and
        scheduleNextRequest() is called, a request can be built, and handling
        a further response leaves the next request time at 13.
        """
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO(),
            incrementalHarvestSchedule=None)
        processor._time = lambda: 10

        def handleResponse():
            # Feed one ListRecords response (empty substitution) to the processor.
            document = parse(StringIO(LISTRECORDS_RESPONSE % ''))
            consume(processor.handle(document))

        handleResponse()
        self.assertEqual(None, processor._resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)
        self.assertEqual(None, processor._earliestNextRequestTime)

        newSchedule = Schedule(period=3)
        processor.setIncrementalHarvestSchedule(schedule=newSchedule)
        # Setting the schedule alone does not make a request available.
        self.assertEqual(None, processor.buildRequest())
        self.assertEqual(None, processor._earliestNextRequestTime)

        processor.scheduleNextRequest()
        self.assertNotEqual(None, processor.buildRequest())
        self.assertEqual(0, processor._earliestNextRequestTime)

        handleResponse()
        self.assertEqual(13, processor._earliestNextRequestTime)
Exemplo n.º 3
0
 def testListRecordsRequestError(self):
     """Check how an OAI error response affects output and the next request.

     A state file holding a resumption token is written before the processor
     is created, and the first request is expected to carry that token.
     After ERROR_RESPONSE is handled, no observer method has been called,
     the error text is on the err stream, and the next request uses the
     metadataPrefix instead of the resumption token.
     """
     token = "u|c1286437597991025|mprefix|s|f"
     with open(join(self.tempdir, 'harvester.state'), 'w') as stateFile:
         stateFile.write("Resumptiontoken: %s\n" % token)
     callTrace = CallTrace()
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor.addObserver(callTrace)

     def expectedRequest(arguments):
         # Built on demand so the identifier is read at assertion time.
         return (
             'GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
             % (urlencode(arguments), processor._identifier))

     self.assertEqual(
         expectedRequest([('verb', 'ListRecords'),
                          ('resumptionToken', token),
                          ('x-wait', 'True')]),
         processor.buildRequest())

     errorDocument = parse(StringIO(ERROR_RESPONSE))
     consume(processor.handle(errorDocument))
     self.assertEqual(0, len(callTrace.calledMethods))
     self.assertEqual("someError: Some error occurred.\n",
                      processor._err.getvalue())

     self.assertEqual(
         expectedRequest([('verb', 'ListRecords'),
                          ('metadataPrefix', 'oai_dc'),
                          ('x-wait', 'True')]),
         processor.buildRequest())
Exemplo n.º 4
0
 def testSetInRequest(self):
     """Check how the 'set' argument is rendered in the request line.

     Three situations are covered: a plain set name, a set name containing
     the characters -_.!~*'() (some of which are expected percent-encoded),
     and a processor started with a resumption token in the state file, in
     which case the request carries the token and neither metadataPrefix
     nor set.
     """
     def newProcessor(setSpec):
         # Every scenario uses the same configuration apart from the set.
         return OaiDownloadProcessor(
             path="/oai",
             metadataPrefix="oai_dc",
             set=setSpec,
             workingDirectory=self.tempdir,
             xWait=True)

     # Plain set name.
     processor = newProcessor("setName")
     expected = (
         """GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=setName&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())

     # Set name with punctuation characters.
     processor = newProcessor("set-_.!~*'()")
     expected = (
         """GET /oai?verb=ListRecords&metadataPrefix=oai_dc&set=set-_.%%21~%%2A%%27%%28%%29&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())

     # Resumption token present in the state file before construction.
     token = "u|c1286437597991025|mprefix|s|f"
     with open(join(self.tempdir, 'harvester.state'), 'w') as stateFile:
         stateFile.write("Resumptiontoken: %s\n" % token)
     processor = newProcessor("setName")
     expected = (
         """GET /oai?verb=ListRecords&resumptionToken=u%%7Cc1286437597991025%%7Cmprefix%%7Cs%%7Cf&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())
Exemplo n.º 5
0
 def testRequest(self):
     """Check the full request text built by a freshly created processor.

     With xWait=True the request line is expected to contain verb,
     metadataPrefix and x-wait, followed by the client identifier and
     user-agent headers.
     """
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     expected = (
         """GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())
Exemplo n.º 6
0
 def testRequestWithAdditionalHeaders(self):
     """Check that additionalHeaders passed to buildRequest appear in the request.

     A 'Host' header is supplied and is expected between the client
     identifier header and the user-agent header.
     """
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)
     extraHeaders = {'Host': 'example.org'}
     request = processor.buildRequest(additionalHeaders=extraHeaders)
     expected = (
         """GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nHost: example.org\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, request)
Exemplo n.º 7
0
 def testKeepResumptionTokenOnFailingAddCall(self):
     """Check that a failing observer 'add' leaves the resumption token in use.

     The observer raises on 'add'. Handling a response is expected to raise,
     to have called startOaiBatch, add and stopOaiBatch on the observer, and
     to write a traceback plus the offending record to the err stream. The
     request built afterwards must equal the one built before, i.e. it still
     carries the resumption token from the state file.
     """
     token = "u|c1286437597991025|mprefix|s|f"
     with open(join(self.tempdir, 'harvester.state'), 'w') as stateFile:
         stateFile.write("Resumptiontoken: %s\n" % token)
     failingObserver = CallTrace()
     failingObserver.exceptions = {'add': Exception("Could be anything")}
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True,
         err=StringIO())
     processor.addObserver(failingObserver)

     def expectedRequest():
         # Recomputed per assertion so the identifier is read each time.
         query = urlencode([('verb', 'ListRecords'),
                            ('resumptionToken', token),
                            ('x-wait', 'True')])
         return (
             'GET /oai?%s HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
             % (query, processor._identifier))

     self.assertEqual(expectedRequest(), processor.buildRequest())

     with self.assertRaises(Exception):
         list(
             compose(
                 processor.handle(
                     parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN)))))

     calledNames = [method.name for method in failingObserver.calledMethods]
     self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames)

     errorOutput = processor._err.getvalue()
     self.assertTrue(errorOutput.startswith('Traceback'), errorOutput)
     expectedFragment = 'Exception: Could be anything\nWhile processing:\n<record xmlns="http://www.openarchives.org/OAI/2.0/"><header><identifier>oai:identifier:1'
     self.assertTrue(expectedFragment in errorOutput, errorOutput)

     self.assertEqual(expectedRequest(), processor.buildRequest())
Exemplo n.º 8
0
 def testBuildRequestNoneWhenNoResumptionToken(self):
     """Check that no further request is built after a response without a token.

     After handling the response the processor has no resumption token, and
     buildRequest() is expected to return None.
     """
     callTrace = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     processor.addObserver(callTrace)

     response = parse(StringIO(LISTRECORDS_RESPONSE))
     consume(processor.handle(response))

     self.assertEqual(None, processor._resumptionToken)
     self.assertEqual(None, processor.buildRequest())
Exemplo n.º 9
0
 def testUpdateRequest(self):
     """Check that the setters change the request built afterwards.

     Path, metadataPrefix, set and from are changed after construction, and
     all four new values are expected in the request.
     """
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=True)

     processor.setPath('/otherOai')
     processor.setMetadataPrefix('otherPrefix')
     processor.setSet('aSet')
     processor.setFrom('2014')

     expected = (
         """GET /otherOai?verb=ListRecords&from=2014&metadataPrefix=otherPrefix&set=aSet&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())
Exemplo n.º 10
0
    def testScheduleNextRequest(self):
        """Check how scheduleNextRequest() sets the earliest next request time.

        The clock is fixed at 17. After handling a response the earliest next
        request time lies beyond 17. The test then covers three calls:

        * scheduleNextRequest() without arguments sets the time to 0 and a
          request can be built;
        * scheduleNextRequest(Schedule(period=0)) sets it to 17 and a request
          can still be built;
        * scheduleNextRequest(Schedule(period=120)) sets it to 137 and
          buildRequest() returns None.
        """
        oaiDownloadProcessor = OaiDownloadProcessor(
            path='/p', metadataPrefix='p', workingDirectory=self.tempdir)
        oaiDownloadProcessor._time = lambda: 17
        consume(
            oaiDownloadProcessor.handle(
                parse(StringIO(LISTRECORDS_RESPONSE % ''))))
        # assertGreater reports both operands on failure, whereas
        # assertTrue(a > b) only reports "False is not true".
        self.assertGreater(oaiDownloadProcessor._earliestNextRequestTime, 17)

        oaiDownloadProcessor.scheduleNextRequest()
        self.assertEqual(0, oaiDownloadProcessor._earliestNextRequestTime)
        # Strict equality with True/False is kept on purpose: assertTrue and
        # assertFalse would also accept other truthy/falsy values.
        self.assertEqual(True, oaiDownloadProcessor._timeForNextRequest())
        self.assertIsNotNone(oaiDownloadProcessor.buildRequest())

        oaiDownloadProcessor.scheduleNextRequest(Schedule(period=0))
        self.assertEqual(17, oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEqual(True, oaiDownloadProcessor._timeForNextRequest())
        self.assertIsNotNone(oaiDownloadProcessor.buildRequest())

        oaiDownloadProcessor.scheduleNextRequest(Schedule(period=120))
        self.assertEqual(137, oaiDownloadProcessor._earliestNextRequestTime)
        self.assertEqual(False, oaiDownloadProcessor._timeForNextRequest())
        self.assertIsNone(oaiDownloadProcessor.buildRequest())
Exemplo n.º 11
0
 def testUpdateRequestAfterSetResumptionToken(self):
     """Check the request built after setResumptionToken() has been called.

     Path and from are changed and a resumption token is set. The request
     is expected to use the new path and to carry only verb and
     resumptionToken in the query (xWait is False here).
     """
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         set="aSet",
         workingDirectory=self.tempdir,
         xWait=False)

     processor.setPath('/otherOai')
     processor.setFrom('2014')
     processor.setResumptionToken('ReSumptionToken')

     expected = (
         """GET /otherOai?verb=ListRecords&resumptionToken=ReSumptionToken HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())
Exemplo n.º 12
0
    def testPersistentIdentifier(self):
        """Check that the client identifier is stored on disk and reused.

        Before the first processor is created the file
        'harvester.identifier' does not exist in the working directory;
        afterwards it exists and contains the processor's identifier. A
        second processor on the same working directory is expected to send
        the same identifier in its request.
        """
        identifierPath = join(self.tempdir, 'harvester.identifier')
        self.assertFalse(isfile(identifierPath))

        def newProcessor():
            # Both processors share the same working directory.
            return OaiDownloadProcessor(
                path="/oai",
                metadataPrefix="oai_dc",
                workingDirectory=self.tempdir,
                xWait=True)

        firstProcessor = newProcessor()
        identifier = firstProcessor._identifier
        self.assertTrue(isfile(identifierPath))
        with open(identifierPath) as stream:
            storedIdentifier = stream.read()
        self.assertEqual(identifier, storedIdentifier)

        expected = (
            """GET /oai?verb=ListRecords&metadataPrefix=oai_dc&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n"""
            % identifier)
        self.assertEqual(expected, firstProcessor.buildRequest())

        secondProcessor = newProcessor()
        self.assertEqual(expected, secondProcessor.buildRequest())
Exemplo n.º 13
0
 def testRestartAfterFinish(self):
     """Check that restartAfterFinish=True yields a fresh request after the last batch.

     After handling a response that leaves no resumption token, the next
     request is expected to start with a ListRecords request line that
     carries only the metadataPrefix.
     """
     callTrace = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         restartAfterFinish=True)
     processor.addObserver(callTrace)

     response = parse(StringIO(LISTRECORDS_RESPONSE))
     consume(processor.handle(response))
     self.assertEqual(None, processor._resumptionToken)

     request = processor.buildRequest()
     expectedStart = 'GET /oai?verb=ListRecords&metadataPrefix=oai_dc HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: '
     self.assertTrue(request.startswith(expectedStart), request)
Exemplo n.º 14
0
 def testUseResumptionToken(self):
     """Check that a received resumption token is used and survives a restart.

     After handling a response built with RESUMPTION_TOKEN the processor's
     token is 'x?y&z', and the next request carries it percent-encoded. A
     second processor created on the same working directory is expected to
     report the same token.
     """
     def newProcessor():
         # Same configuration for the initial and the restarted processor.
         return OaiDownloadProcessor(
             path="/oai",
             metadataPrefix="oai_dc",
             workingDirectory=self.tempdir,
             xWait=True,
             err=StringIO())

     callTrace = CallTrace(emptyGeneratorMethods=['add'])
     processor = newProcessor()
     processor.addObserver(callTrace)

     response = parse(StringIO(LISTRECORDS_RESPONSE % RESUMPTION_TOKEN))
     consume(processor.handle(response))
     self.assertEqual('x?y&z', processor._resumptionToken)

     expected = (
         'GET /oai?verb=ListRecords&resumptionToken=x%%3Fy%%26z&x-wait=True HTTP/1.0\r\nX-Meresco-Oai-Client-Identifier: %s\r\nUser-Agent: Meresco-Oai-DownloadProcessor/5.x\r\n\r\n'
         % processor._identifier)
     self.assertEqual(expected, processor.buildRequest())

     restartedProcessor = newProcessor()
     self.assertEqual('x?y&z', restartedProcessor._resumptionToken)