示例#1
0
class OaiRequestTest(SeecrTestCase):
    """Tests for OaiRequest and for the MockOaiRequest test double."""

    def setUp(self):
        """Create the MockOaiRequest ('mocktud') used by the mock-based tests."""
        super(OaiRequestTest, self).setUp()
        self.request = MockOaiRequest('mocktud')

    def testUserAgentDefault(self):
        """identify() hands the injected opener a request with the default User-agent."""
        captured = {}

        def myOwnUrlOpen(*fArgs, **fKwargs):
            # Record how the opener was invoked, then answer with a fixture response.
            captured['args'] = fArgs
            captured['kwargs'] = fKwargs
            return StringIO(oaiResponseXML())

        request = OaiRequest("http://harvest.me", _urlopen=myOwnUrlOpen)
        request.identify()

        self.assertEqual("Meresco Harvester trunk", captured['args'][0].headers['User-agent'])

    def testContextSetToTLS12(self):
        """After an SSLError the opener is called a second time with a TLS 1.2 context.

        The first call is expected to carry ``context=None``; the second one a
        context whose ``protocol`` is ``PROTOCOL_TLSv1_2``.
        """
        from ssl import SSLError, PROTOCOL_TLSv1_2
        calls = []

        def loggingUrlOpen(*fArgs, **fKwargs):
            calls.append(fKwargs)
            raise SSLError("Some error")

        request = OaiRequest("http://harvest.me", _urlopen=loggingUrlOpen)
        try:
            request.identify()
        except Exception:
            # Any failure is acceptable here; what matters is the recorded calls.
            pass
        else:
            # Kept outside the try body: a bare ``except`` around self.fail()
            # would swallow the AssertionError and the check could never fail.
            self.fail("Should have failed")
        self.assertEqual(2, len(calls))
        self.assertEqual(None, calls[0]['context'])
        context = calls[1]['context']
        self.assertEqual(PROTOCOL_TLSv1_2, context.protocol)

    def testMockOaiRequest(self):
        """A ListRecords request on the mock yields the expected responseDate."""
        response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
        self.assertEqual('2004-12-29T13:19:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        """A GetRecord request on the mock yields the expected responseDate."""
        response = self.request.request({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
        self.assertEqual('2005-04-28T12:16:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        """listRecords with a bad resumptionToken raises OAIError with code and message."""
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
        except OAIError as e:
            self.assertEqual('The value of the resumptionToken argument is invalid or expired.', e.errorMessage())
            self.assertEqual(u'badResumptionToken', e.errorCode())
        else:
            self.fail()
示例#2
0
class OaiRequestTest(unittest.TestCase):
    """Tests for the MockOaiRequest test double."""

    def setUp(self):
        """Create the MockOaiRequest ('mocktud') queried by every test."""
        self.request = MockOaiRequest('mocktud')

    def testMockOaiRequest(self):
        """A ListRecords request on the mock yields the expected responseDate."""
        response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
        self.assertEqual('2004-12-29T13:19:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testOtherOaiRequest(self):
        """A GetRecord request on the mock yields the expected responseDate."""
        response = self.request.request({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
        self.assertEqual('2005-04-28T12:16:27Z', xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))

    def testListRecordsError(self):
        """listRecords with a bad resumptionToken raises OAIError with code and message."""
        try:
            self.request.listRecords(resumptionToken='BadResumptionToken')
        except OAIError as e:
            # ``as`` form is valid from Python 2.6 on, unlike ``except OAIError, e``.
            self.assertEqual('The value of the resumptionToken argument is invalid or expired.', e.errorMessage())
            self.assertEqual(u'badResumptionToken', e.errorCode())
        else:
            self.fail()
示例#3
0
 def testResumptionToken(self):
     """A ResumptionToken recorded in the stats file is passed to listRecords."""
     self.mockRepository = MockOaiRequest('mocktud')
     # ``with`` guarantees the stats file is flushed and closed before harvesting.
     with open(self.stateDir + '/tud.stats', 'w') as statsFile:
         statsFile.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
     repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
     logger = self.createLogger()
     h = Harvester(repository)
     # The test case observes the harvester itself so it can record listRecords arguments.
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsToken = None
     h.harvest()
     self.assertEqual('ga+hier+verder', self.listRecordsToken)
示例#4
0
 def testContinuousHarvesting(self):
     """With continuous=True the full 'from' timestamp of tud.next reaches listRecords."""
     self.mockRepository = MockOaiRequest('mocktud')
     with open(self.stateDir + '/tud.stats', 'w') as statsFile:
         statsFile.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
     # Close the handle explicitly so the state is on disk before the logger reads it.
     with open(self.stateDir + '/tud.next', 'w') as nextFile:
         JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(nextFile)
     repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
     logger = self.createLogger()
     h = Harvester(repository)
     # The test case observes the harvester itself so it can record listRecords arguments.
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsFrom = None
     h.harvest()
     self.assertEqual('2015-01-01T00:12:13Z', self.listRecordsFrom)
示例#5
0
 def testOnlyErrorInLogFile(self):
     """A stats file containing only Error lines yields no usable 'from' date.

     The expected value 'aap' is presumably the default of the observing
     listRecords' ``from_`` parameter (not visible in this snippet), i.e.
     no 'from' argument was passed at all.
     """
     self.mockRepository = MockOaiRequest('mocktud')
     with open(self.stateDir + '/tud.stats', 'w') as statsFile:
         statsFile.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
         statsFile.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
     repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
     logger = self.createLogger()
     h = Harvester(repository)
     # The test case observes the harvester itself so it can record listRecords arguments.
     h.addObserver(self)
     h.addObserver(logger)
     h.addObserver(repository.createUploader(logger.eventLogger))
     h.addObserver(repository.mapping())
     self.listRecordsFrom = None
     h.harvest()
     self.assertEqual('aap', self.listRecordsFrom)
示例#6
0
    def testIncrementalHarvest(self):
        """A harvest after a completed one resumes from the stored date and appends stats."""
        self.mockRepository = MockOaiRequest('mocktud')
        with open(self.stateDir + '/tud.stats', 'w') as statsFile:
            statsFile.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        # Close the handle explicitly so the state is on disk before the logger reads it.
        with open(self.stateDir + '/tud.next', 'w') as nextFile:
            JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(nextFile)

        # Pre-populate 113 already-known identifiers.
        with open(self.stateDir + '/tud.ids', 'w') as idsFile:
            for i in range(113):
                idsFile.write('oai:tudfakeid:%05i\n' % i)
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        # The test case observes the harvester itself so it can record listRecords arguments.
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEqual('1999-12-01', self.listRecordsFrom)
        with open(self.stateDir + '/tud.stats') as statsFile:
            lines = statsFile.readlines()
        self.assertEqual(2, len(lines))
        self.assertEqual(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))
示例#7
0
 def setUp(self):
     """Create the MockOaiRequest ('mocktud') that the tests query."""
     mockRequest = MockOaiRequest('mocktud')
     self.request = mockRequest
示例#8
0
class HarvesterTest(unittest.TestCase):
    def setUp(self):
        self.sendCalled=0
        self.sendException = None
        self.upload = None
        self.sendParts=[]
        self.sendId=[]
        self.listRecordsSet = None
        self.listRecordsToken = None
        self.startCalled=0
        self.stopCalled=0
        self.logDir = self.stateDir = mkdtemp()

    def tearDown(self):
        rmtree(self.logDir)

    def createLogger(self):
        self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        return self.logger

    def createServer(self, url='http://repository.tudelft.nl/oai'):
        return OaiRequest(url)

    def testCreateHarvester(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.assertEquals((0,0),(self.startCalled,self.stopCalled))
        harvester.harvest()
        self.assertEquals((1,1),(self.startCalled,self.stopCalled))
        harvester = self.createHarvesterWithMockUploader('eur')
        self.assertEquals((1,1),(self.startCalled,self.stopCalled))
        harvester.harvest()
        self.assertEquals((2,2),(self.startCalled,self.stopCalled))

    def testDoUpload(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()

        self.assertEqual(3, self.sendCalled)
        self.assertEqual('tud:oai:tudelft.nl:007193', self.sendId[2])
        record = parse(StringIO(self.sendParts[2]['record']))
        subjects = record.xpath('/oai:record/oai:metadata/oai_dc:dc/dc:subject/text()', namespaces=namespaces)
        self.assertEqual(['quantitative electron microscopy', 'statistical experimental design', 'parameter estimation'], subjects)
        self.assertEquals('ResumptionToken: TestToken', file(os.path.join(self.stateDir, 'tud.stats')).read()[-27:-1])

    def testLogIDsForRemoval(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        idsfile = open(self.stateDir+'/tud.ids')
        try:
            self.assertEquals('tud:oai:tudelft.nl:007087',idsfile.readline().strip())
            self.assertEquals('tud:oai:tudelft.nl:007192',idsfile.readline().strip())
            self.assertEquals('tud:oai:tudelft.nl:007193',idsfile.readline().strip())
        finally:
            idsfile.close()

    def createHarvesterWithMockUploader(self, name, set=None, mockRequest=None):
        self.logger = HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name=name)
        repository = self.MockRepository(name, set)
        uploader = repository.createUploader(self.logger.eventLogger())
        self.mapper = repository.mapping()
        harvester = Harvester(repository)
        harvester.addObserver(mockRequest or MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(uploader)
        harvester.addObserver(self.mapper)
        return harvester

    def testSimpleStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        self.assert_(os.path.isfile(self.stateDir+'/tud.stats'))
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        year = strftime('%Y')
        self.assertEquals('Started: %s-'%year, stats[0][:14])
        self.assertEquals(' Harvested/Uploaded/Deleted/Total: 3/3/0/3', stats[1])
        self.assertEquals(' Done: %s-'%year, stats[2][:12])

    def testErrorStat(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.sendException = Exception('send failed')
        try:
            harvester.harvest()
        except:
            pass
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        self.assertTrue(stats[2].startswith(' Error: '), stats[2])
        self.assertTrue(stats[2].endswith('send failed'), stats[2])

    def testResumptionTokenLog(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.harvest()
        stats = open(self.stateDir + '/tud.stats').readline().strip().split(',')
        self.assertEquals(' ResumptionToken: TestToken', stats[3])

    def testOtherMetadataPrefix(self):
        self.logger=HarvesterLog(stateDir=self.stateDir, logDir=self.logDir, name='tud')
        repository = self.MockRepository('tud', None)
        repository.metadataPrefix='lom'
        harvester = Harvester(repository)
        harvester.addObserver(MockOaiRequest('mocktud'))
        harvester.addObserver(self.logger)
        harvester.addObserver(repository.createUploader(self.logger.eventLogger))
        harvester.addObserver(repository.mapping())
        harvester.harvest()
        self.assertEquals(['tud:oai:lorenet:147'],self.sendId)

    def testWriteAndSeek(self):
        f = open('test','w')
        f.write('enige info: ')
        pos = f.tell()
        f.write('20000')
        f.seek(pos)
        f.write('12345')
        f.close()
        self.assertEquals('enige info: 12345', open('test','r').readline().strip())
        os.remove('test')

    def testException(self):
        try:
            raise Exception('aap')
            self.fail()
        except:
            self.assertEquals('aap', str(sys.exc_value))
            self.assertTrue('exceptions.Exception' in str(sys.exc_type), str(sys.exc_type))

    def testIncrementalHarvest(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        f.close()
        JsonDict({'resumptionToken': None, 'from': "1999-12-01T16:37:41Z"}).dump(open(self.stateDir + '/tud.next', 'w'))

        f = open(self.stateDir + '/tud.ids', 'w')
        for i in range(113): f.write('oai:tudfakeid:%05i\n'%i)
        f.close()
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('1999-12-01', self.listRecordsFrom)
        lines = open(self.stateDir + '/tud.stats').readlines()
        self.assertEquals(2, len(lines))
        self.assertEquals(('3', '3', '0', '116'), getHarvestedUploadedRecords(lines[1]))

    def testNotIncrementalInCaseOfError(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        f.close();
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('1998-12-01', self.listRecordsFrom)

    def testOnlyErrorInLogFile(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1998-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error:\n')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Error: XXX\n')
        f.close();
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('aap', self.listRecordsFrom)

    def testResumptionToken(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write('Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 113/113/113, Done: 2004-12-31 16:39:15, ResumptionToken: ga+hier+verder\n')
        f.close();
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud')
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsToken = None
        h.harvest()
        self.assertEquals('ga+hier+verder', self.listRecordsToken)

    def testContinuousHarvesting(self):
        self.mockRepository = MockOaiRequest('mocktud')
        f = open(self.stateDir + '/tud.stats', 'w')
        f.write(' Started: 1999-12-01 16:37:41, Harvested/Uploaded/Total: 56/23/113, Done: 2004-12-31 16:39:15\n')
        f.close()
        JsonDict({'resumptionToken': None, 'from': "2015-01-01T00:12:13Z"}).dump(open(self.stateDir + '/tud.next', 'w'))
        repository = self.MockRepository3('tud' ,'http://repository.tudelft.nl/oai', None, 'tud', continuous=True)
        logger = self.createLogger()
        h = Harvester(repository)
        h.addObserver(self)
        h.addObserver(logger)
        h.addObserver(repository.createUploader(logger.eventLogger))
        h.addObserver(repository.mapping())
        self.listRecordsFrom = None
        h.harvest()
        self.assertEquals('2015-01-01T00:12:13Z', self.listRecordsFrom)

    def testHarvestSet(self):
        self.mockRepository = MockOaiRequest('mocktud')
        harvester = self.createHarvesterWithMockUploader('um', set='withfulltext:yes', mockRequest = self)
        harvester.harvest()
        self.assertEquals('withfulltext:yes', self.listRecordsSet)

    def mockHarvest(self, repository, logger, uploader):
        if not hasattr(self, 'mockHarvestArgs'):
            self.mockHarvestArgs=[]
        self.mockHarvestArgs.append({'name':repository.id,'baseurl':repository.baseurl,'set':repository.set,'repositoryGroupId':repository.repositoryGroupId})

    def testNoDateHarvester(self):
        "runs a test with xml containing no dates"
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='NoDateToken'
        harvester.harvest()

    def testNothingInRepository(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger._state.token='EmptyListToken'
        harvester.harvest()
        lines = open(self.stateDir+'/tud.stats').readlines()
        self.assert_('Harvested/Uploaded/Deleted/Total: 0/0/0/0' in lines[0])

    def testUploadRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEquals(['tud:mockid'], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testSkippedRecord(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        def createUpload(repository, oaiResponse):
            upload = Upload(repository=repository, oaiResponse=oaiResponse)
            upload.id = "tud:mockid"
            upload.skip = True
            return upload
        self.mapper.createUpload = createUpload
        harvester.upload(oaiResponse(identifier='mockid'))
        self.assertEquals([], self.sendId)
        self.assertFalse(hasattr(self, 'delete_id'))

    def testDelete(self):
        harvester = self.createHarvesterWithMockUploader('tud')
        harvester.upload(oaiResponse(identifier='mockid', deleted=True))
        self.assertEquals([], self.sendId)
        self.assertEquals('tud:mockid', self.delete_id)

    def testDcIdentifierTake2(self):
        self.sendFulltexturl=None
        harvester = self.createHarvesterWithMockUploader('tud')
        self.logger.token='DcIdentifierHttp2'
        harvester.harvest()
        open(self.stateDir+'/tud.stats').readlines()

    def testHarvesterStopsIgnoringAfter100records(self):
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 101
        observer.exceptions['send'] =  InvalidDataException(upload.id, "message")
        repository=CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        self.assertRaises(TooMuchInvalidDataException, lambda: harvester.upload(oaiResponse(identifier='mockid')))
        self.assertEquals(['createUpload', "notifyHarvestedRecord", "send", "logInvalidData", "totalInvalidIds"], [m.name for m in observer.calledMethods])

    def testHarvesterIgnoringInvalidDataErrors(self):
        observer = CallTrace('observer')
        upload = Upload(repository=None, oaiResponse=oaiResponse(identifier='mockid'))
        upload.id = 'mockid'
        observer.returnValues['createUpload'] = upload
        observer.returnValues['totalInvalidIds'] = 0
        observer.exceptions['send'] =  InvalidDataException(upload.id, "message")
        repository=CallTrace("repository", returnValues={'maxIgnore': 100})
        harvester = Harvester(repository)
        harvester.addObserver(observer)
        harvester.upload(oaiResponse())
        self.assertEquals(['createUpload', "notifyHarvestedRecord", "send", 'logInvalidData', "totalInvalidIds", 'logIgnoredIdentifierWarning'], [m.name for m in observer.calledMethods])

    #self shunt:
    def send(self, upload):
        self.sendCalled+=1
        self.sendId.append(upload.id)
        self.sendParts.append(upload.parts)
        self.upload = upload
        if self.sendException:
            raise self.sendException

    def delete(self, anUpload):
        self.delete_id = anUpload.id

    def uploaderInfo(self):
        return 'The uploader is connected to /dev/null'

    def start(self):
        self.startCalled += 1

    def stop(self):
        self.stopCalled += 1

    def listRecordsButWaitLong(self, a, b, c, d):
        sleep(20)

    def MockRepository (self, id, set):
        return _MockRepository(id, 'http://mock.server', set, 'inst'+id,self)

    def MockRepository2 (self, nr):
        return _MockRepository('reponame'+nr, 'url'+nr, 'set'+nr, 'instname'+nr,self)

    def MockRepository3(self, id, baseurl, set, repositoryGroupId, continuous=False):
        return _MockRepository(id, baseurl, set, repositoryGroupId, self, continuous=continuous)

    def mockssetarget(self):
        return self

    def createUploader(self, logger):
        return self

    def listRecords(self, metadataPrefix = None, from_ = "aap", resumptionToken = 'mies', set = None):
        self.listRecordsFrom = from_
        self.listRecordsToken = resumptionToken
        self.listRecordsSet = set
        if metadataPrefix:
            if set:
                return self.mockRepository.listRecords(metadataPrefix = metadataPrefix, set = set)
            return self.mockRepository.listRecords(metadataPrefix = metadataPrefix)
        return self.mockRepository.listRecords(resumptionToken = resumptionToken)
示例#9
0
 def testHarvestSet(self):
     """The set configured on the repository is forwarded to listRecords."""
     expectedSet = 'withfulltext:yes'
     self.mockRepository = MockOaiRequest('mocktud')
     # The test case itself stands in for the OAI request so it can record the set.
     harvester = self.createHarvesterWithMockUploader('um', set=expectedSet, mockRequest=self)
     harvester.harvest()
     self.assertEquals('withfulltext:yes', self.listRecordsSet)
示例#10
0
 def setUp(self):
     """Run the base-class setUp, then create the MockOaiRequest ('mocktud')."""
     super(OaiRequestTest, self).setUp()
     mockRequest = MockOaiRequest('mocktud')
     self.request = mockRequest