Пример #1
0
    def testNearRealtimeOai(self):
        """Verify that a record added during harvesting reaches the observer.

        Three records are stored first, then ``startOaiPmh`` and
        ``startOaiHarvester`` are run in separate threads on one random port.
        The assertions pin the calls recorded by the traced observer and the
        size of the ``SuspendRegister`` before and after a fourth record
        ("id3") is added.
        """
        self.run = True
        port = randint(50000, 60000)
        register = SuspendRegister()
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        jazz.addObserver(register)
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, 3)

        def emptyAdd(**kwargs):
            # Stub for the observer's 'add': returns an empty generator.
            return (x for x in [])

        tracer = CallTrace("observer",
                           ignoredAttributes=["observer_init"],
                           methods={'add': emptyAdd})

        def calledNames():
            # Names of all calls recorded on the traced observer so far.
            return [call.name for call in tracer.calledMethods]

        serverThread = Thread(target=self.startOaiPmh,
                              args=(port, jazz, storage, register))
        harvesterThread = Thread(target=self.startOaiHarvester,
                                 args=(port, tracer))
        serverThread.start()
        harvesterThread.start()

        try:
            expectedRequests = 3
            sleepWheel(1.0 + 1.0 * expectedRequests)

            expectedCalls = [
                'startOaiBatch', 'add', 'add', 'stopOaiBatch',
                'startOaiBatch', 'add', 'stopOaiBatch',
            ]
            self.assertEqual(expectedCalls, calledNames())

            harvestedIds = []
            for call in tracer.calledMethods:
                if call.name != 'add':
                    continue
                harvestedIds.append(
                    xpath(call.kwargs['lxmlNode'],
                          '//oai:header/oai:identifier/text()'))
            self.assertEqual([['id0'], ['id1'], ['id2']], harvestedIds)

            # Everything has been harvested: one entry is in the register.
            self.assertEqual(1, len(register))
            tracer.calledMethods.reset()

            expectedRequests += 1
            storage.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
            jazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
            sleepWheel(1)

            # The new record emptied the register and produced one new batch.
            self.assertEqual(0, len(register))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             calledNames())
            serialized = lxmltostring(
                tracer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id3" in serialized, serialized)

            # One second later the register holds one entry again.
            sleepWheel(1.0)
            self.assertEqual(1, len(register))
        finally:
            self.run = False
            serverThread.join()
            harvesterThread.join()
            jazz.close()
Пример #2
0
    def testRemoveSet(self):
        """Check that ``removeSetsFromOai`` strips set 'a:b' from the data.

        Three records are added with combinations of the sets 'a:b' and
        'a:c'. After ``removeSetsFromOai`` has run on the closed directory,
        the reopened ``OaiJazz`` must no longer report 'a:b', neither on the
        records nor in ``getAllSets()``.
        """
        def recordSummary(jazz):
            # (identifier, sets, isDeleted) for each record under 'prefix'.
            return [(r.identifier, r.sets, r.isDeleted)
                    for r in jazz.oaiSelect(prefix='prefix').records]

        oaiJazz = OaiJazz(self.tempdir)
        try:
            oaiJazz.updateSet('a:b', 'set A/B')
            oaiJazz.updateSet('a:c', 'set A/C')
            oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataFormats=[('prefix', '', '')])
            oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataFormats=[('prefix', '', '')])
            oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataFormats=[('prefix', '', '')])

            self.assertEqual([
                    ('id:0', set([u'a', u'a:b', u'a:c']), False),
                    ('id:1', set([u'a', u'a:b']), False),
                    ('id:2', set([u'a', u'a:c']), False),
                ],
                recordSummary(oaiJazz))
            self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
        finally:
            # Close even when an assertion fails so the directory is released.
            oaiJazz.close()

        removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

        oaiJazz = OaiJazz(self.tempdir)
        try:
            # The expected order differs from the insertion order above.
            self.assertEqual([
                    ('id:2', set([u'a', u'a:c']), False),
                    ('id:0', set([u'a', u'a:c']), False),
                    # 'id:1' only had 'a:b', so the implied parent 'a' is gone too.
                    ('id:1', set([]), False),
                ],
                recordSummary(oaiJazz))
            self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
        finally:
            # The reopened instance was previously never closed.
            oaiJazz.close()
Пример #3
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        """A second waiting request with the same client id aborts the first.

        Two ``ListRecords`` requests with ``x-wait`` and the same
        ``X-Meresco-Oai-Client-Identifier`` header are issued one after the
        other. Once the register holds a different suspend object for that
        client id, exactly one response must have arrived: the first request,
        with '500' in its header and a body starting with
        'Aborting suspended request'.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        clientId = str(uuid4())

        requests = []
        def doOaiListRecord(port):
            # Use the port that was passed in rather than the enclosing variable.
            header, body = getRequest(
                port=port,
                path="/",
                arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"},
                additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
                parse=False)
            requests.append((header, body))

        oaiPmhThread = Thread(target=self.startOaiPmh, args=(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread1 = Thread(target=doOaiListRecord, args=(portNumber,))
        harvestThread2 = Thread(target=doOaiListRecord, args=(portNumber,))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request shows up in the register.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the register holds another suspend object for this client.
                while harvest1Suspend == suspendRegister._suspendObject(clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

                self.assertEqual(1, len(requests))
                header, body = requests[0]
                self.assertTrue('500' in header, header)
                self.assertTrue(body.startswith('Aborting suspended request'), body)

                # NOTE(review): presumably adding a record lets the second,
                # still pending request finish so its thread can be joined.
                storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                harvestThread2.join()
                oaiJazz.close()
Пример #4
0
    def testShouldNotStartToLoopLikeAMadMan(self):
        """Seven waiting requests against a register limited to five.

        Seven threads each issue a ``ListRecords`` request with ``x-wait``
        and a 0.5 second client timeout, while the ``SuspendRegister`` is
        created with ``maximumSuspendedConnections=5``. Requests that return
        record their status code; requests that time out record nothing. The
        test expects exactly two responses, both with status 204.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))

        def doUrlOpenWithTimeout(port, basket):
            try:
                response = urlopen(
                    "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True"
                    % port,
                    timeout=0.5)
                basket.append(response.getcode())
            except timeout as e:
                # NOTE(review): this runs in a worker thread, so a failing
                # assertion here does not fail the test itself.
                self.assertTrue('timed out' in str(e), str(e))

        oaiPmhThread = Thread(
            target=self.startOaiPmh,
            args=(portNumber, oaiJazz, storageComponent, suspendRegister))
        threads = []

        statusCodes = []
        oaiPmhThread.start()
        with stderr_replaced():
            for _ in range(7):
                # Pass target and args explicitly instead of a lambda that
                # closes over a loop variable (late binding).
                harvestThread = Thread(target=doUrlOpenWithTimeout,
                                       args=(portNumber, statusCodes))
                threads.append(harvestThread)
                harvestThread.start()

            try:
                # Wait until at least one request is in the register.
                while len(suspendRegister) == 0:
                    sleep(0.01)
            finally:
                for t in threads:
                    t.join()
                self.run = False
                oaiPmhThread.join()
                oaiJazz.close()

        self.assertEqual([204] * 2, statusCodes)
Пример #5
0
    def testNearRealtimeOai(self):
        """Check that a record added while harvesting runs reaches the observer.

        Three records are stored up front, then ``startOaiPmh`` and
        ``startOaiHarvester`` run in their own threads on the same random
        port. The test pins the calls recorded on the traced observer and the
        number of entries in the ``SuspendRegister``, both before and after a
        fourth record ("id3") is added.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        suspendRegister = SuspendRegister()
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 3)
        oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))

        # The observer's 'add' is stubbed to return an empty generator.
        observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
        harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))

        oaiPmhThread.start()
        harvestThread.start()

        try:
            requests = 3
            sleepWheel(1.0 + 1.0 * requests)

            self.assertEqual(['startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            ids = [xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()') for m in observer.calledMethods if m.name == 'add']
            self.assertEqual([['id0'], ['id1'], ['id2']], ids)

            # All three records were harvested; one entry is in the register.
            self.assertEqual(1, len(suspendRegister))
            observer.calledMethods.reset()

            storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
            oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
            sleepWheel(1)

            # The new record emptied the register and produced one new batch.
            self.assertEqual(0, len(suspendRegister))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id3" in kwarg, kwarg)
            # One second later the register holds one entry again.
            sleepWheel(1.0)
            self.assertEqual(1, len(suspendRegister))
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread.join()
            oaiJazz.close()
Пример #6
0
    def testRemoveSet(self):
        """Check that ``removeSetsFromOai`` removes set 'a:b' everywhere.

        Records 'id:0', 'id:1' and 'id:2' are added with combinations of the
        sets 'a:b' and 'a:c'. After ``removeSetsFromOai`` has processed the
        closed directory, a freshly opened ``OaiJazz`` must report no 'a:b'
        on any record nor in ``getAllSets()``.
        """
        def recordSummary(jazz):
            # (identifier, sets, isDeleted) for each record under 'prefix'.
            return [(r.identifier, r.sets, r.isDeleted)
                    for r in jazz.oaiSelect(prefix='prefix').records]

        oaiJazz = OaiJazz(self.tempdir)
        try:
            oaiJazz.updateSet('a:b', 'set A/B')
            oaiJazz.updateSet('a:c', 'set A/C')
            oaiJazz.updateMetadataFormat(prefix="prefix",
                                         schema="",
                                         namespace="")
            oaiJazz.addOaiRecord('id:0',
                                 setSpecs=['a:b', 'a:c'],
                                 metadataPrefixes=['prefix'])
            oaiJazz.addOaiRecord('id:1',
                                 setSpecs=['a:b'],
                                 metadataPrefixes=['prefix'])
            oaiJazz.addOaiRecord('id:2',
                                 setSpecs=['a:c'],
                                 metadataPrefixes=['prefix'])

            self.assertEqual([
                ('id:0', set(['a', 'a:b', 'a:c']), False),
                ('id:1', set(['a', 'a:b']), False),
                ('id:2', set(['a', 'a:c']), False),
            ], recordSummary(oaiJazz))
            self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
        finally:
            # Close even when an assertion fails so the directory is released.
            oaiJazz.close()

        removeSetsFromOai(self.tempdir,
                          sets=['a:b'],
                          prefix='prefix',
                          batchSize=1)

        oaiJazz = OaiJazz(self.tempdir)
        try:
            # The expected order differs from the insertion order above.
            self.assertEqual([
                ('id:2', set(['a', 'a:c']), False),
                ('id:0', set(['a', 'a:c']), False),
                # 'id:1' only had 'a:b', so the implied parent 'a' is gone too.
                ('id:1', set([]), False),
            ], recordSummary(oaiJazz))
            self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
        finally:
            # The reopened instance was previously never closed.
            oaiJazz.close()
Пример #7
0
 def _convert(self, verbose=False):
     """Re-issue the delete for records that are only partially deleted.

     While running, a marker file 'converting' exists in the data
     directory; it is removed again even when the conversion fails. The
     file 'oai.version' is set to '12' before the directory is opened with
     ``OaiJazz``. Every record that is flagged deleted but whose
     ``prefixes`` differ from its ``deletedPrefixes`` is deleted again via
     ``deleteOaiRecord``.

     :param verbose: when true, print the identifier of each converted record.
     """
     with open(join(self._dataDir, 'converting'), 'w') as f:
         f.write('CONVERTING')
     try:
         with open(join(self._dataDir, 'oai.version'), 'w') as v:
             v.write('12')
         o = OaiJazz(self._dataDir)
         try:
             # 0 is only a sentinel to enter the loop; the first select
             # is issued with continueAfter=None.
             continueAfter = 0
             while continueAfter is not None:
                 if continueAfter == 0:
                     continueAfter = None
                 result = o.oaiSelect(prefix=None, continueAfter=continueAfter)
                 continueAfter = result.continueAfter
                 for record in result.records:
                     if record.isDeleted and record.prefixes != record.deletedPrefixes:
                         if verbose:
                             # Single-argument call form: prints the same
                             # line on Python 2 and parses on Python 3.
                             print('Converting %s' % (record.identifier,))
                         o.deleteOaiRecord(identifier=record.identifier)
         finally:
             o.close()
     finally:
         remove(join(self._dataDir, 'converting'))
Пример #8
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        """A second waiting request with the same client id aborts the first.

        Two ``ListRecords`` requests with ``x-wait`` and the same
        ``X-Meresco-Oai-Client-Identifier`` header are issued one after the
        other. Once the register holds a different suspend object for that
        client id, exactly one response must have arrived: the first request,
        with status code "204" and a body starting with
        b'Aborting suspended request'.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        clientId = str(uuid4())

        responses = []

        def doOaiListRecord(port):
            # Use the port that was passed in rather than the enclosing variable.
            header, body = getRequest(port=port,
                                      path="/",
                                      arguments={
                                          "verb": "ListRecords",
                                          "metadataPrefix": "prefix",
                                          "x-wait": "True"
                                      },
                                      additionalHeaders={
                                          'X-Meresco-Oai-Client-Identifier':
                                          clientId
                                      },
                                      parse=False)
            responses.append((header, body))

        oaiPmhThread = Thread(
            target=self.startOaiPmh,
            args=(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread1 = Thread(target=doOaiListRecord, args=(portNumber,))
        harvestThread2 = Thread(target=doOaiListRecord, args=(portNumber,))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request shows up in the register.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the register holds another suspend object for
                # this client.
                while harvest1Suspend == suspendRegister._suspendObject(
                        clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(
                    harvest1Suspend != suspendRegister._suspendObject(clientId)
                )

                self.assertEqual(1, len(responses))
                statusAndHeader, body = responses[0]
                self.assertEqual("204", statusAndHeader['StatusCode'])
                self.assertTrue(body.startswith(b'Aborting suspended request'),
                                body)

                # NOTE(review): presumably adding a record lets the second,
                # still pending request finish so its thread can be joined.
                storageComponent.addData(identifier="id1",
                                         name="prefix",
                                         data=b"<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1",
                                     metadataPrefixes=["prefix"])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                harvestThread2.join()
                oaiJazz.close()