Пример #1
0
    def testUpdateRecordWhileSendingData(self):
        """A ListIdentifiers response still carries a resumptionToken when a record is added mid-stream.

        While the response is being consumed, an OAI record for 'id1' is added
        as soon as the chunk containing 'id0' has been seen (presumably 'id1'
        already exists via _addOaiRecords, making this an update -- confirm).
        """
        batchSize = 3
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, count=batchSize + 10)

        root = Observable()
        oaiPmh = OaiPmh(repositoryName='test', adminEmail='*****@*****.**', batchSize=batchSize)
        dna = be((root, (oaiPmh, (storage,), (jazz,))))

        request = {
            'Method': 'GET',
            'Headers': {'Host': 'myserver'},
            'port': 1234,
            'path': '/oaipmh.pl',
            'arguments': {'verb': ['ListIdentifiers'], 'metadataPrefix': ['prefix']},
        }
        output = StringIO()
        for chunk in compose(dna.all.handleRequest(**request)):
            output.write(chunk)
            if 'identifier>id0<' in chunk:
                # Change the index while the response generator is still running.
                jazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

        # Everything after the last blank line is the XML body.
        body = output.getvalue().split(CRLF*2)[-1]
        resumptionToken = xpathFirst(XML(body), '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
        self.assertFalse(resumptionToken is None)
Пример #2
0
    def testUpdateRecordWhileSendingData(self):
        """A ListIdentifiers response still carries a resumptionToken when a record is added mid-stream.

        While the response is being consumed, an OAI record for 'id1' is added
        as soon as the chunk containing 'id0' has been seen (presumably 'id1'
        already exists via _addOaiRecords, making this an update -- confirm).
        """
        batchSize = 3
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, count=batchSize + 10)

        root = Observable()
        oaiPmh = OaiPmh(repositoryName='test',
                        adminEmail='*****@*****.**',
                        batchSize=batchSize)
        dna = be((root, (oaiPmh, (storage, ), (jazz, ))))

        request = {
            'Method': 'GET',
            'Headers': {'Host': 'myserver'},
            'port': 1234,
            'path': '/oaipmh.pl',
            'arguments': {
                'verb': ['ListIdentifiers'],
                'metadataPrefix': ['prefix'],
            },
        }
        output = StringIO()
        for chunk in compose(dna.all.handleRequest(**request)):
            output.write(chunk)
            if 'identifier>id0<' in chunk:
                # Change the index while the response generator is still running.
                jazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

        # Everything after the last blank line is the XML body; XML() gets bytes.
        body = output.getvalue().split(CRLF * 2)[-1]
        resumptionToken = xpathFirst(
            XML(body.encode()),
            '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
        self.assertFalse(resumptionToken is None)
Пример #3
0
    def testNearRealtimeOai(self):
        """Run an OAI-PMH server thread and a harvester thread and watch records arrive.

        Three records exist up front; a fourth ('id3') is added while both
        threads are running. The observer's recorded calls and the number of
        entries in the SuspendRegister are checked after fixed sleeps, so this
        test is timing dependent.
        """
        # NOTE(review): presumably polled by startOaiPmh / startOaiHarvester as
        # their keep-running flag -- those helpers are not visible here.
        self.run = True
        portNumber = randint(50000, 60000)
        suspendRegister = SuspendRegister()
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 3)
        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))

        # The observer records every call; 'add' returns an empty generator.
        observer = CallTrace("observer",
                             ignoredAttributes=["observer_init"],
                             methods={'add': lambda **kwargs: (x for x in [])})
        harvestThread = Thread(
            None, lambda: self.startOaiHarvester(portNumber, observer))

        oaiPmhThread.start()
        harvestThread.start()

        try:
            requests = 3
            # Allow roughly one second per expected request, plus one spare.
            sleepWheel(1.0 + 1.0 * requests)

            # The three initial records are expected in two batches (2 + 1).
            self.assertEqual([
                'startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch',
                'add', 'stopOaiBatch'
            ], [m.name for m in observer.calledMethods])
            ids = [
                xpath(m.kwargs['lxmlNode'],
                      '//oai:header/oai:identifier/text()')
                for m in observer.calledMethods if m.name == 'add'
            ]
            self.assertEqual([['id0'], ['id1'], ['id2']], ids)

            # One entry is registered in the SuspendRegister at this point.
            self.assertEqual(1, len(suspendRegister))
            observer.calledMethods.reset()

            # NOTE: 'requests' is not read again after this increment.
            requests += 1
            storageComponent.addData(identifier="id3",
                                     name="prefix",
                                     data=b"<a>a3</a>")
            oaiJazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
            sleepWheel(1)

            # After adding 'id3' the register is empty and 'id3' was delivered.
            self.assertEqual(0, len(suspendRegister))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id3" in kwarg, kwarg)
            # One more second later there is again one registered entry.
            sleepWheel(1.0)
            self.assertEqual(1, len(suspendRegister))
        finally:
            # Always stop both threads and release the index, even on failure.
            self.run = False
            oaiPmhThread.join()
            harvestThread.join()
            oaiJazz.close()
Пример #4
0
    def testRemoveSet(self):
        """removeSetsFromOai removes set 'a:b' from all records and from the set listing.

        The index is filled and closed, removeSetsFromOai runs on the directory,
        and the index is re-opened to check the result. Both OaiJazz instances
        are closed in a finally block so a failing assertion cannot leave the
        index open.
        """
        def selected(jazz):
            # (identifier, sets, isDeleted) for every record with prefix 'prefix'.
            return [(r.identifier, r.sets, r.isDeleted) for r in jazz.oaiSelect(prefix='prefix').records]

        oaiJazz = OaiJazz(self.tempdir)
        try:
            oaiJazz.updateSet('a:b', 'set A/B')
            oaiJazz.updateSet('a:c', 'set A/C')
            oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataFormats=[('prefix', '', '')])
            oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataFormats=[('prefix', '', '')])
            oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataFormats=[('prefix', '', '')])

            self.assertEqual([
                    ('id:0', set([u'a', u'a:b', u'a:c']), False),
                    ('id:1', set([u'a', u'a:b']), False),
                    ('id:2', set([u'a', u'a:c']), False),
                ],
                selected(oaiJazz))
            self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
        finally:
            # Closed before removeSetsFromOai works on the same directory.
            oaiJazz.close()

        removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

        oaiJazz = OaiJazz(self.tempdir)
        try:
            self.assertEqual([
                    ('id:2', set([u'a', u'a:c']), False),
                    ('id:0', set([u'a', u'a:c']), False),
                    ('id:1', set([]), False), # remove hierarchical sets! if possible
                ],
                selected(oaiJazz))
            self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
        finally:
            oaiJazz.close()
Пример #5
0
    def testNearRealtimeOaiSavesState(self):
        """Harvest, restart server and harvester, and check only the new record is delivered.

        First run: the single initial record ('id0') must reach the observer.
        Both threads are then stopped, 'id1' is added, and both are started
        again on a fresh random port. Second run: the observer must receive
        'id1' and not 'id0'.

        Thread handles are kept in a list owned by this test rather than in
        module globals, and the threads are always stopped (and the index
        closed) in a finally block so a failing assertion cannot leave them
        running.
        """
        observer = CallTrace("observer",
                             ignoredAttributes=["observer_init"],
                             methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        # Threads that were actually started, in the order they must be joined.
        running = []

        def start():
            self.run = True
            portNumber = randint(50000, 60000)
            oaiPmhThread = Thread(
                None, lambda: self.startOaiPmh(
                    portNumber, oaiJazz, storageComponent, suspendRegister))
            harvestThread = Thread(
                None, lambda: self.startOaiHarvester(portNumber, observer))
            for thread in (oaiPmhThread, harvestThread):
                thread.start()
                # Only remember threads that started; join() fails otherwise.
                running.append(thread)

        def stop():
            # Safe to call repeatedly: does nothing once the list is empty.
            self.run = False
            while running:
                running.pop(0).join()

        try:
            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id0" in kwarg, kwarg)
            stop()
            observer.calledMethods.reset()

            storageComponent.addData(identifier="id1",
                                     name="prefix",
                                     data=b"<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertFalse("id0" in kwarg, kwarg)
            self.assertTrue("id1" in kwarg, kwarg)
            stop()
        finally:
            stop()
            oaiJazz.close()
Пример #6
0
 def testListRecordsWithMultiSequentialStorage(self):
     """ListRecords returns the data stored in a MultiSequentialStorage as record metadata.

     OaiList observes an OaiJazz index, the storage and an OaiRecord; one
     record 'id0' with data "data01" is added and must show up as the text of
     the first oai:metadata element in the response body.
     """
     oaijazz = OaiJazz(join(self.tempdir, '1'))
     oailist = OaiList(batchSize=2, repository=OaiRepository())
     storage = MultiSequentialStorage(join(self.tempdir, "2"))
     oailist.addObserver(oaijazz)
     oairecord = OaiRecord()
     oailist.addObserver(storage)
     oailist.addObserver(oairecord)
     identifier = "id0"
     oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
     storage.addData(identifier=identifier, name="oai_dc", data="data01")
     response = oailist.listRecords(arguments=dict(
             verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs)
     # Unpacking also checks there is exactly one header/body separator.
     _, body = asString(response).split("\r\n\r\n")
     # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
     self.assertEqual("data01", xpath(parse(StringIO(body)), '//oai:metadata')[0].text)
Пример #7
0
 def testListRecordsWithALotOfDeletedRecords(self):
     """ListRecords returns one metadata entry per identifier when 'id1' is added twice.

     Records are added for 'id0', 'id1' and 'id1' again; the response must
     contain exactly the data of 'id0' and 'id1', in that order.

     NOTE(review): despite the test name, no record is deleted here.
     """
     oaijazz = OaiJazz(join(self.tempdir, '1'))
     oailist = OaiList(batchSize=2, repository=OaiRepository())
     storage = MultiSequentialStorage(join(self.tempdir, "2"))
     oailist.addObserver(oaijazz)
     oairecord = OaiRecord()
     oailist.addObserver(storage)
     oailist.addObserver(oairecord)
     # 'identifier' rather than 'id', which would shadow the builtin.
     for identifier in ['id0', 'id1', 'id1']:
         oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
         storage.addData(identifier=identifier, name="oai_dc", data="data_%s" % identifier)
     response = oailist.listRecords(arguments=dict(
             verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs)
     # Unpacking also checks there is exactly one header/body separator.
     _, body = asString(response).split("\r\n\r\n")
     # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
     self.assertEqual(["data_id0", "data_id1"], xpath(parse(StringIO(body)), '//oai:metadata/text()'))
Пример #8
0
    def testNearRealtimeOaiSavesState(self):
        """Harvest, restart server and harvester, and check only the new record is delivered.

        First run: the single initial record ('id0') must reach the observer.
        Both threads are then stopped, 'id1' is added, and both are started
        again on a fresh random port. Second run: the observer must receive
        'id1' and not 'id0'.

        Thread handles are kept in a list owned by this test rather than in
        module globals, and the threads are always stopped (and the index
        closed) in a finally block so a failing assertion cannot leave them
        running.
        """
        observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        # Threads that were actually started, in the order they must be joined.
        running = []

        def start():
            self.run = True
            portNumber = randint(50000, 60000)
            oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
            harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))
            for thread in (oaiPmhThread, harvestThread):
                thread.start()
                # Only remember threads that started; join() fails otherwise.
                running.append(thread)

        def stop():
            # Safe to call repeatedly: does nothing once the list is empty.
            self.run = False
            while running:
                running.pop(0).join()

        try:
            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id0" in kwarg, kwarg)
            stop()
            observer.calledMethods.reset()

            storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertFalse("id0" in kwarg, kwarg)
            self.assertTrue("id1" in kwarg, kwarg)
            stop()
        finally:
            stop()
            oaiJazz.close()
Пример #9
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        """A second waiting request from the same client aborts the first with a 500.

        Two ListRecords requests with 'x-wait' and the same
        X-Meresco-Oai-Client-Identifier are sent from separate threads. Once the
        first one is registered in the SuspendRegister the second is started;
        the register must then hold a different suspend object for the client,
        and exactly one request must have completed, with '500' in its header
        and a body starting with 'Aborting suspended request'.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        clientId = str(uuid4())

        requests = []
        def doOaiListRecord(port):
            # Uses the 'port' argument (previously ignored in favour of the closure variable).
            header, body = getRequest(port=port, path="/", arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"}, additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId}, parse=False)
            requests.append((header, body))

        oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request is registered as suspended.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the second request has replaced the first one.
                while harvest1Suspend == suspendRegister._suspendObject(clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

                self.assertEqual(1, len(requests))
                header, body = requests[0]
                self.assertTrue('500' in header, header)
                self.assertTrue(body.startswith('Aborting suspended request'), body)

                # NOTE(review): presumably releases the second, still waiting
                # request so that its thread can finish -- confirm.
                storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                # The second thread is only started part-way through the try
                # block; joining a never-started thread raises RuntimeError and
                # would hide the original failure.
                if harvestThread2.ident is not None:
                    harvestThread2.join()
                oaiJazz.close()
Пример #10
0
    def testGetRecordDeletedInRequestedPrefix(self):
        """GetRecord reflects a per-prefix delete: deleted in 'A', data in 'B', error for 'C'.

        Record 'id:0' is added with prefixes 'A' and 'B' and then deleted in
        prefix 'A' only. Prefix 'C' was never added for the record.
        """
        oaijazz = OaiJazz(self.tempdir + '/jazz')
        storage = MultiSequentialStorage(self.tempdir + "/seq-store")
        oairecord = OaiRecord()

        class MyStorage(object):
            # Stand-in storage: every lookup yields the literal 'data'.
            def getData(self, identifier, name):
                return 'data'

        getRecord = OaiGetRecord(repository=OaiRepository())
        oaigetrecord = be((getRecord, (oaijazz, ), (oairecord, (MyStorage(), ))))
        oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
        oaijazz.deleteOaiRecordInPrefixes(identifier='id:0',
                                          metadataPrefixes=['A'])

        def bodyFor(prefix):
            # Issue GetRecord for 'id:0' in the given prefix; return the body text.
            arguments = {
                'verb': ['GetRecord'],
                'metadataPrefix': [prefix],
                'identifier': ['id:0'],
            }
            response = oaigetrecord.getRecord(arguments=arguments,
                                              **self.httpkwargs)
            _, content = asString(response).split("\r\n\r\n")
            return content

        body = bodyFor('A')
        status = xpathFirst(
            XML(body.encode()),
            '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
        self.assertEqual('deleted', status, body)

        body = bodyFor('B')
        metadata = xpathFirst(XML(body.encode()), '//oai:metadata/text()')
        self.assertEqual("data", metadata)

        body = bodyFor('C')
        errorCode = xpathFirst(XML(body.encode()),
                               '/oai:OAI-PMH/oai:error/@code')
        self.assertEqual('cannotDisseminateFormat', errorCode)
Пример #11
0
def main(reactor, port, directory):
    """Build the OAI test server tree and fill it with fifteen oai_dc records.

    The HTTP server on `port` serves the paths /dump, /control, /oai, /log and
    /ready. Storage, index and dump data live in sub-directories of
    `directory`. Records 1..15 are added; record 2 gets an identifier with
    characters that need XML escaping, and records 3 and 6 are deleted again.
    """
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))

    # Components are created in the same order as they appear in the tree.
    root = Observable()
    httpServer = ObservableHttpServer(reactor, port)
    dumpTree = (PathFilter("/dump"), (dump,))
    controlTree = (PathFilter("/control"), (Control(), (dump,), (Log(),)))
    oaiTree = (
        PathFilter('/oai'),
        (Log(),
            (OaiPmh(repositoryName="Oai Test Server", adminEmail="*****@*****.**", batchSize=10),
                (oaiStorage,),
                (oaiJazz,),
            )
        )
    )
    logTree = (PathFilter("/log"), (RetrieveLog(), (Log(),)))
    readyTree = (PathFilter("/ready"), (StringServer('yes', ContentTypePlainText),))
    server = be((root, (httpServer, dumpTree, controlTree, oaiTree, logTree, readyTree)))
    list(compose(server.once.observer_init()))

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier></oai_dc:dc>'''
    oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
    for number in range(1, 16):
        identifier = 'oai:record:02/&gkn' if number == 2 else 'oai:record:%02d' % number
        oaiStorage.addData(identifier=identifier, name='oai_dc', data=recordTemplate % escapeXml(identifier))
        oaiJazz.addOaiRecord(identifier=identifier, metadataFormats=[oaiDcFormat])
        if number in (3, 6):
            # delete() returns a generator; consume it so the delete happens.
            list(compose(oaiJazz.delete(identifier=identifier)))
Пример #12
0
def main(reactor, port, directory):
    """Build the OAI test server tree and fill it with fifteen oai_dc records.

    The HTTP server on `port` serves the paths /dump, /control, /oai, /badoai,
    /log and /ready. Storage, index and dump data live in sub-directories of
    `directory`. Records 1..15 are added as UTF-8 bytes; record 2 gets an
    identifier with characters that need XML escaping, and records 3 and 6 are
    deleted again.
    """
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))

    # Components are created in the same order as they appear in the tree.
    root = Observable()
    httpServer = ObservableHttpServer(reactor, port)
    dumpTree = (PathFilter("/dump"), (dump, ))
    controlTree = (PathFilter("/control"), (Control(), (dump, ), (Log(), )))
    oaiTree = (
        PathFilter('/oai'),
        (Log(), (
            OaiPmh(repositoryName="Oai Test Server",
                   adminEmail="*****@*****.**",
                   batchSize=10),
            (oaiStorage, ),
            (oaiJazz, ),
        )),
    )
    badOaiTree = (PathFilter('/badoai'), (Log(), (BadOai(), )))
    logTree = (PathFilter("/log"), (RetrieveLog(), (Log(), )))
    readyTree = (PathFilter("/ready"),
                 (StringServer('yes', ContentTypePlainText), ))
    server = be((root, (httpServer, dumpTree, controlTree, oaiTree,
                        badOaiTree, logTree, readyTree)))
    list(compose(server.once.observer_init()))

    oaiJazz.updateMetadataFormat(
        prefix="oai_dc",
        schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")
    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier><dc:title>Title is √</dc:title></oai_dc:dc>'''
    for number in range(1, 16):
        identifier = 'oai:record:02/&gkn' if number == 2 else 'oai:record:%02d' % number
        record = recordTemplate % escapeXml(identifier)
        oaiStorage.addData(identifier=identifier,
                           name='oai_dc',
                           data=record.encode('utf-8'))
        oaiJazz.addOaiRecord(identifier=identifier,
                             metadataPrefixes=['oai_dc'])
        if number in (3, 6):
            # delete() returns a generator; consume it so the delete happens.
            list(compose(oaiJazz.delete(identifier=identifier)))
Пример #13
0
    def testNearRealtimeOai(self):
        """Run an OAI-PMH server thread and a harvester thread and watch records arrive.

        Three records exist up front; a fourth ('id3') is added while both
        threads are running. The observer's recorded calls and the number of
        entries in the SuspendRegister are checked after fixed sleeps, so this
        test is timing dependent.
        """
        # NOTE(review): presumably polled by startOaiPmh / startOaiHarvester as
        # their keep-running flag -- those helpers are not visible here.
        self.run = True
        portNumber = randint(50000, 60000)
        suspendRegister = SuspendRegister()
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 3)
        oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))

        # The observer records every call; 'add' returns an empty generator.
        observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
        harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))

        oaiPmhThread.start()
        harvestThread.start()

        try:
            requests = 3
            # Allow roughly one second per expected request, plus one spare.
            sleepWheel(1.0 + 1.0 * requests)

            # The three initial records are expected in two batches (2 + 1).
            # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
            self.assertEqual(['startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            ids = [xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()') for m in observer.calledMethods if m.name == 'add']
            self.assertEqual([['id0'],['id1'],['id2']], ids)

            # One entry is registered in the SuspendRegister at this point.
            self.assertEqual(1, len(suspendRegister))
            observer.calledMethods.reset()

            # NOTE: 'requests' is not read again after this increment.
            requests += 1
            storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
            oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
            sleepWheel(1)

            # After adding 'id3' the register is empty and 'id3' was delivered.
            self.assertEqual(0, len(suspendRegister))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id3" in kwarg, kwarg)
            # One more second later there is again one registered entry.
            sleepWheel(1.0)
            self.assertEqual(1, len(suspendRegister))
        finally:
            # Always stop both threads and release the index, even on failure.
            self.run = False
            oaiPmhThread.join()
            harvestThread.join()
            oaiJazz.close()
Пример #14
0
    def testGetRecordWithMultiSequentialStorage(self):
        """GetRecord returns data from a MultiSequentialStorage reached through RetrieveToGetDataAdapter.

        Record 'id0' is added with data "data01", which must be the text of the
        first oai:metadata element in the response body.
        """
        oaijazz = OaiJazz(self.tempdir + "/jazz")
        storage = MultiSequentialStorage(self.tempdir + "/seq-store")
        oairecord = OaiRecord()
        oaigetrecord = be(
            (
                OaiGetRecord(repository=OaiRepository()),
                (oaijazz,),
                (oairecord, (RetrieveToGetDataAdapter(), (storage,))),
            )
        )

        oaijazz.addOaiRecord(identifier="id0", sets=(), metadataFormats=[("oai_dc", "", "")])
        storage.addData(identifier="id0", name="oai_dc", data="data01")
        response = oaigetrecord.getRecord(
            arguments=dict(verb=["GetRecord"], metadataPrefix=["oai_dc"], identifier=["id0"]), **self.httpkwargs
        )
        # Unpacking also checks there is exactly one header/body separator.
        _, body = asString(response).split("\r\n\r\n")
        # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
        self.assertEqual("data01", xpath(parse(StringIO(body)), "//oai:metadata")[0].text)
Пример #15
0
    def testGetRecordDeletedInRequestedPrefix(self):
        """GetRecord reflects a per-prefix delete: deleted in 'A', data in 'B', error for 'C'.

        Record 'id:0' is added with prefixes 'A' and 'B' and then deleted in
        prefix 'A' only. Prefix 'C' was never added for the record.
        """
        oaijazz = OaiJazz(self.tempdir + '/jazz')
        storage = MultiSequentialStorage(self.tempdir + "/seq-store")
        oairecord = OaiRecord()

        class MyStorage(object):
            # Stand-in storage: every lookup yields the literal 'data'.
            def getData(self, identifier, name):
                return 'data'

        getRecord = OaiGetRecord(repository=OaiRepository())
        oaigetrecord = be((getRecord, (oaijazz,), (oairecord, (MyStorage(),))))
        oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
        oaijazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

        def bodyFor(prefix):
            # Issue GetRecord for 'id:0' in the given prefix; return the body text.
            arguments = {
                'verb': ['GetRecord'],
                'metadataPrefix': [prefix],
                'identifier': ['id:0'],
            }
            response = oaigetrecord.getRecord(arguments=arguments, **self.httpkwargs)
            _, content = asString(response).split("\r\n\r\n")
            return content

        body = bodyFor('A')
        status = xpathFirst(XML(body), '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
        self.assertEqual('deleted', status, body)

        body = bodyFor('B')
        metadata = xpathFirst(XML(body), '//oai:metadata/text()')
        self.assertEqual("data", metadata)

        body = bodyFor('C')
        errorCode = xpathFirst(XML(body), '/oai:OAI-PMH/oai:error/@code')
        self.assertEqual('cannotDisseminateFormat', errorCode)
Пример #16
0
    def testGetRecordWithMultiSequentialStorage(self):
        """GetRecord returns data from a MultiSequentialStorage reached through RetrieveToGetDataAdapter.

        Record 'id0' is added with data b"data01"; "data01" must be the text of
        the first oai:metadata element in the response body.
        """
        jazz = OaiJazz(self.tempdir + '/jazz')
        jazz.updateMetadataFormat(prefix="oai_dc", schema="", namespace="")
        seqStorage = MultiSequentialStorage(self.tempdir + "/seq-store")
        record = OaiRecord()
        getRecord = OaiGetRecord(repository=OaiRepository())
        adapter = RetrieveToGetDataAdapter()
        oaigetrecord = be(
            (getRecord, (jazz, ), (record, (adapter, (seqStorage, )))))

        jazz.addOaiRecord(identifier="id0", metadataPrefixes=['oai_dc'])
        seqStorage.addData(identifier="id0", name="oai_dc", data=b"data01")

        arguments = {
            'verb': ['GetRecord'],
            'metadataPrefix': ['oai_dc'],
            'identifier': ['id0'],
        }
        response = oaigetrecord.getRecord(arguments=arguments,
                                          **self.httpkwargs)
        # Unpacking also checks there is exactly one header/body separator.
        _, body = asString(response).split("\r\n\r\n")
        metadata = xpath(parse(BytesIO(body.encode())), '//oai:metadata')
        self.assertEqual("data01", metadata[0].text)
Пример #17
0
    def testRemoveSet(self):
        """removeSetsFromOai removes set 'a:b' from all records and from the set listing.

        The index is filled and closed, removeSetsFromOai runs on the directory,
        and the index is re-opened to check the result. Both OaiJazz instances
        are closed in a finally block so a failing assertion cannot leave the
        index open.
        """
        def selected(jazz):
            # (identifier, sets, isDeleted) for every record with prefix 'prefix'.
            return [(r.identifier, r.sets, r.isDeleted)
                    for r in jazz.oaiSelect(prefix='prefix').records]

        oaiJazz = OaiJazz(self.tempdir)
        try:
            oaiJazz.updateSet('a:b', 'set A/B')
            oaiJazz.updateSet('a:c', 'set A/C')
            oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
            oaiJazz.addOaiRecord('id:0',
                                 setSpecs=['a:b', 'a:c'],
                                 metadataPrefixes=['prefix'])
            oaiJazz.addOaiRecord('id:1',
                                 setSpecs=['a:b'],
                                 metadataPrefixes=['prefix'])
            oaiJazz.addOaiRecord('id:2',
                                 setSpecs=['a:c'],
                                 metadataPrefixes=['prefix'])

            self.assertEqual([
                ('id:0', set(['a', 'a:b', 'a:c']), False),
                ('id:1', set(['a', 'a:b']), False),
                ('id:2', set(['a', 'a:c']), False),
            ], selected(oaiJazz))
            self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
        finally:
            # Closed before removeSetsFromOai works on the same directory.
            oaiJazz.close()

        removeSetsFromOai(self.tempdir,
                          sets=['a:b'],
                          prefix='prefix',
                          batchSize=1)

        oaiJazz = OaiJazz(self.tempdir)
        try:
            self.assertEqual([
                ('id:2', set(['a', 'a:c']), False),
                ('id:0', set(['a', 'a:c']), False),
                ('id:1', set([]), False),
            ], selected(oaiJazz))
            self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
        finally:
            oaiJazz.close()
Пример #18
0
class OaiListTest(SeecrTestCase):
    def setUp(self):
        """Wire an OaiList (batchSize=2) to a CallTrace observer backed by a real OaiJazz.

        The observer records every call made by OaiList; record rendering and
        data fetching are replaced by local stand-ins defined below.
        """
        SeecrTestCase.setUp(self)
        self.oaiJazz = OaiJazz(self.tempdir)
        self.oaiList = OaiList(batchSize=2, repository=OaiRepository())
        self.observer = CallTrace('observer', emptyGeneratorMethods=['suspendBeforeSelect'])
        # Both stand-ins return generators producing a single fixed value.
        self.observer.methods['suspendAfterNoResult'] = lambda **kwargs: (s for s in ['SUSPEND'])
        self.observer.methods['oaiWatermark'] = lambda o=None: (x for x in ["Crafted By Seecr"])
        def oaiRecord(record, metadataPrefix, fetchedRecords=None):
            # Renders "<identifier>/<metadataPrefix>" in a mock element; fetchedRecords is ignored.
            yield '<mock:record xmlns:mock="uri:mock">%s/%s</mock:record>' % (escapeXml(record.identifier), escapeXml(metadataPrefix))
        # The same renderer serves both full records and record headers.
        self.observer.methods['oaiRecord'] = oaiRecord
        self.observer.methods['oaiRecordHeader'] = oaiRecord
        # Prefix lookup and selection are delegated to the real OaiJazz.
        self.observer.methods['getAllPrefixes'] = self.oaiJazz.getAllPrefixes
        self.observer.methods['oaiSelect'] = self.oaiJazz.oaiSelect
        # One list of requested identifiers is appended per getMultipleData call.
        self.getMultipleDataIdentifiers = []
        def getMultipleData(**kwargs):
            # Remember which identifiers were asked for, then behave as if no observer answered.
            self.getMultipleDataIdentifiers.append(list(kwargs.get('identifiers')))
            raise NoneOfTheObserversRespond('No one', 0)
        self.observer.methods['getMultipleData'] = getMultipleData
        self.oaiList.addObserver(self.observer)
        self.clientId = str(uuid4())
        # Default HTTP keyword arguments passed to every list request in these tests.
        self.httpkwargs = {
            'path': '/path/to/oai',
            'Headers': {'Host':'server', 'X-Meresco-Oai-Client-Identifier': self.clientId},
            'port': 9000,
        }

    def testListRecords(self):
        """ListRecords over two records returns both without a resumption token.

        Also pins the sequence of observer calls, the oaiSelect keyword
        arguments, and the identifiers requested from getMultipleData.
        """
        self._addRecords(['id:0&0', 'id:1&1'])

        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        self.assertEqual(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken')))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        # Calls from index 4 onwards are the oaiRecord calls, one per record.
        recordMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs))
        self.assertEqual([['id:0&0', 'id:1&1']], self.getMultipleDataIdentifiers)

    def testListRecordsUsesFetchedRecords(self):
        """Data returned by getMultipleData is passed to every oaiRecord call as fetchedRecords."""
        self._addRecords(['id:0&0', 'id:1'])
        self.observer.methods['getMultipleData'] = lambda name, identifiers, ignoreMissing=False: [('id:0&0', 'data1'), ('id:1', 'data2'), ('id:2', 'data3')]
        consume(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            self.observer.calledMethodNames())
        expectedFetched = {'id:0&0': 'data1', 'id:1': 'data2', 'id:2': 'data3'}
        # Check BOTH oaiRecord calls (indices 4 and 5); the original asserted
        # index 4 twice and never looked at the second call.
        for oaiRecordCall in self.observer.calledMethods[4:]:
            self.assertEqual(expectedFetched, oaiRecordCall.kwargs['fetchedRecords'])

    def testListRecordsWithDeletes(self):
        """After deleting 'id:1&1', only 'id:0&0' is requested from getMultipleData."""
        self._addRecords(['id:0&0', 'id:1&1'])
        consume(self.oaiJazz.delete(identifier='id:1&1'))
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc']}
        consume(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
        self.assertEqual([['id:0&0']], self.getMultipleDataIdentifiers)

    def testListRecordsWithMultiSequentialStorage(self):
        """ListRecords served from OaiJazz + MultiSequentialStorage + OaiRecord yields the stored data."""
        oaijazz = OaiJazz(join(self.tempdir, '1'))
        oailist = OaiList(batchSize=2, repository=OaiRepository())
        storage = MultiSequentialStorage(join(self.tempdir, "2"))
        oailist.addObserver(oaijazz)
        oairecord = OaiRecord()
        oailist.addObserver(storage)
        oailist.addObserver(oairecord)
        identifier = "id0"
        oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
        storage.addData(identifier=identifier, name="oai_dc", data="data01")
        response = oailist.listRecords(
            arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual("data01", xpath(parse(StringIO(body)), '//oai:metadata')[0].text)

    def testListRecordsWithALotOfDeletedRecords(self):
        """Adding 'id0', 'id1' and then 'id1' again lists each identifier's data exactly once.

        NOTE(review): despite the test name, nothing is deleted explicitly
        here; 'id1' is simply added twice -- confirm this is the intended setup.
        """
        oaijazz = OaiJazz(join(self.tempdir, '1'))
        oailist = OaiList(batchSize=2, repository=OaiRepository())
        storage = MultiSequentialStorage(join(self.tempdir, "2"))
        oailist.addObserver(oaijazz)
        oairecord = OaiRecord()
        oailist.addObserver(storage)
        oailist.addObserver(oairecord)
        # 'identifier' instead of 'id' so the builtin is not shadowed.
        for identifier in ['id0', 'id1', 'id1']:
            oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
            storage.addData(identifier=identifier, name="oai_dc", data="data_%s" % identifier)
        response = oailist.listRecords(
            arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual(["data_id0", "data_id1"], xpath(parse(StringIO(body)), '//oai:metadata/text()'))

    def testListIdentifiers(self):
        """ListIdentifiers over two records renders both via oaiRecordHeader, without a resumption token."""
        self._addRecords(['id:0&0', 'id:1&1'])

        arguments = {'verb': ['ListIdentifiers'], 'metadataPrefix': ['oai_dc']}
        response = ''.join(compose(self.oaiList.listIdentifiers(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/mock:record')))
        self.assertEqual(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken')))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecordHeader', 'oaiRecordHeader'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        # Calls from index 4 onwards are the oaiRecordHeader calls, one per record.
        headerMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(headerMethods[0].kwargs))
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(headerMethods[1].kwargs))

    def testListRecordsProducesResumptionToken(self):
        """With three records and batchSize 2, the response carries a resumption token.

        The token must echo the request's from/until/set/metadataPrefix and
        continue after the stamp of the last record returned ('id:1&1').
        """
        self._addRecords(['id:0&0', 'id:1&1', 'id:2&2'], sets=[('set0', 'setName')])

        arguments = {
            'verb': ['ListRecords'],
            'metadataPrefix': ['oai_dc'],
            'from': ['2000-01-01T00:00:00Z'],
            'until': ['4012-01-01T00:00:00Z'],
            'set': ['set0'],
        }
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        resumptionToken = ResumptionToken.fromString(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0])
        self.assertEqual('4012-01-01T00:00:00Z', resumptionToken.until)
        self.assertEqual('2000-01-01T00:00:00Z', resumptionToken.from_)
        self.assertEqual('set0', resumptionToken.set_)
        self.assertEqual('oai_dc', resumptionToken.metadataPrefix)
        continueAfter = self.oaiJazz.getRecord('id:1&1').stamp
        self.assertEqual(str(continueAfter), resumptionToken.continueAfter)
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc', oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs))

    def testListRecordsUsesGivenResumptionToken(self):
        """The fields of a supplied resumption token are forwarded to oaiSelect."""
        self._addRecords(['id:2&2'], sets=[('set0', 'setName')])

        arguments = {
            'verb': ['ListRecords'],
            'resumptionToken': ['u4012-01-01T00:00:00Z|c1000|moai_dc|sset0|f2000-01-01T00:00:00Z'],
        }
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='1000', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc', oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))

    def testListRecordsEmptyFinalResumptionToken(self):
        """A request made with a resumption token that returns the final batch ends with an empty resumptionToken element."""
        self._addRecords(['id:2&2', 'id:3&3'])
        resumptionToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=0))
        arguments = {'verb': ['ListRecords'], 'resumptionToken': [resumptionToken]}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        resumptionTokens = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken')
        self.assertEqual(1, len(resumptionTokens))
        # The element is present but carries no text.
        self.assertIsNone(resumptionTokens[0].text)
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil='', prefix='oai_dc', oaiFrom='', sets=[], batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[-2:]
        self.assertEqual({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))
        self.assertEqual({'recordId':'id:3&3', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs))

    def testNoRecordsMatch(self):
        """Selecting a set that matches no record yields a 'noRecordsMatch' error."""
        self._addRecords(['id:0'])
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'set': ['does_not_exist']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(['noRecordsMatch'], xpath(oai, "/oai:OAI-PMH/oai:error/@code"))

    def testListRecordsUsingXWait(self):
        """With x-wait and no records, the request suspends; it completes once a record is added.

        The first step of the response generator must end in
        suspendAfterNoResult; after adding a record and draining the generator
        the record is listed together with a non-empty resumption token.
        """
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        result = compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
        result.next()
        self.assertEqual(
            ['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'],
            [m.name for m in self.observer.calledMethods])
        self.assertEqual(
            {"clientIdentifier": self.clientId, "prefix": 'oai_dc', 'sets': [], 'oaiFrom': None,  'oaiUntil':None, 'shouldCountHits': False, 'x-wait':True, 'continueAfter': '0', 'partition': None},
            self.observer.calledMethods[-1].kwargs)
        self._addRecords(['id:1&1'])
        # Start with a clean call log for the resumed part of the request.
        self.observer.calledMethods.reset()

        _, body = ''.join(compose(result)).split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        self.assertEqual(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')))
        self.assertEqual(
            ['suspendBeforeSelect', 'getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[2]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[-1:]
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))

    def testListRecordsWithoutClientIdentifierGeneratesOne(self):
        """Without the X-Meresco-Oai-Client-Identifier header a uuid-length identifier is generated.

        The generated identifier is passed to suspendAfterNoResult and a
        notice naming the client address is written to stderr.
        """
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        # Same as the default httpkwargs, minus the client identifier header, plus a Client address.
        self.httpkwargs = {
            'path': '/path/to/oai',
            'Headers':{'Host':'server'},
            'port':9000,
            'Client': ('127.0.0.1', 1234)
        }
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        with stderr_replaced() as s:
            result = compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
            result.next()
        self.assertEqual(
            ['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'],
            [m.name for m in self.observer.calledMethods])
        suspendKwargs = self.observer.calledMethods[-1].kwargs
        self.assertIn('clientIdentifier', suspendKwargs)
        self.assertEqual(len(str(uuid4())), len(suspendKwargs['clientIdentifier']))
        self.assertEqual("X-Meresco-Oai-Client-Identifier not found in HTTP Headers. Generated a uuid for OAI client from 127.0.0.1\n", s.getvalue())

    def testNotSupportedXWait(self):
        """An OaiList built without supportXWait answers an x-wait request with 'badArgument'."""
        self._addRecords(['id:1', 'id:2'])
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(['badArgument'], xpath(oai, "/oai:OAI-PMH/oai:error/@code"))

    def testNotSupportedValueXWait(self):
        """An x-wait value other than 'True' is rejected with 'badArgument' and an explanatory message."""
        self._addRecords(['id:1', 'id:2'])
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['YesPlease']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        self.assertEqual(['badArgument'], xpath(oai, "/oai:OAI-PMH/oai:error/@code"))
        self.assertIn("only supports 'True' as valid value", xpath(oai, "/oai:OAI-PMH/oai:error/text()")[0])

    def testListRecordsWithPartition(self):
        """x-partition '2/2' lists only 'id:1' and '1/2' lists only 'id:2'."""
        def listedRecords(partition):
            # Run ListRecords for the given x-partition and return the rendered record texts.
            arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition': [partition]}
            response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
            _, body = response.split(CRLF*2)
            return xpath(parse(StringIO(body)), '//mock:record/text()')

        self._addRecords(['id:1', 'id:2'])
        self.assertEqual(['id:1/oai_dc'], listedRecords('2/2'))
        self.assertEqual(['id:2/oai_dc'], listedRecords('1/2'))

    @stderr_replaced
    def testListRecordsWithOldPartitionParameter(self):
        """The older 'x-parthash' argument with value '2/2' lists only 'id:1'.

        NOTE(review): stderr is replaced for the duration of the test,
        presumably to swallow a notice about the old parameter name -- confirm.
        """
        self._addRecords(['id:1', 'id:2'])
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-parthash': ['2/2']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['id:1/oai_dc'], xpath(oai, '//mock:record/text()'))

    def testListRecordsProducesResumptionTokenWithPartition(self):
        """The partition is carried in the resumption token and applied to the follow-up request."""
        self._addRecords(['id:%s' % i for i in xrange(10)])
        arguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition': ['1/2']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        resumptionToken = ResumptionToken.fromString(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0])
        self.assertEqual(['id:2/oai_dc', 'id:3/oai_dc'], xpath(oai, '//mock:record/text()'))
        self.assertEqual('1/2', str(resumptionToken.partition))

        # Follow-up request: only the token is supplied, no x-partition argument.
        arguments = {'verb': ['ListRecords'], 'resumptionToken': [str(resumptionToken)]}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        _, body = response.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['id:5/oai_dc', 'id:6/oai_dc'], xpath(oai, '//mock:record/text()'))


    def testFromAndUntil(self):
        """'from' and 'until' arguments are normalised before being passed to oaiSelect.

        Full timestamps pass through unchanged, a date-only 'from' becomes
        T00:00:00Z, a date-only 'until' becomes T23:59:59Z, and missing values
        are passed as None.
        """
        self._addRecords(['id:3&3'])
        def selectArguments(oaiFrom, oaiUntil):
            # Issue one ListRecords request and return the (oaiFrom, oaiUntil) seen by oaiSelect.
            self.observer.calledMethods.reset()
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
            if oaiFrom:
                arguments['from'] = [oaiFrom]
            if oaiUntil:
                arguments['until'] = [oaiUntil]
            _, body = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))).split(CRLF*2)
            # The parsed tree is not used further; the body is still parsed as in the original test.
            parse(StringIO(body))
            self.assertEqual(['getAllPrefixes', 'oaiSelect'], [m.name for m in self.observer.calledMethods][:2])
            selectKwargs = self.observer.calledMethods[1].kwargs
            return selectKwargs['oaiFrom'], selectKwargs['oaiUntil']

        self.assertEqual((None, None), selectArguments(None, None))
        self.assertEqual(('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z'), selectArguments('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z'))
        self.assertEqual(('2000-01-01T00:00:00Z', '2000-01-01T23:59:59Z'), selectArguments('2000-01-01', '2000-01-01'))
        self.assertEqual((None, '2000-01-01T00:00:00Z'), selectArguments(None, '2000-01-01T00:00:00Z'))
        self.assertEqual(('2000-01-01T00:00:00Z', None), selectArguments('2000-01-01T00:00:00Z', None))

    def testFromAndUntilErrors(self):
        """Invalid 'from'/'until' combinations each produce exactly one 'badArgument' error.

        Covered: values that are not datestamps, mixed granularity, and an
        'until' that lies before 'from'.
        """
        def getError(oaiFrom, oaiUntil):
            # Issue one ListRecords request and return the code of its single error element.
            self._addRecords(['id:3&3'])
            self.observer.calledMethods.reset()
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
            if oaiFrom:
                arguments['from'] = [oaiFrom]
            if oaiUntil:
                arguments['until'] = [oaiUntil]
            _, body = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))).split(CRLF*2)
            oai = parse(StringIO(body))
            errors = xpath(oai, '//oai:error')
            self.assertEqual(1, len(errors), body)
            return errors[0].attrib['code']

        self.assertEqual('badArgument', getError('thisIsNotEvenADateStamp', 'thisIsNotEvenADateStamp'))
        self.assertEqual('badArgument', getError('2000-01-01T00:00:00Z', '2000-01-01'))
        self.assertEqual('badArgument', getError('2000-01-01T00:00:00Z', '1999-01-01T00:00:00Z'))

    def testConcurrentListRequestsDontInterfere(self):
        """A suspended ListRecords request still answers as ListRecords after a ListIdentifiers request started."""
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        # ListRecords request; its first step leaves it waiting (x-wait, no records yet).
        resultListRecords = compose(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}, **self.httpkwargs))
        resultListRecords.next()

        # ListIdentifiers request (the verb is taken from the arguments).
        resultListIdentifiers = compose(self.oaiList.listRecords(arguments={'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs))
        resultListIdentifiers.next()

        # Resume ListRecords.
        self._addRecords(['id:1&1'])
        _, body = ''.join(compose(resultListRecords)).split(CRLF*2)
        self.assertNotIn('</ListIdentifiers>', body, body)
        self.assertIn('</ListRecords>', body, body)

    def testXCount(self):
        """With x-count the resumption token reports recordsRemaining; the final batch has no such attribute."""
        def listRecordsBody(arguments):
            # Run ListRecords, skipping Yield objects in the response stream, and return the body.
            chunks = compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
            _, body = ''.join(s for s in chunks if s is not Yield).split(CRLF*2)
            return body

        self._addRecords(['id%s' % i for i in xrange(99)])

        oai = parse(StringIO(listRecordsBody({'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-count': ['True']})))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        recordsRemaining = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0]
        self.assertEqual('97', recordsRemaining)
        # Continue after the second-to-last record so that exactly one record is left.
        continueAfter = self.oaiJazz.getRecord('id97').stamp
        resumptionToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=continueAfter))

        oai = parse(StringIO(listRecordsBody({'verb': ['ListRecords'], 'resumptionToken': [resumptionToken], 'x-count': ['True']})))
        self.assertEqual(1, len(xpath(oai, '//mock:record')))
        self.assertEqual(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')))

        # Index 1 is the oaiSelect call of the FIRST request (the call log was not reset).
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=True, partition=None),
            selectMethod.kwargs)

    def testGetMultipleDataWithOtherBatchSize(self):
        """With batchSize=10 and dataBatchSize=2, data is fetched in five chunks of two records.

        The expected call log is one getMultipleData call followed by two
        oaiRecord calls, repeated five times.
        """
        self._addRecords(['id%s' % i for i in xrange(99)])
        self.oaiList = OaiList(batchSize=10, dataBatchSize=2, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)
        def getMultipleData(identifiers, **kwargs):
            # 'identifier' instead of 'id' so the builtin is not shadowed.
            return [(identifier, '<data id="%s"/>' % identifier) for identifier in identifiers]
        self.observer.methods['getMultipleData'] = getMultipleData
        def oaiRecord(record, metadataPrefix, fetchedRecords=None):
            # Render the record straight from the data fetched for its chunk.
            yield fetchedRecords[record.identifier]
        self.observer.methods['oaiRecord'] = oaiRecord

        body = asString(self.oaiList.listRecords(arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs)).split(CRLF*2,1)[-1]
        oai = parse(StringIO(body))
        self.assertEqual(['id%s' % i for i in range(10)], xpath(oai, '//oai:ListRecords/oai:data/@id'))

        expectedCalls = ['getAllPrefixes', 'oaiSelect', 'oaiWatermark']
        expectedCalls += ['getMultipleData', 'oaiRecord', 'oaiRecord'] * 5
        self.assertEqual(expectedCalls, self.observer.calledMethodNames())


    def _addRecords(self, identifiers, sets=None):
        """Add every identifier to self.oaiJazz with the 'oai_dc' metadata format and the given sets."""
        for recordId in identifiers:
            self.oaiJazz.addOaiRecord(
                identifier=recordId,
                sets=sets,
                metadataFormats=[('oai_dc', '', '')])
Пример #19
0
class OaiInfoTest(SeecrTestCase):
    def setUp(self):
        """Put an OaiInfo on top of an OaiJazz holding two sets, two formats and three records.

        'record3' is deleted after being added, and the index is committed
        before the tests run.
        """
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(), (self.oaiInfo, (self.jazz, ))))

        for setSpec, setName in [("set1", "set1"), ("set2", "set name 2")]:
            self.jazz.updateSet(setSpec=setSpec, setName=setName)

        formats = [
            ("prefix1", "", ""),
            ("oai", "oai-schema", "oai-namespace"),
        ]
        for prefix, schema, namespace in formats:
            self.jazz.updateMetadataFormat(prefix=prefix,
                                           schema=schema,
                                           namespace=namespace)

        records = [
            ('record1', ['set1'], ['prefix1']),
            ('record2', ['set1'], ['prefix1', 'oai']),
            ('record3', ['set1', 'set2'], ['prefix1']),
        ]
        for identifier, setSpecs, metadataPrefixes in records:
            self.jazz.addOaiRecord(identifier=identifier,
                                   setSpecs=setSpecs,
                                   metadataPrefixes=metadataPrefixes)

        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def testInfo(self):
        """/info/json reports the total and deleted record counts plus the last stamp."""
        response = asString(
            self.top.all.handleRequest(path='/info/json', arguments={}))
        _, body = response.split('\r\n\r\n')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        expected = {
            'totalRecords': {'total': 3, 'deletes': 1},
            'lastStamp': lastStamp,
        }
        self.assertEqual(expected, loads(body))

    def testGetAllSets(self):
        """/info/json/sets returns the set specs 'set1' and 'set2'."""
        request = self.top.all.handleRequest(path='/info/json/sets', arguments={})
        _, body = asString(request).split('\r\n\r\n')
        self.assertEqual(['set1', 'set2'], loads(body))

    def testGetAllPrefixes(self):
        """/info/json/prefixes returns the prefixes 'oai' and 'prefix1'."""
        request = self.top.all.handleRequest(path='/info/json/prefixes', arguments={})
        _, body = asString(request).split('\r\n\r\n')
        self.assertEqual(['oai', 'prefix1'], loads(body))

    def testPrefixInfo(self):
        """/info/json/prefix reports schema, namespace, record counts and last stamp per prefix.

        Checked for 'prefix1' (three records, one delete) and for 'oai' (one
        record, no deletes); the two prefixes must have different last stamps.
        """
        result = asString(
            self.top.all.handleRequest(path='/info/json/prefix',
                                       arguments=dict(prefix=['prefix1'])))
        _, body = result.split('\r\n\r\n')

        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(prefix='prefix1',
                 schema='',
                 namespace='',
                 nrOfRecords=dict(total=3, deletes=1),
                 lastStamp=lastStamp), loads(body))

        result = asString(
            self.top.all.handleRequest(path='/info/json/prefix',
                                       arguments=dict(prefix=['oai'])))
        _, body = result.split('\r\n\r\n')

        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(
            dict(prefix='oai',
                 schema='oai-schema',
                 namespace='oai-namespace',
                 nrOfRecords=dict(total=1, deletes=0),
                 lastStamp=oaiLastStamp), loads(body))

    def testUnknownPrefixInfo(self):
        """/info/json/prefix answers with an empty JSON object for an unknown prefix."""
        request = self.top.all.handleRequest(
            path='/info/json/prefix',
            arguments=dict(prefix=['unknown']))
        _, body = asString(request).split('\r\n\r\n')
        self.assertEqual({}, loads(body))

    def testSetInfo(self):
        """/info/json/set reports name, record counts and last stamp per set.

        Checked for 'set1' (three records, one delete) and for 'set2' (one
        record, one delete); both sets are expected to share the same last stamp.
        """
        result = asString(
            self.top.all.handleRequest(path='/info/json/set',
                                       arguments=dict(set=['set1'])))
        _, body = result.split('\r\n\r\n')

        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(setSpec='set1',
                 name='set1',
                 nrOfRecords=dict(total=3, deletes=1),
                 lastStamp=lastStamp), loads(body))

        result = asString(
            self.top.all.handleRequest(path='/info/json/set',
                                       arguments=dict(set=['set2'])))
        _, body = result.split('\r\n\r\n')
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(
            dict(setSpec='set2',
                 name='set name 2',
                 nrOfRecords=dict(total=1, deletes=1),
                 lastStamp=set2LastStamp), loads(body))

    def testResumptionTokenInfo(self):
        """/info/json/resumptiontoken describes a token and counts the records remaining after it.

        The token continues after the first 'prefix1' record, so two of the
        three records (including the one delete) are reported as remaining.
        """
        selection = self.jazz.oaiSelect(prefix='prefix1', batchSize=1)
        firstRecord = next(selection.records)
        token = ResumptionToken(metadataPrefix='prefix1',
                                continueAfter=firstRecord.stamp)
        request = self.top.all.handleRequest(
            path='/info/json/resumptiontoken',
            arguments=dict(resumptionToken=[str(token)]))
        _, body = asString(request).split('\r\n\r\n')
        expected = {
            'prefix': 'prefix1',
            'set': None,
            'from': None,
            'until': None,
            'nrOfRecords': {'total': 3, 'deletes': 1},
            'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
            'timestamp': firstRecord.stamp,
        }
        self.assertEqual(expected, loads(body))
Пример #20
0
    def testExport(self):
        """OaiJazz.export writes a META section followed by one JSON line per record.

        3000 records are added; 'id:2', 'id:3' and 'id:7' are modified
        afterwards and are expected as the last three lines of the dump, in
        that order. The dump must have 3003 lines: 'META:', the metadata
        JSON, 'RECORDS:' and the 3000 records.
        """
        def expectedRecord(identifier, prefixes, sets=(), deletedPrefixes=(), deletedSets=(), tombstone=False):
            # Expected dump entry with the timestamp masked.
            return {
                'identifier': identifier,
                'timestamp': 'TIMESTAMP',
                'tombstone': tombstone,
                'deletedPrefixes': list(deletedPrefixes),
                'prefixes': list(prefixes),
                'deletedSets': list(deletedSets),
                'sets': list(sets),
            }

        def maskedRecord(line):
            # Parse one dump line and mask its timestamp for comparison.
            record = loads(line.strip())
            record['timestamp'] = 'TIMESTAMP'
            return record

        jazz = OaiJazz(join(self.tempdir, 'oai'), deleteInSets=True)
        jazz.updateMetadataFormat(prefix='someprefix',
                                  schema='https://example.org/schema.xsd',
                                  namespace='urn:ns')
        jazz.updateMetadataFormat(prefix='prefix',
                                  schema='schema',
                                  namespace='namespace')
        jazz.updateSet(setSpec='a', setName='A')
        jazz.updateSet(setSpec='setSpec', setName='setName')
        jazz.addOaiRecord(identifier='id:0', metadataPrefixes=['prefix'])
        jazz.addOaiRecord(identifier='id:1',
                          metadataPrefixes=['prefix'],
                          setSpecs=['a', 'a:b', 'd:e:f'])
        for identifier in ('id:2', 'id:3'):
            jazz.addOaiRecord(identifier=identifier,
                              metadataPrefixes=['prefix', 'someprefix'],
                              setSpecs=['a', 'a:b', 'd:e:f'])
        for number in range(4, 3000):
            jazz.addOaiRecord(identifier='id:{}'.format(number),
                              metadataPrefixes=['prefix'])

        jazz.deleteOaiRecordInPrefixes(identifier='id:2',
                                       metadataPrefixes=['someprefix'])
        jazz.deleteOaiRecordInSets(identifier='id:3', setSpecs=['d:e:f'])
        jazz.deleteOaiRecord(identifier='id:7')

        dumpfile = join(self.tempdir, 'dump')
        jazz.export(dumpfile)

        with open(dumpfile) as fp:
            lines = fp.readlines()
        self.assertEqual(3003, len(lines))
        self.assertEqual('META:\n', lines[0])
        self.assertEqual('RECORDS:\n', lines[2])

        expectedMeta = {
            'export_version': 1,
            'metadataPrefixes': {
                'someprefix': {
                    'schema': 'https://example.org/schema.xsd',
                    'namespace': 'urn:ns'
                },
                'prefix': {
                    'schema': 'schema',
                    'namespace': 'namespace'
                },
            },
            'sets': {
                'a': {'setName': 'A'},
                'a:b': {'setName': ''},
                'd': {'setName': ''},
                'd:e': {'setName': ''},
                'd:e:f': {'setName': ''},
                'setSpec': {'setName': 'setName'},
            },
        }
        self.assertEqual(expectedMeta, loads(lines[1].strip()))

        # The first record's timestamp (divided by 10**6) must be within 3 of time().
        record0 = loads(lines[3].strip())
        self.assertAlmostEqual(time(), record0['timestamp'] / 10.0**6, delta=3)
        record0['timestamp'] = 'TIMESTAMP'
        self.assertEqual(expectedRecord('id:0', prefixes=['prefix']), record0)

        allSets = ['a', 'a:b', 'd', 'd:e', 'd:e:f']
        self.assertEqual(
            expectedRecord('id:2',
                           prefixes=['prefix', 'someprefix'],
                           sets=allSets,
                           deletedPrefixes=['someprefix']),
            maskedRecord(lines[-3]))
        self.assertEqual(
            expectedRecord('id:3',
                           prefixes=['prefix', 'someprefix'],
                           sets=allSets,
                           deletedSets=['d:e:f']),
            maskedRecord(lines[-2]))
        self.assertEqual(
            expectedRecord('id:7',
                           prefixes=['prefix'],
                           deletedPrefixes=['prefix'],
                           tombstone=True),
            maskedRecord(lines[-1]))
Пример #21
0
class OaiInfoTest(SeecrTestCase):
    """Tests for the ``/info/json`` endpoints answered by OaiInfo.

    Each test sends a request through ``handleRequest`` on a small
    Observable -> OaiInfo -> OaiJazz tree and compares the decoded response
    body with values read directly from the OaiJazz instance.
    """

    def setUp(self):
        """Build the observer tree and add three records, deleting the third."""
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(),
            (self.oaiInfo,
                (self.jazz,)
            )
        ))
        self.jazz.addOaiRecord(identifier='record1', sets=[('set1', 'set1')], metadataFormats=[('prefix1', '', '')])
        self.jazz.addOaiRecord(identifier='record2', sets=[('set1', 'set1')], metadataFormats=[('prefix1', '', ''), ('oai', 'oai-schema', 'oai-namespace')])
        self.jazz.addOaiRecord(identifier='record3', sets=[('set1', 'set1'), ('set2', 'set name 2')], metadataFormats=[('prefix1', '', '')])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def _jsonBody(self, path, arguments):
        """Request *path* with *arguments* and return the body passed through ``loads``."""
        result = asString(self.top.all.handleRequest(path=path, arguments=arguments))
        # Split on the first blank line only, so a body that itself contains
        # CRLF CRLF cannot break the two-name unpacking.
        _header, body = result.split('\r\n\r\n', 1)
        return loads(body)

    def testInfo(self):
        """The root info document reports record totals and the last stamp."""
        info = self._jsonBody('/info/json', {})
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual({'totalRecords': {'total': 3, 'deletes': 1}, 'lastStamp': lastStamp}, info)

    def testGetAllSets(self):
        """``/sets`` lists the set specs added in setUp."""
        self.assertEqual(['set1', 'set2'], self._jsonBody('/info/json/sets', {}))

    def testGetAllPrefixes(self):
        """``/prefixes`` lists the metadata prefixes added in setUp."""
        self.assertEqual(['oai', 'prefix1'], self._jsonBody('/info/json/prefixes', {}))

    def testPrefixInfo(self):
        """``/prefix`` reports schema, namespace, counts and last stamp per prefix."""
        info = self._jsonBody('/info/json/prefix', dict(prefix=['prefix1']))

        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(dict(prefix='prefix1', schema='', namespace='', nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp), info)

        info = self._jsonBody('/info/json/prefix', dict(prefix=['oai']))

        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        # Only record2 carries the 'oai' prefix, so its last stamp must
        # differ from the one of 'prefix1' (last touched by record3).
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(dict(prefix='oai', schema='oai-schema', namespace='oai-namespace', nrOfRecords=dict(total=1, deletes=0), lastStamp=oaiLastStamp), info)

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        self.assertEqual({}, self._jsonBody('/info/json/prefix', dict(prefix=['unknown'])))

    def testSetInfo(self):
        """``/set`` reports name, counts and last stamp per set."""
        info = self._jsonBody('/info/json/set', dict(set=['set1']))

        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(dict(setSpec='set1', name='set1', nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp), info)

        info = self._jsonBody('/info/json/set', dict(set=['set2']))
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # record3 is in both sets and was modified last, so both sets are
        # expected to share the same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(dict(setSpec='set2', name='set name 2', nrOfRecords=dict(total=1, deletes=1), lastStamp=set2LastStamp), info)

    def testResumptionTokenInfo(self):
        """``/resumptiontoken`` describes a token that continues after the first record."""
        # The builtin next() works on Python 2 and 3; the .next() method
        # exists on Python 2 iterators only.
        firstRecord = next(self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken = ResumptionToken(metadataPrefix='prefix1', continueAfter=firstRecord.stamp)
        info = self._jsonBody('/info/json/resumptiontoken', dict(resumptionToken=[str(resumptionToken)]))
        self.assertEqual({
                'prefix': 'prefix1',
                'set': None,
                'from': None,
                'until': None,
                'nrOfRecords': {'total': 3, 'deletes': 1},
                'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
                'timestamp': firstRecord.stamp
            }, info)
Пример #22
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        """A second waiting request with the same client id aborts the first.

        Two harvester threads send the same ``x-wait`` ListRecords request
        with an identical ``X-Meresco-Oai-Client-Identifier`` header. Once
        the second request is registered, the suspend object stored for the
        client must have been replaced and exactly one response must have
        come back: status 204 with a body starting with
        ``Aborting suspended request``.
        """
        # Presumably the loop flag polled by startOaiPmh -- it is reset in
        # the finally clause so the server thread can end.
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        clientId = str(uuid4())

        responses = []

        def doOaiListRecord(port):
            """Send one waiting ListRecords request to *port* and record the reply."""
            header, body = getRequest(port=port,
                                      path="/",
                                      arguments={
                                          "verb": "ListRecords",
                                          "metadataPrefix": "prefix",
                                          "x-wait": "True"
                                      },
                                      additionalHeaders={
                                          'X-Meresco-Oai-Client-Identifier':
                                          clientId
                                      },
                                      parse=False)
            responses.append((header, body))

        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request has been suspended.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertIsNotNone(harvest1Suspend)
                harvestThread2.start()
                # Wait until the second request has replaced the first one.
                while harvest1Suspend == suspendRegister._suspendObject(
                        clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertIn(clientId, suspendRegister)
                self.assertNotEqual(
                    harvest1Suspend, suspendRegister._suspendObject(clientId))

                self.assertEqual(1, len(responses))
                statusAndHeader, body = responses[0]
                self.assertEqual("204", statusAndHeader['StatusCode'])
                self.assertTrue(body.startswith(b'Aborting suspended request'),
                                body)

                # Add a record, then give the still-waiting second request
                # a moment before shutting down.
                storageComponent.addData(identifier="id1",
                                         name="prefix",
                                         data=b"<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1",
                                     metadataPrefixes=["prefix"])
                sleep(0.1)

            finally:
                self.run = False
                # harvestThread2 is only started midway through the try
                # block. Joining a thread that was never started raises
                # RuntimeError, which would mask the original failure and
                # skip oaiJazz.close(); Thread.ident stays None until start.
                for thread in (oaiPmhThread, harvestThread1, harvestThread2):
                    if thread.ident is not None:
                        thread.join()
                oaiJazz.close()