def testConvert(self):
    # Run the 11 -> 12 conversion on the prepared directory, then reopen it
    # and check the state of record 'id:1'.
    Convert11to12(self.oaiDir).go()
    jazz = OaiJazz(self.oaiDir)
    record = jazz.getRecord('id:1')
    expectedPrefixes = {'A', 'B'}
    self.assertTrue(record.isDeleted)
    # After conversion both prefixes are reported as deleted prefixes too.
    self.assertEqual(expectedPrefixes, record.deletedPrefixes)
    self.assertEqual(expectedPrefixes, record.prefixes)
def testPreCondition(self):
    # Only the version marker is rewritten to '12'; Convert11to12 is NOT run
    # here, so this pins what the data looks like without the conversion.
    versionPath = join(self.oaiDir, 'oai.version')
    with open(versionPath, 'w') as versionFile:
        versionFile.write('12')
    jazz = OaiJazz(self.oaiDir)
    record = jazz.getRecord('id:1')
    self.assertTrue(record.isDeleted)
    # Without the conversion no deleted prefixes are known for the record.
    self.assertEqual(set(), record.deletedPrefixes)
    self.assertEqual({'A', 'B'}, record.prefixes)
class OaiListTest(SeecrTestCase):
    """Tests OaiList.listRecords / listIdentifiers.

    Most tests wire an OaiList (batchSize=2) to a CallTrace observer whose
    'oaiSelect' and 'getAllPrefixes' are delegated to a real OaiJazz in the
    test's tempdir, so the sequence and arguments of observer calls can be
    asserted alongside the generated XML.
    """

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.oaiJazz = OaiJazz(self.tempdir)
        self.oaiList = OaiList(batchSize=2, repository=OaiRepository())
        self.observer = CallTrace('observer', emptyGeneratorMethods=['suspendBeforeSelect'])
        self.observer.methods['suspendAfterNoResult'] = lambda **kwargs: (s for s in ['SUSPEND'])
        self.observer.methods['oaiWatermark'] = lambda o=None: (x for x in ["Crafted By Seecr"])

        # Mock record renderer: emits "<identifier>/<metadataPrefix>" (XML-escaped)
        # in a mock namespace so the tests can find it with xpath.
        def oaiRecord(record, metadataPrefix, fetchedRecords=None):
            yield '<mock:record xmlns:mock="uri:mock">%s/%s</mock:record>' % (
                escapeXml(record.identifier), escapeXml(metadataPrefix))

        self.observer.methods['oaiRecord'] = oaiRecord
        self.observer.methods['oaiRecordHeader'] = oaiRecord
        self.observer.methods['getAllPrefixes'] = self.oaiJazz.getAllPrefixes
        self.observer.methods['oaiSelect'] = self.oaiJazz.oaiSelect
        self.getMultipleDataIdentifiers = []

        # Records which identifiers were requested, then signals that no
        # observer answered the getMultipleData call.
        def getMultipleData(**kwargs):
            self.getMultipleDataIdentifiers.append(list(kwargs.get('identifiers')))
            raise NoneOfTheObserversRespond('No one', 0)

        self.observer.methods['getMultipleData'] = getMultipleData
        self.oaiList.addObserver(self.observer)
        self.clientId = str(uuid4())
        self.httpkwargs = {
            'path': '/path/to/oai',
            'Headers': {'Host':'server', 'X-Meresco-Oai-Client-Identifier': self.clientId},
            'port': 9000,
        }

    def testListRecords(self):
        # Two records fit exactly in one batch: no resumptionToken expected.
        self._addRecords(['id:0&0', 'id:1&1'])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        self.assertEqual(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken')))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[],
                 batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs))
        self.assertEqual([['id:0&0', 'id:1&1']], self.getMultipleDataIdentifiers)

    def testListRecordsUsesFetchedRecords(self):
        # The data returned by getMultipleData must be handed to every
        # oaiRecord call as 'fetchedRecords'.
        self._addRecords(['id:0&0', 'id:1'])
        self.observer.methods['getMultipleData'] = lambda name, identifiers, ignoreMissing=False: [
            ('id:0&0', 'data1'), ('id:1', 'data2'), ('id:2', 'data3')]
        consume(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']},
            **self.httpkwargs))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            self.observer.calledMethodNames())
        expectedFetched = {'id:0&0': 'data1', 'id:1': 'data2', 'id:2': 'data3'}
        # Index 4 and 5 are the two oaiRecord calls (see the call list above);
        # previously index 4 was checked twice and index 5 not at all.
        self.assertEqual(expectedFetched, self.observer.calledMethods[4].kwargs['fetchedRecords'])
        self.assertEqual(expectedFetched, self.observer.calledMethods[5].kwargs['fetchedRecords'])

    def testListRecordsWithDeletes(self):
        # Data is only requested for the record that was not deleted.
        self._addRecords(['id:0&0', 'id:1&1'])
        consume(self.oaiJazz.delete(identifier='id:1&1'))
        consume(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']},
            **self.httpkwargs))
        self.assertEqual([['id:0&0']], self.getMultipleDataIdentifiers)

    def testListRecordsWithMultiSequentialStorage(self):
        # Uses real OaiJazz / MultiSequentialStorage / OaiRecord observers
        # instead of the CallTrace observer from setUp.
        oaijazz = OaiJazz(join(self.tempdir, '1'))
        oailist = OaiList(batchSize=2, repository=OaiRepository())
        storage = MultiSequentialStorage(join(self.tempdir, "2"))
        oailist.addObserver(oaijazz)
        oairecord = OaiRecord()
        oailist.addObserver(storage)
        oailist.addObserver(oairecord)
        identifier = "id0"
        oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
        storage.addData(identifier=identifier, name="oai_dc", data="data01")
        response = oailist.listRecords(
            arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual("data01", xpath(parse(StringIO(body)), '//oai:metadata')[0].text)

    def testListRecordsWithALotOfDeletedRecords(self):
        # 'id1' is added twice; the response must still list each identifier once.
        oaijazz = OaiJazz(join(self.tempdir, '1'))
        oailist = OaiList(batchSize=2, repository=OaiRepository())
        storage = MultiSequentialStorage(join(self.tempdir, "2"))
        oailist.addObserver(oaijazz)
        oairecord = OaiRecord()
        oailist.addObserver(storage)
        oailist.addObserver(oairecord)
        for identifier in ['id0', 'id1', 'id1']:
            oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
            storage.addData(identifier=identifier, name="oai_dc", data="data_%s" % identifier)
        response = oailist.listRecords(
            arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual(["data_id0", "data_id1"], xpath(parse(StringIO(body)), '//oai:metadata/text()'))

    def testListIdentifiers(self):
        # Same flow as testListRecords, but headers are rendered via oaiRecordHeader.
        self._addRecords(['id:0&0', 'id:1&1'])
        header, body = ''.join(compose(self.oaiList.listIdentifiers(
            arguments={'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/mock:record')))
        self.assertEqual(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken')))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecordHeader', 'oaiRecordHeader'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[],
                 batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        headerMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(headerMethods[0].kwargs))
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(headerMethods[1].kwargs))

    def testListRecordsProducesResumptionToken(self):
        # Three records with batchSize=2: the response carries a token that
        # remembers from/until/set/prefix and the stamp of the last record sent.
        self._addRecords(['id:0&0', 'id:1&1', 'id:2&2'], sets=[('set0', 'setName')])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={
                'verb':['ListRecords'],
                'metadataPrefix': ['oai_dc'],
                'from': ['2000-01-01T00:00:00Z'],
                'until': ['4012-01-01T00:00:00Z'],
                'set': ['set0'],
            },
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        resumptionToken = ResumptionToken.fromString(
            xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0])
        self.assertEqual('4012-01-01T00:00:00Z', resumptionToken.until)
        self.assertEqual('2000-01-01T00:00:00Z', resumptionToken.from_)
        self.assertEqual('set0', resumptionToken.set_)
        self.assertEqual('oai_dc', resumptionToken.metadataPrefix)
        continueAfter = self.oaiJazz.getRecord('id:1&1').stamp
        self.assertEqual(str(continueAfter), resumptionToken.continueAfter)
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc',
                 oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2,
                 shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs))

    def testListRecordsUsesGivenResumptionToken(self):
        # All select arguments are taken from the supplied resumptionToken string.
        self._addRecords(['id:2&2'], sets=[('set0', 'setName')])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={
                'verb':['ListRecords'],
                'resumptionToken':['u4012-01-01T00:00:00Z|c1000|moai_dc|sset0|f2000-01-01T00:00:00Z'],
            },
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='1000', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc',
                 oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2,
                 shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[4:]
        self.assertEqual({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))

    def testListRecordsEmptyFinalResumptionToken(self):
        # A request made with a resumptionToken that yields the last batch
        # must end with an empty <resumptionToken/> element.
        self._addRecords(['id:2&2', 'id:3&3'])
        resumptionToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=0))
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'resumptionToken':[resumptionToken]},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        resumptionTokens = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken')
        self.assertEqual(1, len(resumptionTokens))
        self.assertEqual(None, resumptionTokens[0].text)
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil='', prefix='oai_dc', oaiFrom='', sets=[],
                 batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[-2:]
        self.assertEqual({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))
        self.assertEqual({'recordId':'id:3&3', 'metadataPrefix':'oai_dc'}, _m(recordMethods[1].kwargs))

    def testNoRecordsMatch(self):
        # Selecting on a set no record belongs to gives the noRecordsMatch error.
        self._addRecords(['id:0'])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix':['oai_dc'], 'set': ['does_not_exist']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['noRecordsMatch'], xpath(oai, "/oai:OAI-PMH/oai:error/@code"))

    def testListRecordsUsingXWait(self):
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)
        result = compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']},
            **self.httpkwargs))
        # First step: no records yet, so the request is handed to suspendAfterNoResult.
        result.next()
        self.assertEqual(
            ['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'],
            [m.name for m in self.observer.calledMethods])
        self.assertEqual(
            {"clientIdentifier": self.clientId, "prefix": 'oai_dc', 'sets': [], 'oaiFrom': None,
             'oaiUntil':None, 'shouldCountHits': False, 'x-wait':True, 'continueAfter': '0',
             'partition': None},
            self.observer.calledMethods[-1].kwargs)
        # A record arrives; draining the same generator must now produce the response.
        self._addRecords(['id:1&1'])
        self.observer.calledMethods.reset()

        header, body = ''.join(compose(result)).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        self.assertEqual(1, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')))
        self.assertEqual(
            ['suspendBeforeSelect', 'getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'],
            [m.name for m in self.observer.calledMethods])
        selectMethod = self.observer.calledMethods[2]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[],
                 batchSize=2, shouldCountHits=False, partition=None),
            selectMethod.kwargs)
        recordMethods = self.observer.calledMethods[-1:]
        self.assertEqual({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordMethods[0].kwargs))

    def testListRecordsWithoutClientIdentifierGeneratesOne(self):
        # No X-Meresco-Oai-Client-Identifier header: a uuid-length identifier is
        # passed on and a warning is written to stderr.
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)
        self.httpkwargs = {
            'path': '/path/to/oai',
            'Headers':{'Host':'server'},
            'port':9000,
            'Client': ('127.0.0.1', 1234)
        }
        with stderr_replaced() as s:
            result = compose(self.oaiList.listRecords(
                arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']},
                **self.httpkwargs))
            result.next()
        self.assertEqual(
            ['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'],
            [m.name for m in self.observer.calledMethods])
        self.assertTrue('clientIdentifier' in self.observer.calledMethods[-1].kwargs)
        self.assertEqual(len(str(uuid4())), len(self.observer.calledMethods[-1].kwargs['clientIdentifier']))
        self.assertEqual(
            "X-Meresco-Oai-Client-Identifier not found in HTTP Headers. Generated a uuid for OAI client from 127.0.0.1\n",
            s.getvalue())

    def testNotSupportedXWait(self):
        # The OaiList from setUp was built without supportXWait, so x-wait is a badArgument.
        self._addRecords(['id:1', 'id:2'])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['badArgument'], xpath(oai, "/oai:OAI-PMH/oai:error/@code"))

    def testNotSupportedValueXWait(self):
        # Even with supportXWait=True, any x-wait value other than 'True' is rejected.
        self._addRecords(['id:1', 'id:2'])
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['YesPlease']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['badArgument'], xpath(oai, "/oai:OAI-PMH/oai:error/@code"))
        self.assertTrue("only supports 'True' as valid value" in xpath(oai, "/oai:OAI-PMH/oai:error/text()")[0])

    def testListRecordsWithPartition(self):
        # Each x-partition value selects a disjoint part of the two records.
        self._addRecords(['id:1', 'id:2'])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition': ['2/2']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['id:1/oai_dc'], xpath(oai, '//mock:record/text()'))
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition': ['1/2']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['id:2/oai_dc'], xpath(oai, '//mock:record/text()'))

    @stderr_replaced
    def testListRecordsWithOldPartitionParameter(self):
        # 'x-parthash' is accepted and gives the same result as 'x-partition' above.
        self._addRecords(['id:1', 'id:2'])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-parthash': ['2/2']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['id:1/oai_dc'], xpath(oai, '//mock:record/text()'))

    def testListRecordsProducesResumptionTokenWithPartition(self):
        # The partition travels inside the resumptionToken, so the follow-up
        # request stays within the same partition.
        self._addRecords(['id:%s' % i for i in xrange(10)])
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition':['1/2']},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        resumptionToken = ResumptionToken.fromString(
            xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0])
        self.assertEqual(['id:2/oai_dc', 'id:3/oai_dc'], xpath(oai, '//mock:record/text()'))
        self.assertEqual('1/2', str(resumptionToken.partition))
        header, body = ''.join(compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'resumptionToken': [str(resumptionToken)]},
            **self.httpkwargs))).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(['id:5/oai_dc', 'id:6/oai_dc'], xpath(oai, '//mock:record/text()'))

    def testFromAndUntil(self):
        self._addRecords(['id:3&3'])

        # Issues a ListRecords request with the given from/until (omitted when
        # falsy) and returns the (oaiFrom, oaiUntil) that reached oaiSelect.
        def selectArguments(oaiFrom, oaiUntil):
            self.observer.calledMethods.reset()
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
            if oaiFrom:
                arguments['from'] = [oaiFrom]
            if oaiUntil:
                arguments['until'] = [oaiUntil]
            header, body = ''.join(compose(self.oaiList.listRecords(
                arguments=arguments, **self.httpkwargs))).split(CRLF*2)
            oai = parse(StringIO(body))
            self.assertEqual(['getAllPrefixes', 'oaiSelect'], [m.name for m in self.observer.calledMethods][:2])
            selectKwargs = self.observer.calledMethods[1].kwargs
            return selectKwargs['oaiFrom'], selectKwargs['oaiUntil']

        self.assertEqual((None, None), selectArguments(None, None))
        self.assertEqual(
            ('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z'),
            selectArguments('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z'))
        # Day-granularity dates are expanded to the start / end of that day.
        self.assertEqual(
            ('2000-01-01T00:00:00Z', '2000-01-01T23:59:59Z'),
            selectArguments('2000-01-01', '2000-01-01'))
        self.assertEqual((None, '2000-01-01T00:00:00Z'), selectArguments(None, '2000-01-01T00:00:00Z'))
        self.assertEqual(('2000-01-01T00:00:00Z', None), selectArguments('2000-01-01T00:00:00Z', None))

    def testFromAndUntilErrors(self):
        # Issues a ListRecords request with the given from/until and returns the
        # code of the single <error> element in the response.
        def getError(oaiFrom, oaiUntil):
            self._addRecords(['id:3&3'])
            self.observer.calledMethods.reset()
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
            if oaiFrom:
                arguments['from'] = [oaiFrom]
            if oaiUntil:
                arguments['until'] = [oaiUntil]
            header, body = ''.join(compose(self.oaiList.listRecords(
                arguments=arguments, **self.httpkwargs))).split(CRLF*2)
            oai = parse(StringIO(body))
            self.assertEqual(1, len(xpath(oai, '//oai:error')), body)
            error = xpath(oai, '//oai:error')[0]
            return error.attrib['code']

        # Not a datestamp, mixed granularity, and from-after-until respectively.
        self.assertEqual('badArgument', getError('thisIsNotEvenADateStamp', 'thisIsNotEvenADateStamp'))
        self.assertEqual('badArgument', getError('2000-01-01T00:00:00Z', '2000-01-01'))
        self.assertEqual('badArgument', getError('2000-01-01T00:00:00Z', '1999-01-01T00:00:00Z'))

    def testConcurrentListRequestsDontInterfere(self):
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        # ListRecords request
        resultListRecords = compose(self.oaiList.listRecords(
            arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']},
            **self.httpkwargs))
        resultListRecords.next()

        # ListIdentifiers request
        # NOTE(review): this goes through listRecords() with verb 'ListIdentifiers'
        # rather than listIdentifiers() -- confirm that is intended.
        resultListIdentifiers = compose(self.oaiList.listRecords(
            arguments={'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']},
            **self.httpkwargs))
        resultListIdentifiers.next()

        # resume ListRecords
        self._addRecords(['id:1&1'])
        header, body = ''.join(compose(resultListRecords)).split(CRLF*2)
        self.assertFalse('</ListIdentifiers>' in body, body)
        self.assertTrue('</ListRecords>' in body, body)

    def testXCount(self):
        # With x-count the resumptionToken element reports recordsRemaining;
        # Yield markers in the response stream are filtered out before joining.
        self._addRecords(['id%s' % i for i in xrange(99)])

        header, body = ''.join(
            s for s in compose(self.oaiList.listRecords(
                arguments={'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-count': ['True']},
                **self.httpkwargs))
            if s is not Yield).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        recordsRemaining = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0]
        self.assertEqual('97', recordsRemaining)

        # Continue after the second-to-last record: one record left, nothing remaining.
        continueAfter = self.oaiJazz.getRecord('id97').stamp
        resumptionToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=continueAfter))
        header, body = ''.join(
            s for s in compose(self.oaiList.listRecords(
                arguments={'verb': ['ListRecords'], 'resumptionToken': [resumptionToken], 'x-count': ['True']},
                **self.httpkwargs))
            if s is not Yield).split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEqual(1, len(xpath(oai, '//mock:record')))
        self.assertEqual(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')))

        selectMethod = self.observer.calledMethods[1]
        self.assertEqual(
            dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[],
                 batchSize=2, shouldCountHits=True, partition=None),
            selectMethod.kwargs)

    def testGetMultipleDataWithOtherBatchSize(self):
        # batchSize=10 with dataBatchSize=2: data is fetched in five chunks of
        # two records, each chunk followed by its two oaiRecord calls.
        self._addRecords(['id%s' % i for i in xrange(99)])
        self.oaiList = OaiList(batchSize=10, dataBatchSize=2, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        def getMultipleData(identifiers, **kwargs):
            return [(identifier, '<data id="%s"/>' % identifier) for identifier in identifiers]

        self.observer.methods['getMultipleData'] = getMultipleData

        # Renders the pre-fetched data for the record; fails with KeyError if
        # the record was not part of the chunk handed in.
        def oaiRecord(record, metadataPrefix, fetchedRecords=None):
            yield fetchedRecords[record.identifier]

        self.observer.methods['oaiRecord'] = oaiRecord
        body = asString(self.oaiList.listRecords(
            arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
            **self.httpkwargs)).split(CRLF*2,1)[-1]
        oai = parse(StringIO(body))
        self.assertEqual(
            ['id0', 'id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7', 'id8', 'id9'],
            xpath(oai, '//oai:ListRecords/oai:data/@id'))
        self.assertEqual(
            ['getAllPrefixes', 'oaiSelect', 'oaiWatermark',
             'getMultipleData', 'oaiRecord', 'oaiRecord',
             'getMultipleData', 'oaiRecord', 'oaiRecord',
             'getMultipleData', 'oaiRecord', 'oaiRecord',
             'getMultipleData', 'oaiRecord', 'oaiRecord',
             'getMultipleData', 'oaiRecord', 'oaiRecord'
            ],
            self.observer.calledMethodNames())

    def _addRecords(self, identifiers, sets=None):
        # Adds each identifier to the OaiJazz from setUp with the 'oai_dc' prefix.
        for identifier in identifiers:
            self.oaiJazz.addOaiRecord(identifier=identifier, sets=sets, metadataFormats=[('oai_dc', '', '')])
class PleinTest(SeecrTestCase): def setUp(self): SeecrTestCase.setUp(self) self.storage = MultiSequentialStorage(join(self.tempdir, 'store'), name='storage') self.oaiJazz = OaiJazz(join(self.tempdir, 'oai'), name='oaiJazz') self.plein = self._newPlein() self.dna = be( (Observable(), (self.plein, (self.storage,), (self.oaiJazz,), ) )) def testAddInitialRecord(self): uri = "some:uri" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description>""" % uri lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord(identifier=uri) expected = XML(lxmltostring(xpathFirst(lxmlNode, '//rdf:RDF'))) cleanup_namespaces(expected) self.assertXmlEquals(expected, self.storage.getData(identifier=record.identifier, name='rdf')) self.assertEquals(set(['rdf']), record.prefixes) self.assertEquals(set(), record.sets) self.plein.close() plein2 = self._newPlein() self.assertEquals(['some:uri'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier')]) def testAddWithIgnoredOtherKwarg(self): uri = "some:uri" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source 
rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description>""" % uri lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode, otherKwarg='ignored')) record = self.oaiJazz.getRecord(identifier=uri) self.assertTrue(record, record) def testAddDescriptionsFor2DifferentUris(self): originalIdentifier='original:two_descriptions' lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> <rdf:Description rdf:about="http://example.com/first/uri"> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description> <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> <rdf:Description rdf:about="http://example.com/second/uri"> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source>Second Source</dcterms:source> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description> </rdf:RDF>""")) consume(self.dna.all.add(identifier=originalIdentifier, partname="ignored", lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord('http://example.com/first/uri') data = self.storage.getData(identifier=record1.identifier, name='rdf') self.assertTrue('<dcterms:source rdf:resource="http://first.example.org"/>' in data, 
data) self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data) record2 = self.oaiJazz.getRecord('http://example.com/second/uri') data = self.storage.getData(identifier=record2.identifier, name='rdf') self.assertEquals(1, data.count('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>'), data) self.assertTrue('<dcterms:source>Second Source</dcterms:source>' in data, data) def testAddDescriptionsWithMultipleSameUris(self): lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> </rdf:RDF>""")) consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode)) lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> </rdf:RDF>""")) consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord("http://example.com/first/uri") data = self.storage.getData(identifier=record.identifier, name='rdf') self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data) self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data) def testUpdateRecordWithDifferentFragments(self): uri = "uri:someuri" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri lxmlNode = 
parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord(uri) data = self.storage.getData(identifier=record.identifier, name='rdf') self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data) # now add with new title rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title> </rdf:Description>""" % uri lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord(uri) data = self.storage.getData(identifier=record.identifier, name='rdf') self.assertFalse('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data) self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>' in data, data) def testUpdateRecordShouldNotRemoveFragmentThatsInUseByOtherRecord(self): uri1 = "uri:someuri 1" uri2 = "uri:someuri 2" rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription1)) consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord(uri1) rdfDescription2 = """<rdf:Description rdf:about="%s" 
xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description>""" % uri2 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s %s </rdf:RDF>""" % (rdfDescription1, rdfDescription2))) consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode)) record2 = self.oaiJazz.getRecord(uri2) self.assertEquals(['uri:someuri 1'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier1')]) self.assertEquals(['uri:someuri 1', 'uri:someuri 2'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier2')]) record = self.oaiJazz.getRecord(uri1) self.assertEquals(record1.stamp, record.stamp) lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription2)) consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode)) # nothing has changed from the OAI perspective record = self.oaiJazz.getRecord(uri1) self.assertFalse(record.isDeleted) self.assertEquals(record1.stamp, record.stamp) record = self.oaiJazz.getRecord(uri2) self.assertEquals(record2.stamp, record.stamp) self.plein.close() plein2 = self._newPlein() self.assertEquals(['uri:someuri 1'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier1')]) self.assertEquals(['uri:someuri 2'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier2')]) def testRecordUpdateThatOrphansFragmentCausesUriOaiUpdate(self): uri1 = "uri:someuri1" uri2 = "uri:someuri2" lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> 
</rdf:Description> </rdf:RDF>""" % uri1)) consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode)) lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description> </rdf:RDF>""" % uri1)) consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord(uri1) # now update record 'identifier1' with fragment for different uri lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">another title</dc:title> </rdf:Description> </rdf:RDF>""" % uri2)) consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord(uri1) self.assertNotEquals(record1.stamp, record.stamp) self.assertEquals(['uri:someuri2'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier1')]) self.assertEquals(['uri:someuri1'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier2')]) def testUpdateRecordThatOrphansUriCausesUriDelete(self): uri1 = "uri:someuri1" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", partname="ignored", 
lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord(uri1) self.assertFalse(record1.isDeleted) # now add with different uri uri2 = "uri:someuri2" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title> </rdf:Description>""" % uri2 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord(uri1) self.assertTrue(record1.isDeleted) def testSpecialCharacterInUri(self): uri = "some:Baháma's:|have pipes ( | ) and spaces " rdfDescription1 = """<rdf:Description xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="%s"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description>""" % uri lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription1)) consume(self.dna.all.add(identifier=unicode(uri), partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord(identifier=unicode(uri)) data = self.storage.getData(identifier=record.identifier, name='rdf') self.assertTrue(uri in data, data) consume(self.dna.all.delete(identifier=unicode(uri))) record = self.oaiJazz.getRecord(identifier=unicode(uri)) self.assertTrue(record.isDeleted) def testDeleteUnseenRecord(self): try: consume(self.dna.all.delete(identifier="identifier")) except: # The above delete should just be silently ignored and not raise an exception # (as it did on some point). 
self.fail() def testDeleteRecordWithUniqueFragment(self): uri = "uri:someuri" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">%s</rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode)) consume(self.dna.all.delete(identifier="identifier")) record = self.oaiJazz.getRecord(uri) self.assertTrue(record.isDeleted) def testDeleteRecordWithNotSoUniqueFragment(self): uri1 = "uri:someuri1" uri2 = "uri:someuri2" rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription1)) consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode)) rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description>""" % uri2 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s %s </rdf:RDF>""" % (rdfDescription1, rdfDescription2))) consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode)) consume(self.dna.all.delete(identifier="identifier2")) record = self.oaiJazz.getRecord(uri1) self.assertFalse(record.isDeleted) record = self.oaiJazz.getRecord(uri2) self.assertTrue(record.isDeleted) def 
testAddTwoRecordsWithSameUriAndDeleteLast(self): uri = "uri:someuri" rdfNode, description = createRdfNode(uri) createSubElement(description, "dc:title", text='One') consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=rdfNode.getroot())) rdfNode, description = createRdfNode(uri) createSubElement(description, "dc:title", text='Two') consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=rdfNode.getroot())) consume(self.dna.all.delete(identifier="identifier2")) record = self.oaiJazz.getRecord(identifier=uri) self.assertEquals(['One'], xpath(XML(self.storage.getData(identifier=record.identifier, name='rdf')), '/rdf:RDF/rdf:Description/dc:title/text()')) def testAddDeleteAddForSameUri(self): uri1 = "uri:someuri1" rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1 lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription)) consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord(uri1) self.assertFalse(record1.isDeleted) consume(self.dna.all.delete(identifier="identifier")) record1 = self.oaiJazz.getRecord(uri1) self.assertTrue(record1.isDeleted) # a previous bug caused the following to raise an Exception consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode)) record1 = self.oaiJazz.getRecord(uri1) self.assertFalse(record1.isDeleted) def testPossibleShutdownAtWrongTime(self): # We suspect a bad shutdown could have cause a difference between keyvaluestore and the data. 
uri1 = "uri:someuri1" rdfFillTitle = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="%s" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">%%s</dc:title> </rdf:Description></rdf:RDF>""" % uri1 consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'title')))) record1 = self.storage.getData(identifier=uri1, name='rdf') self.assertEquals('title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()')) # HACK the data in storage, which could have happened if shutdown while adding. self.storage.addData(identifier=uri1, name='rdf', data=rdfFillTitle % 'other title') # Service is shutdown after adding the uri to the storage, but just before registring the fragmentHashes in the key value store # The next call caused a KeyError while removing old fragmentHashes. with stderr_replaced(): consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'other title')))) record1 = self.storage.getData(identifier=uri1, name='rdf') self.assertEquals('other title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()')) def testSetSpec(self): rdfNode, description = createRdfNode('uri:some') consume(self.dna.all.add(identifier='identifier', partname='ignored', lxmlNode=rdfNode, oaiArgs={'sets': [('first:example', 'set first:example')]})) self.assertEquals(set(['first', 'first:example']), self.oaiJazz.getAllSets()) def testBackwardsCompatiblePlein(self): uri = "http://data.bibliotheek.nl/CDR/JK115700" rdfNode, description = createRdfNode(uri) self.plein._fragmentAdmin['identifier'] = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri with stdout_replaced(): consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode)) def testBackwardsCompatiblePleinSpaces(self): uri = "http://data.bibliotheek.nl/CDR/J K11 
5700" rdfNode, description = createRdfNode(uri) self.plein._fragmentAdmin['identifier'] = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri with stdout_replaced(): consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode)) def testFixEncodedFragments(self): from meresco.rdf.plein import fixEncodedFragments, _Fragment ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206' uri1 = "http://data.bibliotheek.nl/CDR/J K11 5700" uri2 = "http://data.bibliotheek.nl/CDR/J K11 5701" data = '{0}|{1} {2}'.format(ahash, uri1, _Fragment(uri=uri2, hash=ahash).asEncodedString()) result = fixEncodedFragments(data) self.assertFalse('|' in result) fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')] self.assertEquals([uri1, uri2], [f.uri for f in fragments]) def testFixEncodedFragmentsWithPipes(self): from meresco.rdf.plein import fixEncodedFragments, _Fragment uri = "http://data.bibliotheek.nl/gids/film/Cultureel_festijn_'de_Franse_maand'_Ernest_en_Celestine_(Brammert_en_Tissie)_|_film_6+" ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206' data = '{0}|{1}'.format(ahash, uri) result = fixEncodedFragments(data) self.assertFalse('|' in result) fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')] self.assertEquals([uri], [f.uri for f in fragments]) def testFixEncodedFragmentsWithSpacesAndPipes(self): from meresco.rdf.plein import fixEncodedFragments, _Fragment uri = "http://data.bibliotheek.nl/gids/film/Cultureel festijn 'de Franse maand' Ernest en Celestine (Brammert en Tissie) | film 6+" ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206' data = '{0}|{1}'.format(ahash, uri) result = fixEncodedFragments(data) self.assertFalse('|' in result) fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')] self.assertEquals([uri], [f.uri for f in fragments]) def testFixEncodedFragmentsAllOfTheAbove(self): from meresco.rdf.plein import fixEncodedFragments, _Fragment ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206' 
uri1 = "http://data.bibliotheek.nl/CDR/J K11 5701" uri2 = "http://data.bibliotheek.nl/CDR/J K11 5702" uri3 = "http://data.bibliotheek.nl/CDR/J K| 11 57|03" uri4 = "http://data.bibliotheek.nl/CDR/J K11 5704" data = '{ahash}|{uri1} {fragment2} {ahash}|{uri3} {fragment4}'.format( fragment2=_Fragment(uri=uri2, hash=ahash).asEncodedString(), fragment4=_Fragment(uri=uri4, hash=ahash).asEncodedString(), **locals()) result = fixEncodedFragments(data) self.assertFalse('|' in result) fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')] self.assertEquals([uri1, uri2, uri3, uri4], [f.uri for f in fragments]) def testAddDeleteAddForSameUriDifferentIdentifier(self): lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> </rdf:RDF>""")) consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode)) consume(self.dna.all.delete(identifier='original:one_description')) self.assertRaises(KeyError, lambda: self.storage.getData(identifier="http://example.com/first/uri", name='rdf')) lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> </rdf:RDF>""")) consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord("http://example.com/first/uri") self.assertEquals("http://example.com/first/uri", record.identifier) data = self.storage.getData(identifier=record.identifier, name='rdf') self.assertFalse('<skos:prefLabel 
xml:lang="nl">Eerste</skos:prefLabel>' in data, data) self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data) def testReificationStatementGoesWithSubjectUri(self): lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> <rdf:Statement> <rdf:subject rdf:resource="http://example.com/first/uri"/> </rdf:Statement> </rdf:RDF>""")) consume(self.dna.all.add(identifier='original:uno', partname="ignored", lxmlNode=lxmlNode)) record = self.oaiJazz.getRecord("http://example.com/first/uri") self.assertEquals("http://example.com/first/uri", record.identifier) data = self.storage.getData(identifier=record.identifier, name='rdf') self.assertTrue('<rdf:subject rdf:resource="http://example.com/first/uri"/>' in data, data) def testCommit(self): self.plein.commit() # No way to assert anything other than that the method exists. def _newPlein(self, storageLabel="storage", oaiAddRecordLabel="oaiJazz"): return Plein(directory=self.tempdir, storageLabel=storageLabel, oaiAddRecordLabel=oaiAddRecordLabel, rdfxsdUrl='http://example.org/rdf.xsd')