def ebook_pid_minter(record_uuid, data, source):
    """Mint the ebook PID (and the OAI identifier) for a record.

    The PID value is computed by ``build_ebook_pid(data, source)`` and
    registered through :class:`rero_ebooks.providers.EbookPidProvider` with
    ``'rec'`` as object type and ``record_uuid`` as object UUID.  The value is
    then written into ``data`` under the key named by the
    ``PIDSTORE_RECID_FIELD`` configuration entry, and ``oaiid_minter`` is
    called for the same record.

    :param record_uuid: The record UUID.
    :param data: The record metadata; the PID field is added to it.
    :param source: Forwarded unchanged to ``build_ebook_pid``.
    :returns: The ``pid`` attribute of the created provider.
    :raises AssertionError: If ``data`` already contains the PID field.
    """
    field_name = current_app.config['PIDSTORE_RECID_FIELD']
    # This minter must only run on records that carry no PID yet.
    assert field_name not in data

    new_value = build_ebook_pid(data, source)
    ebook_provider = EbookPidProvider.create(
        object_type='rec',
        pid_value=new_value,
        object_uuid=record_uuid,
    )
    data[field_name] = new_value
    oaiid_minter(record_uuid, data)
    return ebook_provider.pid
def load_records(app, filename, schema, tries=5):
    """Create, mint and index the records found in a packaged data file.

    :param app: Application whose context wraps the whole operation.
    :param filename: Resource name resolved inside ``invenio_records``.
    :param schema: Value stored under ``$schema`` in every record.
    :param tries: Maximum number of search polls while waiting for indexing.
    :returns: List with the ids of the created records.
    """
    record_indexer = RecordIndexer()
    created_ids = []
    with app.app_context():
        # Record validation is patched out while the records are created.
        with mock.patch("invenio_records.api.Record.validate",
                        return_value=None):
            path = pkg_resources.resource_filename("invenio_records", filename)
            source_items = load(path)
            with db.session.begin_nested():
                for raw in source_items:
                    new_id = uuid.uuid4()
                    metadata = dict(marc21.do(raw))
                    metadata["$schema"] = schema
                    recid_minter(new_id, metadata)
                    oaiid_minter(new_id, metadata)
                    created = current_oaiserver.record_cls.create(
                        metadata, id_=new_id)
                    record_indexer.index(created)
                    created_ids.append(created.id)
            db.session.commit()

        # Poll the search cluster until all created records are reported.
        for _ in range(tries):
            hits = current_search_client.search()["hits"]
            if hits["total"] >= len(created_ids):
                break
            current_search.flush_and_refresh("_all")
    return created_ids
def load_oaiid(uuid):
    """Mint OAI ID information for the record.

    Looks for a persistent identifier whose value is the configured
    ``OAISERVER_ID_PREFIX`` followed by the record's ``recid``:

    * one found and assigned to this record: its value is stored under
      ``_oai.id`` and the record is committed;
    * one found but assigned to another object: an error is logged and the
      record is left untouched;
    * none found: ``oaiid_minter`` is called and the record is committed;
    * several found: the error is logged together with its traceback.

    :param uuid: UUID of the record.
    :type uuid: str
    """
    rec = Record.get_record(uuid)
    recid = str(rec['recid'])
    pid_value = current_app.config['OAISERVER_ID_PREFIX'] + recid
    try:
        pid = PersistentIdentifier.query.filter_by(pid_value=pid_value).one()
        assigned = str(pid.get_assigned_object())
        if assigned == uuid:
            rec.setdefault('_oai', {})
            rec['_oai']['id'] = pid.pid_value
            rec.commit()
            db.session.commit()
            logger.info('Matching OAI PID ({pid}) for {id}'.format(
                pid=pid, id=uuid))
        else:
            # No exception is being handled here, so ``logger.error`` is the
            # right call; ``logger.exception`` would append an empty
            # traceback to the message.
            logger.error(
                'OAI PID ({pid}) for record {id} ({recid}) is '
                'pointing to a different object ({id2})'.format(
                    pid=pid, id=uuid, id2=assigned, recid=recid))
    except NoResultFound:
        oaiid_minter(rec.id, rec)
        rec.commit()
        db.session.commit()
    except MultipleResultsFound:
        logger.exception(
            'Multiple OAI PIDs found for record {id} '
            '({recid})'.format(id=uuid, recid=recid))
def add_oai_information(obj, eng):
    """Make sure the stored record behind ``obj`` carries OAI information.

    The record is located through the ``recid`` PID matching
    ``obj.data['control_number']``.  A missing ``_oai`` entry is created, a
    missing OAI id is minted, missing or empty ``sets`` are recomputed with
    ``_get_oai_sets`` and ``updated`` is stamped with the current UTC time.
    Finally the record is committed, ``obj`` is saved and the session is
    committed.

    :param obj: Object providing ``data['control_number']`` and ``save()``.
    :param eng: Not used by this function.
    """
    control_number = obj.data['control_number']
    recid_pid = PersistentIdentifier.get('recid', control_number)
    record_uuid = recid_pid.object_uuid
    stored = Record.get_record(record_uuid)

    if '_oai' not in stored:
        try:
            oaiid_minter(record_uuid, stored)
        except PIDAlreadyExists:
            # The OAI PID is already registered: build the entry by hand.
            prefix = current_app.config.get('OAISERVER_ID_PREFIX')
            stored['_oai'] = {
                'id': '%s:%s' % (prefix, control_number),
                'sets': _get_oai_sets(stored),
            }

    if 'id' not in stored['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(record_uuid, stored)

    has_sets = 'sets' in stored['_oai'] and stored['_oai']['sets']
    if not has_sets:
        stored['_oai']['sets'] = _get_oai_sets(stored)

    timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
    stored['_oai']['updated'] = timestamp

    stored.commit()
    obj.save()
    db.session.commit()
def load_records(app, filename, schema, tries=5):
    """Create, mint and index the records found in a packaged data file.

    :param app: Application whose context wraps the whole operation.
    :param filename: Resource name resolved inside ``invenio_records``.
    :param schema: Value stored under ``$schema`` in every record.
    :param tries: Maximum number of search polls while waiting for indexing.
    :returns: List with the ids of the created records.
    """
    record_indexer = RecordIndexer()
    created_ids = []
    with app.app_context():
        # Record validation is patched out while the records are created.
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            path = pkg_resources.resource_filename('invenio_records', filename)
            source_items = load(path)
            with db.session.begin_nested():
                for raw in source_items:
                    new_id = uuid.uuid4()
                    metadata = dict(marc21.do(raw))
                    metadata['$schema'] = schema
                    recid_minter(new_id, metadata)
                    oaiid_minter(new_id, metadata)
                    created = Record.create(metadata, id_=new_id)
                    record_indexer.index(created)
                    created_ids.append(created.id)
            db.session.commit()

        # Poll the search cluster, refreshing the indices between attempts.
        for _ in range(tries):
            hits = current_search_client.search()['hits']
            if hits['total'] >= len(created_ids):
                break
            current_search.flush_and_refresh('_all')
    return created_ids
def load_records(app, filename, schema, tries=5):
    """Create, mint and index the records found in a packaged data file.

    :param app: Application whose context wraps the whole operation.
    :param filename: Resource name resolved inside ``invenio_records``.
    :param schema: Value stored under ``$schema`` in every record.
    :param tries: Maximum number of search polls while waiting for indexing;
        each unsuccessful poll is followed by a five second sleep.
    :returns: List with the ids of the created records.
    """
    record_indexer = RecordIndexer()
    created_ids = []
    with app.app_context():
        # Record validation is patched out while the records are created.
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            path = pkg_resources.resource_filename('invenio_records', filename)
            source_items = load(path)
            with db.session.begin_nested():
                for raw in source_items:
                    new_id = uuid.uuid4()
                    metadata = dict(marc21.do(raw))
                    metadata['$schema'] = schema
                    recid_minter(new_id, metadata)
                    oaiid_minter(new_id, metadata)
                    created = Record.create(metadata, id_=new_id)
                    record_indexer.index(created)
                    created_ids.append(created.id)
            db.session.commit()

        # Poll the search cluster until all created records are reported.
        for _ in range(tries):
            hits = current_search_client.search()['hits']
            if hits['total'] >= len(created_ids):
                break
            sleep(5)
    return created_ids
def oaiserver(number):
    """Populate the database with OAI sets and matching records.

    Adds ``number`` ``OAISet`` rows (``test0``, ``test1``, ...) and then
    creates ``number`` records titled ``Test<i>``, minting a record id and an
    OAI id for each, before committing the session.

    :param number: How many sets and how many records to create.
    """
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # One OAI set per index; its search pattern matches one record title.
    with db.session.begin_nested():
        for idx in range(number):
            oai_set = OAISet(
                spec='test{0}'.format(idx),
                name='Test{0}'.format(idx),
                description='test desc {0}'.format(idx),
                search_pattern='title:Test{0}'.format(idx),
            )
            db.session.add(oai_set)

    # JSON schema embedded in every record under ``$schema``.
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }

    with db.session.begin_nested():
        for idx in range(number):
            new_id = uuid.uuid4()
            metadata = {'title': 'Test{0}'.format(idx), '$schema': schema}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)

    db.session.commit()
def load_oaiid(uuid):
    """Mint OAI ID information for the record.

    Looks for a persistent identifier whose value is the configured
    ``OAISERVER_ID_PREFIX`` followed by the record's ``recid``:

    * one found and assigned to this record: its value is stored under
      ``_oai.id`` and the record is committed;
    * one found but assigned to another object: an error is logged and the
      record is left untouched;
    * none found: ``oaiid_minter`` is called and the record is committed;
    * several found: the error is logged together with its traceback.

    :param uuid: UUID of the record.
    :type uuid: str
    """
    rec = Record.get_record(uuid)
    recid = str(rec['recid'])
    pid_value = current_app.config['OAISERVER_ID_PREFIX'] + recid
    try:
        pid = PersistentIdentifier.query.filter_by(pid_value=pid_value).one()
        assigned = str(pid.get_assigned_object())
        if assigned == uuid:
            rec.setdefault('_oai', {})
            rec['_oai']['id'] = pid.pid_value
            rec.commit()
            db.session.commit()
            logger.info('Matching OAI PID ({pid}) for {id}'.format(
                pid=pid, id=uuid))
        else:
            # No exception is being handled here, so ``logger.error`` is the
            # right call; ``logger.exception`` would append an empty
            # traceback to the message.
            logger.error(
                'OAI PID ({pid}) for record {id} ({recid}) is '
                'pointing to a different object ({id2})'.format(
                    pid=pid, id=uuid, id2=assigned, recid=recid))
    except NoResultFound:
        oaiid_minter(rec.id, rec)
        rec.commit()
        db.session.commit()
    except MultipleResultsFound:
        logger.exception(
            'Multiple OAI PIDs found for record {id} '
            '({recid})'.format(id=uuid, recid=recid))
def add_oai_information(obj, eng):
    """Make sure the stored record behind ``obj`` carries OAI information.

    The record is located through the ``recid`` PID matching
    ``obj.data['control_number']``.  A missing ``_oai`` entry is created, a
    missing OAI id is minted, missing or empty ``sets`` are recomputed with
    ``_get_oai_sets`` and ``updated`` is stamped with the current UTC time.
    Finally the record is committed, ``obj`` is saved and the session is
    committed.

    :param obj: Object providing ``data['control_number']`` and ``save()``.
    :param eng: Not used by this function.
    """
    control_number = obj.data['control_number']
    recid_pid = PersistentIdentifier.get('recid', control_number)
    record_uuid = recid_pid.object_uuid
    stored = Record.get_record(record_uuid)

    if '_oai' not in stored:
        try:
            oaiid_minter(record_uuid, stored)
        except PIDAlreadyExists:
            # The OAI PID is already registered: build the entry by hand
            # using the fixed ``oai:beta.scoap3.org:`` prefix.
            stored['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % control_number,
                'sets': _get_oai_sets(stored),
            }

    if 'id' not in stored['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(record_uuid, stored)

    has_sets = 'sets' in stored['_oai'] and stored['_oai']['sets']
    if not has_sets:
        stored['_oai']['sets'] = _get_oai_sets(stored)

    timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
    stored['_oai']['updated'] = timestamp

    stored.commit()
    obj.save()
    db.session.commit()
def create_record(item_dict):
    """Create, mint and index a test record.

    :param item_dict: Metadata handed to both minters and to
        ``Record.create``.
    :returns: The created record.
    """
    new_id = uuid.uuid4()
    recid_minter(new_id, item_dict)
    oaiid_minter(new_id, item_dict)
    created = Record.create(item_dict, id_=new_id)
    # ``indexer`` is taken from the enclosing scope.
    indexer.index(created)
    return created
def test_listrecords(app):
    """ListRecords returns exactly one complete record entry.

    One record is created, minted and indexed; the ``oai_dc`` ListRecords
    response must then contain each expected element exactly once.
    """
    schema = {
        'type': 'object',
        'properties': {
            'title': {'type': 'string'},
            'field': {'type': 'boolean'},
        },
        'required': ['title'],
    }
    # Every one of these XPath expressions must match a single node.
    single_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:ListRecords',
        '/x:OAI-PMH/x:ListRecords/x:record',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
        '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
    )
    with app.test_request_context():
        record_indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
        db.session.commit()

        record_indexer.index_by_id(new_id)
        sleep(2)  # give the search cluster time to index the record

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        for path in single_paths:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1
def create_record(item_dict, mint_oaiid=True):
    """Create, mint and index a test record inside a request context.

    :param item_dict: Metadata handed to the minters and to
        ``Record.create``.
    :param mint_oaiid: When false, ``oaiid_minter`` is not called.
    :returns: The created record.
    """
    # ``app`` and ``indexer`` are taken from the enclosing scope.
    with app.test_request_context():
        new_id = uuid.uuid4()
        recid_minter(new_id, item_dict)
        if mint_oaiid:
            oaiid_minter(new_id, item_dict)
        created = Record.create(item_dict, id_=new_id)
        indexer.index(created)
    return created
def oaiserver(sets, records):
    """Populate the database and the search cluster for OAI-PMH.

    Adds ``sets`` ``OAISet`` rows, deletes all existing search aliases and
    indices, then creates, mints and indexes ``records`` records titled
    ``Test<i>`` before committing the session.

    :param sets: Number of OAI sets to create.
    :param records: Number of records to create.
    """
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # One OAI set per index; its search pattern matches one record title.
    with db.session.begin_nested():
        for idx in range(sets):
            oai_set = OAISet(
                spec='test{0}'.format(idx),
                name='Test{0}'.format(idx),
                description='test desc {0}'.format(idx),
                search_pattern='title_statement.title:Test{0}'.format(idx),
            )
            db.session.add(oai_set)

    # JSON schema embedded in every record under ``$schema``.
    title_statement = {
        'type': 'object',
        'properties': {'title': {'type': 'string'}},
    }
    schema = {
        'type': 'object',
        'properties': {
            'title_statement': title_statement,
            'field': {'type': 'boolean'},
        },
    }

    # Remove every alias and index; ``search`` and ``app`` come from the
    # enclosing scope.
    search.client.indices.delete_alias('_all', '_all', ignore=[400, 404])
    search.client.indices.delete('*')

    with app.app_context():
        record_indexer = RecordIndexer()
        with db.session.begin_nested():
            for idx in range(records):
                new_id = uuid.uuid4()
                metadata = {
                    'title_statement': {'title': 'Test{0}'.format(idx)},
                    '$schema': schema,
                }
                recid_minter(new_id, metadata)
                oaiid_minter(new_id, metadata)
                created = Record.create(metadata, id_=new_id)
                record_indexer.index(created)

        db.session.commit()
def create_record(app, item_dict, mint_oaiid=True):
    """Create, mint and index a test record inside a request context.

    :param app: Application providing the test request context.
    :param item_dict: Metadata handed to the minters and to
        ``Record.create``.
    :param mint_oaiid: When false, ``oaiid_minter`` is not called.
    :returns: The created record.
    """
    record_indexer = RecordIndexer()
    with app.test_request_context():
        new_id = uuid.uuid4()
        recid_minter(new_id, item_dict)
        if mint_oaiid:
            oaiid_minter(new_id, item_dict)
        created = Record.create(item_dict, id_=new_id)
        record_indexer.index(created)
    return created
def oaiserver(sets, records):
    """Populate the database and the search cluster for OAI-PMH.

    Adds ``sets`` ``OAISet`` rows, deletes all existing search aliases and
    indices, then creates, mints and indexes ``records`` records titled
    ``Test<i>`` before committing the session.

    :param sets: Number of OAI sets to create.
    :param records: Number of records to create.
    """
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # One OAI set per index; its search pattern matches one record title.
    with db.session.begin_nested():
        for idx in range(sets):
            oai_set = OAISet(
                spec='test{0}'.format(idx),
                name='Test{0}'.format(idx),
                description='test desc {0}'.format(idx),
                search_pattern='title_statement.title:Test{0}'.format(idx),
            )
            db.session.add(oai_set)

    # JSON schema embedded in every record under ``$schema``.
    title_statement = {
        'type': 'object',
        'properties': {'title': {'type': 'string'}},
    }
    schema = {
        'type': 'object',
        'properties': {
            'title_statement': title_statement,
            'field': {'type': 'boolean'},
        },
    }

    # Remove every alias and index; ``search`` and ``app`` come from the
    # enclosing scope.
    search.client.indices.delete_alias('_all', '_all', ignore=[400, 404])
    search.client.indices.delete('*')

    with app.app_context():
        record_indexer = RecordIndexer()
        with db.session.begin_nested():
            for idx in range(records):
                new_id = uuid.uuid4()
                metadata = {
                    'title_statement': {'title': 'Test{0}'.format(idx)},
                    '$schema': schema,
                }
                recid_minter(new_id, metadata)
                oaiid_minter(new_id, metadata)
                created = Record.create(metadata, id_=new_id)
                record_indexer.index(created)

        db.session.commit()
def zenodo_record_minter(record_uuid, data):
    """Mint the record identifier, the DOI and the OAI identifier.

    When ``data`` has no ``recid`` a new one is created and stored in
    ``data`` as an integer; otherwise the existing ``recid`` PID is assigned
    to the record and registered.

    :param record_uuid: The record UUID.
    :param data: The record metadata.
    :returns: The ``recid`` persistent identifier.
    """
    if 'recid' not in data:
        provider = RecordIdProvider.create(
            object_type='rec', object_uuid=record_uuid)
        recid = provider.pid
        data['recid'] = int(recid.pid_value)
    else:
        # The recid was reserved earlier: bind it to this record.
        recid = PersistentIdentifier.get('recid', data['recid'])
        recid.assign('rec', record_uuid)
        recid.register()

    zenodo_doi_minter(record_uuid, data)
    oaiid_minter(record_uuid, data)
    return recid
def test_listmetadataformats_record(app):
    """ListMetadataFormats works when a record identifier is supplied.

    A record is created and minted, then the shared
    ``_listmetadataformats`` check runs against a query carrying its OAI id.
    """
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }
    with app.test_request_context():
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
            oai_value = oai_pid.pid_value

        db.session.commit()

        query = '/oai2d?verb=ListMetadataFormats&identifier={0}'.format(
            oai_value)
        _listmetadataformats(app=app, query=query)
def test_listmetadataformats_record(app):
    """ListMetadataFormats works when a record identifier is supplied.

    A record is created and minted, then the shared
    ``_listmetadataformats`` check runs against a query carrying its OAI id.
    """
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }
    with app.test_request_context():
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
            oai_value = oai_pid.pid_value

        db.session.commit()

        query = '/oai2d?verb=ListMetadataFormats&identifier={0}'.format(
            oai_value)
        _listmetadataformats(app=app, query=query)
def test_getrecord(app):
    """GetRecord answers with the header and metadata of a minted record.

    The datestamp in the response is compared with the ``updated`` value of
    the OAI persistent identifier.
    """
    schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "field": {"type": "boolean"},
        },
        "required": ["title"],
    }
    # Each of these XPath expressions must match a single node.
    single_paths = (
        "/x:OAI-PMH",
        "/x:OAI-PMH/x:GetRecord",
        "/x:OAI-PMH/x:GetRecord/x:header",
        "/x:OAI-PMH/x:GetRecord/x:header/x:identifier",
    )
    with app.test_request_context():
        with db.session.begin_nested():
            record = Record.create({"title": "Test0", "$schema": schema}).model
            recid_minter(record.id, record.json)
            oai_pid = oaiid_minter(record.id, record.json)

        db.session.commit()

        oai_value = oai_pid.pid_value
        oai_updated = oai_pid.updated

        with app.test_client() as client:
            response = client.get(
                "/oai2d?verb=GetRecord&identifier={0}&metadataPrefix=oai_dc"
                .format(oai_value))
            assert 200 == response.status_code

            tree = etree.fromstring(response.data)
            ns = {"x": NS_OAIPMH}

            for path in single_paths:
                assert len(tree.xpath(path, namespaces=ns)) == 1

            found_ids = tree.xpath(
                "/x:OAI-PMH/x:GetRecord/x:header/x:identifier/text()",
                namespaces=ns)
            assert found_ids == [str(record.id)]

            found_stamps = tree.xpath(
                "/x:OAI-PMH/x:GetRecord/x:header/x:datestamp/text()",
                namespaces=ns)
            assert found_stamps == [datetime_to_datestamp(oai_updated)]

            metadata_nodes = tree.xpath(
                "/x:OAI-PMH/x:GetRecord/x:metadata", namespaces=ns)
            assert len(metadata_nodes) == 1
def pids():
    """Fetch and register PIDs.

    For every stored record:

    #. Look up the record-id PID described by ``recid_fetcher`` and create a
       ``REGISTERED`` one when it does not exist.  Records for which the
       lookup raises ``KeyError`` are reported and skipped.
    #. Make sure ``_oai.id`` is set; when missing it is built from
       ``OAISERVER_ID_PREFIX`` and the record's ``control_number``.
    #. Look up the OAI PID and mint it when it does not exist.
    #. Flag the JSON as modified and persist the record.

    :raises AssertionError: If a record without ``_oai.id`` has no
        ``control_number`` either.
    """
    from invenio_db import db
    # Fixed: the import used to read ``onaiid_fetcher``, a name that is
    # neither provided by the module nor the one called below.
    from invenio_oaiserver.fetchers import oaiid_fetcher
    from invenio_oaiserver.minters import oaiid_minter
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.fetchers import recid_fetcher
    from invenio_pidstore.models import PIDStatus, PersistentIdentifier
    from invenio_records.models import RecordMetadata

    recids = [r.id for r in RecordMetadata.query.all()]
    db.session.expunge_all()

    with click.progressbar(recids) as bar:
        for record_id in bar:
            record = RecordMetadata.query.get(record_id)

            # Step 1: record identifier.
            try:
                pid = recid_fetcher(record.id, record.json)
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider)
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                db.session.add(
                    PersistentIdentifier.create(
                        pid.pid_type, pid.pid_value,
                        object_type='rec', object_uuid=record.id,
                        status=PIDStatus.REGISTERED))
            except KeyError:
                click.echo('Skiped: {0}'.format(record.id))
                continue

            # Step 2: OAI identifier stored in the metadata.
            pid_value = record.json.get('_oai', {}).get('id')
            if pid_value is None:
                assert 'control_number' in record.json
                pid_value = current_app.config.get(
                    'OAISERVER_ID_PREFIX') + str(record.json['control_number'])

                record.json.setdefault('_oai', {})
                # Fixed: store the prefixed OAI value computed above, not the
                # value of the record-id PID.
                record.json['_oai']['id'] = pid_value

            # Step 3: OAI persistent identifier.
            pid = oaiid_fetcher(record.id, record.json)
            try:
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider)
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                pid = oaiid_minter(record.id, record.json)
                db.session.add(pid)

            # Step 4: in-place changes to the JSON column must be flagged
            # before the record is written back.
            flag_modified(record, 'json')
            assert record.json['_oai']['id']
            db.session.add(record)
            # Commit and detach per record so the session does not keep
            # every processed record alive.
            db.session.commit()
            db.session.expunge_all()
def test_listrecords(app):
    """ListRecords returns exactly one complete record entry.

    One record is created, minted and indexed; the ``oai_dc`` ListRecords
    response must then contain each expected element exactly once.
    """
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }
    # Every one of these XPath expressions must match a single node.
    single_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:ListRecords',
        '/x:OAI-PMH/x:ListRecords/x:record',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
        '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
    )
    with app.test_request_context():
        record_indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
        db.session.commit()

        record_indexer.index_by_id(new_id)
        sleep(2)  # give the search cluster time to index the record

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        for path in single_paths:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1
def id_minter(record_uuid, data, provider, pid_key='pid', object_type='rec'):
    """Mint a document PID and, when still missing, its OAI identifier.

    :param record_uuid: The record UUID.
    :param data: The record metadata; the PID value is written to
        ``data[pid_key]``.
    :param provider: Provider class whose ``create`` builds the PID.
    :param pid_key: Key of ``data`` holding the PID value.
    :param object_type: Object type passed to the provider.
    :returns: The PID of the created provider.
    """
    created = provider.create(
        object_type=object_type,
        object_uuid=record_uuid,
        pid_value=data.get(pid_key),
    )
    new_pid = created.pid
    data[pid_key] = new_pid.pid_value

    # The minter is called twice during API calls, so the OAI PID is only
    # minted when it cannot be found.
    prefix = current_app.config.get('OAISERVER_ID_PREFIX', '')
    oai_value = prefix + str(new_pid.pid_value)
    try:
        OAIIDProvider.get(oai_value, 'oai')
    except PIDDoesNotExistError:
        oaiid_minter(record_uuid, data)

    return new_pid
def oaiserver(number):
    """Populate the database with OAI sets and matching records.

    Adds ``number`` ``OAISet`` rows (``test0``, ``test1``, ...) and then
    creates ``number`` records titled ``Test<i>``, minting a record id and an
    OAI id for each, before committing the session.

    :param number: How many sets and how many records to create.
    """
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # One OAI set per index; its search pattern matches one record title.
    with db.session.begin_nested():
        for idx in range(number):
            oai_set = OAISet(
                spec='test{0}'.format(idx),
                name='Test{0}'.format(idx),
                description='test desc {0}'.format(idx),
                search_pattern='title:Test{0}'.format(idx),
            )
            db.session.add(oai_set)

    # JSON schema embedded in every record under ``$schema``.
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }

    with db.session.begin_nested():
        for idx in range(number):
            new_id = uuid.uuid4()
            metadata = {'title': 'Test{0}'.format(idx), '$schema': schema}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)

    db.session.commit()
def test_listidentifiers(app):
    """ListIdentifiers returns one header with the minted id and datestamp.

    The identifier must equal the OAI PID value and the datestamp must equal
    the record's ``updated`` value.
    """
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }
    with app.test_request_context():
        record_indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)
        db.session.commit()

        record_indexer.index_by_id(new_id)
        sleep(1)  # give the search cluster time to index the record

        oai_value = oai_pid.pid_value

        with app.test_client() as client:
            response = client.get(
                "/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc")

        tree = etree.fromstring(response.data)
        ns = {'x': NS_OAIPMH}

        for path in ('/x:OAI-PMH',
                     '/x:OAI-PMH/x:ListIdentifiers',
                     '/x:OAI-PMH/x:ListIdentifiers/x:header'):
            assert len(tree.xpath(path, namespaces=ns)) == 1

        identifiers = tree.xpath(
            '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier',
            namespaces=ns)
        assert len(identifiers) == 1
        assert identifiers[0].text == str(oai_value)

        datestamps = tree.xpath(
            '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp',
            namespaces=ns)
        assert len(datestamps) == 1
        assert datestamps[0].text == datetime_to_datestamp(record.updated)
def test_getrecord(app):
    """GetRecord answers with the header and metadata of a minted record.

    The datestamp in the response is compared with the ``updated`` value of
    the record model.
    """
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }
    # Each of these XPath expressions must match a single node.
    single_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:GetRecord',
        '/x:OAI-PMH/x:GetRecord/x:header',
        '/x:OAI-PMH/x:GetRecord/x:header/x:identifier',
    )
    with app.test_request_context():
        with db.session.begin_nested():
            record = Record.create({'title': 'Test0', '$schema': schema}).model
            recid_minter(record.id, record.json)
            oai_pid = oaiid_minter(record.id, record.json)

        db.session.commit()

        oai_value = oai_pid.pid_value
        record_updated = record.updated

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=GetRecord&identifier={0}&metadataPrefix=oai_dc'
                .format(oai_value))
            assert 200 == response.status_code

            tree = etree.fromstring(response.data)

            for path in single_paths:
                assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

            found_ids = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:header/x:identifier/text()',
                namespaces=NAMESPACES)
            assert found_ids == [str(record.id)]

            found_stamps = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:header/x:datestamp/text()',
                namespaces=NAMESPACES)
            assert found_stamps == [datetime_to_datestamp(record_updated)]

            metadata_nodes = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:metadata', namespaces=NAMESPACES)
            assert len(metadata_nodes) == 1
def test_getrecord(app):
    """GetRecord serves a record whose OAI id was preset in its metadata.

    The record is created with ``_oai.id`` already set to ``oai:legacy:1``;
    the minter must return a PID with that value and the response must
    expose the same identifier.
    """
    # Each of these XPath expressions must match a single node.
    single_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:GetRecord',
        '/x:OAI-PMH/x:GetRecord/x:record/x:header',
        '/x:OAI-PMH/x:GetRecord/x:record/x:header/x:identifier',
    )
    with app.test_request_context():
        pid_value = 'oai:legacy:1'

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {
                '_oai': {'id': pid_value},
                'title_statement': {'title': 'Test0'},
            }
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)

        db.session.commit()

        assert pid_value == oai_pid.pid_value
        record_updated = record.updated

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=GetRecord&identifier={0}&metadataPrefix=oai_dc'
                .format(pid_value))
            assert 200 == response.status_code

            tree = etree.fromstring(response.data)

            for path in single_paths:
                assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

            found_ids = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:record/x:header/x:identifier/text()',
                namespaces=NAMESPACES)
            assert found_ids == [pid_value]

            found_stamps = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:record/x:header/x:datestamp/text()',
                namespaces=NAMESPACES)
            assert found_stamps == [datetime_to_datestamp(record_updated)]

            metadata_nodes = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:record/x:metadata',
                namespaces=NAMESPACES)
            assert len(metadata_nodes) == 1
def test_listrecords(app):
    """ListRecords returns exactly one complete record entry.

    One record is created, minted and indexed; the ``oai_dc`` ListRecords
    response must then contain each expected element exactly once.
    """
    # Every one of these XPath expressions must match a single node.
    single_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:ListRecords',
        '/x:OAI-PMH/x:ListRecords/x:record',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
        '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
    )
    with app.test_request_context():
        record_indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
        db.session.commit()

        record_indexer.index_by_id(new_id)
        sleep(2)  # give the search cluster time to index the record

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        for path in single_paths:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1
def sync_record_oai(uuid, cache=None):
    """Mint OAI ID information for the record.

    Depending on how many ``oai`` PIDs point at the record:

    * none: a new one is minted, ``_oai.sets`` is filled, the record is
      committed and sent for bulk indexing;
    * one: after checking its prefix and its assigned object, the ``_oai``
      entry is refreshed when ``requires_sync`` says so, then committed and
      sent for bulk indexing;
    * more than one: an error is logged and nothing is changed.

    :param uuid: UUID of the record.
    :type uuid: str
    :param cache: Forwarded to ``get_synced_sets`` and ``requires_sync``.
    """
    rec = Record.get_record(uuid)
    recid_s = str(rec['recid'])

    # Try to get the already existing OAI PID for this record
    oai_pid_q = PersistentIdentifier.query.filter_by(pid_type='oai',
                                                     object_uuid=rec.id)
    # Count once so both branches decide on the same result.
    pid_count = oai_pid_q.count()

    if pid_count == 0:
        pid = oaiid_minter(rec.id, rec)
        synced_sets = get_synced_sets(rec, cache=cache)
        rec['_oai']['sets'] = synced_sets
        rec.commit()
        db.session.commit()
        RecordIndexer().bulk_index([str(rec.id), ])
        logger.info('Minted new OAI PID ({pid}) for record {id}'.format(
            pid=pid, id=uuid))
    elif pid_count == 1:
        pid = oai_pid_q.one()
        managed_prefixes = current_app.config['OAISERVER_MANAGED_ID_PREFIXES']
        # ``logger.error`` is used below because no exception is being
        # handled; ``logger.exception`` would append an empty traceback.
        if not any(pid.pid_value.startswith(p) for p in managed_prefixes):
            logger.error('Unknown OAIID prefix: {0}'.format(pid.pid_value))
        elif str(pid.get_assigned_object()) != uuid:
            logger.error(
                'OAI PID ({pid}) for record {id} ({recid}) is '
                'pointing to a different object ({id2})'.format(
                    pid=pid, id=uuid, id2=str(pid.get_assigned_object()),
                    recid=recid_s))
        elif requires_sync(rec, cache=cache):
            rec.setdefault('_oai', {})
            rec['_oai']['id'] = pid.pid_value
            rec['_oai']['updated'] = datetime_to_datestamp(datetime.utcnow())
            synced_sets = get_synced_sets(rec, cache=cache)
            rec['_oai']['sets'] = synced_sets
            if not rec['_oai']['sets']:
                del rec['_oai']['sets']  # Don't store empty list
            rec.commit()
            db.session.commit()
            RecordIndexer().bulk_index([str(rec.id), ])
            logger.info('Matching OAI PID ({pid}) for record {id}'.format(
                pid=pid, id=uuid))
    else:
        # Previously ignored silently; the record is still left untouched.
        logger.error('Multiple OAI PIDs found for record {id} '
                     '({recid})'.format(id=uuid, recid=recid_s))
def zenodo_record_minter(record_uuid, data):
    """Zenodo record minter.

    Mints the concept recid when ``data`` has no ``conceptrecid``, then
    either creates a new ``recid`` (stored in ``data`` as an integer) or
    assigns and registers the existing one.  Afterwards the DOI and the OAI
    id are minted, followed by the concept DOI when ``data`` has no
    ``conceptdoi``.

    :param record_uuid: The record UUID.
    :param data: The record metadata.
    :returns: The ``recid`` persistent identifier.
    """
    if 'conceptrecid' not in data:
        zenodo_concept_recid_minter(record_uuid, data)

    if 'recid' not in data:
        provider = RecordIdProvider.create(
            object_type='rec', object_uuid=record_uuid)
        recid = provider.pid
        data['recid'] = int(recid.pid_value)
    else:
        # The recid was reserved earlier: bind it to this record.
        recid = PersistentIdentifier.get('recid', data['recid'])
        recid.assign('rec', record_uuid)
        recid.register()

    zenodo_doi_minter(record_uuid, data)
    oaiid_minter(record_uuid, data)

    if 'conceptdoi' not in data:
        zenodo_concept_doi_minter(record_uuid, data)

    return recid
def zenodo_record_minter(record_uuid, data):
    """Zenodo record minter.

    Mints the concept recid when ``data`` has no ``conceptrecid``, then
    either creates a new ``recid`` (stored in ``data`` as an integer) or
    assigns and registers the existing one.  Afterwards the DOI and the OAI
    id are minted, followed by the concept DOI when ``data`` has no
    ``conceptdoi``.

    :param record_uuid: The record UUID.
    :param data: The record metadata.
    :returns: The ``recid`` persistent identifier.
    """
    if 'conceptrecid' not in data:
        zenodo_concept_recid_minter(record_uuid, data)

    if 'recid' not in data:
        provider = RecordIdProvider.create(
            object_type='rec', object_uuid=record_uuid)
        recid = provider.pid
        data['recid'] = int(recid.pid_value)
    else:
        # The recid was reserved earlier: bind it to this record.
        recid = PersistentIdentifier.get('recid', data['recid'])
        recid.assign('rec', record_uuid)
        recid.register()

    zenodo_doi_minter(record_uuid, data)
    oaiid_minter(record_uuid, data)

    if 'conceptdoi' not in data:
        zenodo_concept_doi_minter(record_uuid, data)

    return recid
def test_listrecords(app):
    """ListRecords returns exactly one complete record entry.

    One record is created, minted and indexed; the ``oai_dc`` ListRecords
    response must then contain each expected element exactly once.
    """
    schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "field": {"type": "boolean"},
        },
        "required": ["title"],
    }
    # Every one of these XPath expressions must match a single node.
    single_paths = (
        "/x:OAI-PMH",
        "/x:OAI-PMH/x:ListRecords",
        "/x:OAI-PMH/x:ListRecords/x:record",
        "/x:OAI-PMH/x:ListRecords/x:record/x:header",
        "/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier",
        "/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp",
        "/x:OAI-PMH/x:ListRecords/x:record/x:metadata",
    )
    with app.test_request_context():
        record_indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {"title": "Test0", "$schema": schema}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
        db.session.commit()

        record_indexer.index_by_id(new_id)
        sleep(1)  # give the search cluster time to index the record

        with app.test_client() as client:
            response = client.get(
                "/oai2d?verb=ListRecords&metadataPrefix=oai_dc")

        tree = etree.fromstring(response.data)
        ns = {"x": NS_OAIPMH}
        for path in single_paths:
            assert len(tree.xpath(path, namespaces=ns)) == 1
def test_listmetadataformats_record(app):
    """ListMetadataFormats works when a record identifier is supplied.

    A record is created and minted, then the shared
    ``_listmetadataformats`` check runs against an ``/oai`` query carrying
    its OAI id.
    """
    with app.test_request_context():
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
            oai_value = oai_pid.pid_value

        db.session.commit()

        query = '/oai?verb=ListMetadataFormats&identifier={0}'.format(
            oai_value)
        _listmetadataformats(app=app, query=query)
def test_listmetadataformats_record(app):
    """ListMetadataFormats works when a record identifier is supplied.

    A record is created and minted, then the shared
    ``_listmetadataformats`` check runs against an ``/oai2d`` query carrying
    its OAI id.
    """
    with app.test_request_context():
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
            oai_value = oai_pid.pid_value

        db.session.commit()

        query = '/oai2d?verb=ListMetadataFormats&identifier={0}'.format(
            oai_value)
        _listmetadataformats(app=app, query=query)
def sync_record_oai(uuid, cache=None):
    """Mint OAI ID information for the record.

    Depending on how many ``oai`` PIDs point at the record:

    * none: a new one is minted, ``_oai.sets`` is filled, the record is
      committed and sent for bulk indexing;
    * one: after checking its prefix and its assigned object, the ``_oai``
      entry is refreshed when ``requires_sync`` says so, then committed and
      sent for bulk indexing;
    * more than one: an error is logged and nothing is changed.

    :param uuid: UUID of the record.
    :type uuid: str
    :param cache: Forwarded to ``get_synced_sets`` and ``requires_sync``.
    """
    rec = Record.get_record(uuid)
    recid_s = str(rec['recid'])

    # Try to get the already existing OAI PID for this record
    oai_pid_q = PersistentIdentifier.query.filter_by(pid_type='oai',
                                                     object_uuid=rec.id)
    # Count once so both branches decide on the same result.
    pid_count = oai_pid_q.count()

    if pid_count == 0:
        pid = oaiid_minter(rec.id, rec)
        synced_sets = get_synced_sets(rec, cache=cache)
        rec['_oai']['sets'] = synced_sets
        rec.commit()
        db.session.commit()
        RecordIndexer().bulk_index([str(rec.id), ])
        logger.info('Minted new OAI PID ({pid}) for record {id}'.format(
            pid=pid, id=uuid))
    elif pid_count == 1:
        pid = oai_pid_q.one()
        managed_prefixes = current_app.config['OAISERVER_MANAGED_ID_PREFIXES']
        # ``logger.error`` is used below because no exception is being
        # handled; ``logger.exception`` would append an empty traceback.
        if not any(pid.pid_value.startswith(p) for p in managed_prefixes):
            logger.error('Unknown OAIID prefix: {0}'.format(pid.pid_value))
        elif str(pid.get_assigned_object()) != uuid:
            logger.error(
                'OAI PID ({pid}) for record {id} ({recid}) is '
                'pointing to a different object ({id2})'.format(
                    pid=pid, id=uuid, id2=str(pid.get_assigned_object()),
                    recid=recid_s))
        elif requires_sync(rec, cache=cache):
            rec.setdefault('_oai', {})
            rec['_oai']['id'] = pid.pid_value
            rec['_oai']['updated'] = datetime_to_datestamp(datetime.utcnow())
            synced_sets = get_synced_sets(rec, cache=cache)
            rec['_oai']['sets'] = synced_sets
            if not rec['_oai']['sets']:
                del rec['_oai']['sets']  # Don't store empty list
            rec.commit()
            db.session.commit()
            RecordIndexer().bulk_index([str(rec.id), ])
            logger.info('Matching OAI PID ({pid}) for record {id}'.format(
                pid=pid, id=uuid))
    else:
        # Previously ignored silently; the record is still left untouched.
        logger.error('Multiple OAI PIDs found for record {id} '
                     '({recid})'.format(id=uuid, recid=recid_s))
def test_getrecord(app):
    """GetRecord answers with the header and metadata of a minted record.

    The datestamp in the response is compared with the ``updated`` value of
    the record model.
    """
    properties = {
        'title': {'type': 'string'},
        'field': {'type': 'boolean'},
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['title'],
    }
    # Each of these XPath expressions must match a single node.
    single_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:GetRecord',
        '/x:OAI-PMH/x:GetRecord/x:header',
        '/x:OAI-PMH/x:GetRecord/x:header/x:identifier',
    )
    with app.test_request_context():
        with db.session.begin_nested():
            record = Record.create({'title': 'Test0', '$schema': schema}).model
            recid_minter(record.id, record.json)
            oai_pid = oaiid_minter(record.id, record.json)

        db.session.commit()

        oai_value = oai_pid.pid_value
        record_updated = record.updated

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=GetRecord&identifier={0}&metadataPrefix=oai_dc'
                .format(oai_value))
            assert 200 == response.status_code

            tree = etree.fromstring(response.data)

            for path in single_paths:
                assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

            found_ids = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:header/x:identifier/text()',
                namespaces=NAMESPACES)
            assert found_ids == [str(record.id)]

            found_stamps = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:header/x:datestamp/text()',
                namespaces=NAMESPACES)
            assert found_stamps == [datetime_to_datestamp(record_updated)]

            metadata_nodes = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:metadata', namespaces=NAMESPACES)
            assert len(metadata_nodes) == 1
def test_listmetadataformats_record(app):
    """Run the shared ListMetadataFormats checks against a single record.

    A record is created and minted, then its OAI identifier is passed as
    the ``identifier`` query argument to ``_listmetadataformats``.
    """
    schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "field": {"type": "boolean"},
        },
        "required": ["title"],
    }

    with app.test_request_context():
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {"title": "Test0", "$schema": schema}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)
            # Read the value before the outer commit.
            pid_value = oai_pid.pid_value
        db.session.commit()

    query = "/oai2d?verb=ListMetadataFormats&identifier={0}".format(pid_value)
    _listmetadataformats(app=app, query=query)
def test_getrecord(app):
    """Exercise the GetRecord verb for a record carrying a preset OAI id.

    The metadata already contains ``_oai.id``; the test checks that the
    minted PID carries that same value and then inspects the XML returned
    by ``/oai2d``.
    """
    # Every one of these paths must match exactly one node.
    single_node_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:GetRecord',
        '/x:OAI-PMH/x:GetRecord/x:record/x:header',
        '/x:OAI-PMH/x:GetRecord/x:record/x:header/x:identifier',
    )

    with app.test_request_context():
        pid_value = 'oai:legacy:1'
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {
                '_oai': {'id': pid_value},
                'title_statement': {'title': 'Test0'},
            }
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)
        db.session.commit()

        assert pid_value == oai_pid.pid_value
        record_updated = record.updated

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=GetRecord&identifier={0}&metadataPrefix=oai_dc'
                .format(pid_value))
            assert 200 == response.status_code

            tree = etree.fromstring(response.data)
            for path in single_node_paths:
                assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

            found_identifier = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:record/x:header/x:identifier/text()',
                namespaces=NAMESPACES)
            assert found_identifier == [str(record.id)]

            found_datestamp = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:record/x:header/x:datestamp/text()',
                namespaces=NAMESPACES)
            assert found_datestamp == [datetime_to_datestamp(record_updated)]

            metadata_nodes = tree.xpath(
                '/x:OAI-PMH/x:GetRecord/x:record/x:metadata',
                namespaces=NAMESPACES)
            assert len(metadata_nodes) == 1
def test_listrecords(app):
    """Exercise the ListRecords verb, including paging and date filtering.

    Twelve records are created, minted and indexed.  The first response is
    expected to hold ten records plus a non-empty resumption token, the
    second the remaining two with an empty token.  Finally a from/until
    window around the last created record's ``updated`` value is queried
    with both timestamp granularities.
    """
    total = 12
    record_ids = []

    def check_page(tree, expected):
        """Assert that *tree* is a ListRecords page of *expected* records."""
        assert len(tree.xpath('/x:OAI-PMH', namespaces=NAMESPACES)) == 1
        assert len(tree.xpath('/x:OAI-PMH/x:ListRecords',
                              namespaces=NAMESPACES)) == 1
        for path in (
            '/x:OAI-PMH/x:ListRecords/x:record',
            '/x:OAI-PMH/x:ListRecords/x:record/x:header',
            '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
            '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
            '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
        ):
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == expected

    def first_token(tree):
        """Return the first resumptionToken element of *tree*."""
        return tree.xpath('/x:OAI-PMH/x:ListRecords/x:resumptionToken',
                          namespaces=NAMESPACES)[0]

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            for idx in range(total):
                new_id = uuid.uuid4()
                metadata = {
                    'title_statement': {'title': 'Test{0}'.format(idx)},
                }
                recid_minter(new_id, metadata)
                oaiid_minter(new_id, metadata)
                # The last created record is reused for the date window.
                record = Record.create(metadata, id_=new_id)
                record_ids.append(new_id)
        db.session.commit()

        for new_id in record_ids:
            indexer.index_by_id(new_id)
        current_search.flush_and_refresh('_all')

        # First page.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)
        check_page(tree, 10)
        resumption_token = first_token(tree)
        assert resumption_token.text

        # Second (last) page.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&resumptionToken={0}'.format(
                    resumption_token.text))
        tree = etree.fromstring(response.data)
        check_page(tree, 2)
        resumption_token = first_token(tree)
        assert not resumption_token.text

        # Check from:until range
        one_day = timedelta(days=1)
        with app.test_client() as client:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                lower = datetime_to_datestamp(
                    record.updated - one_day, day_granularity=granularity)
                upper = datetime_to_datestamp(
                    record.updated + one_day, day_granularity=granularity)
                response = client.get(
                    '/oai2d?verb=ListRecords&metadataPrefix=oai_dc'
                    '&from={0}&until={1}'.format(lower, upper))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                assert len(tree.xpath('/x:OAI-PMH/x:ListRecords/x:record',
                                      namespaces=NAMESPACES)) == 10
def minter(record_uuid, data):
    """Mint a recid and an OAI identifier for the same record.

    :param record_uuid: Passed to both minters as the record UUID.
    :param data: Passed to both minters as the record metadata.
    :returns: Whatever ``recid_minter`` returned; the result of
        ``oaiid_minter`` is discarded.
    """
    recid_pid = recid_minter(record_uuid, data)
    # Called after the recid minter, for its effect only.
    oaiid_minter(record_uuid, data)
    return recid_pid
def test_listrecords(app):
    """Exercise the ListRecords verb and its resumption token paging.

    Twelve records are created, minted and indexed; after a fixed pause
    the first response is expected to hold ten records plus a non-empty
    resumption token, and the second the remaining two with an empty one.
    """
    total = 12
    record_ids = []

    def check_page(tree, expected):
        """Assert that *tree* is a ListRecords page of *expected* records."""
        assert len(tree.xpath('/x:OAI-PMH', namespaces=NAMESPACES)) == 1
        assert len(tree.xpath('/x:OAI-PMH/x:ListRecords',
                              namespaces=NAMESPACES)) == 1
        for path in (
            '/x:OAI-PMH/x:ListRecords/x:record',
            '/x:OAI-PMH/x:ListRecords/x:record/x:header',
            '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
            '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
            '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
        ):
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == expected

    def first_token(tree):
        """Return the first resumptionToken element of *tree*."""
        return tree.xpath('/x:OAI-PMH/x:ListRecords/x:resumptionToken',
                          namespaces=NAMESPACES)[0]

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            for idx in range(total):
                new_id = uuid.uuid4()
                metadata = {
                    'title_statement': {'title': 'Test{0}'.format(idx)},
                }
                recid_minter(new_id, metadata)
                oaiid_minter(new_id, metadata)
                Record.create(metadata, id_=new_id)
                record_ids.append(new_id)
        db.session.commit()

        for new_id in record_ids:
            indexer.index_by_id(new_id)
        # Fixed pause before querying the endpoint.
        sleep(5)

        # First page.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)
        check_page(tree, 10)
        resumption_token = first_token(tree)
        assert resumption_token.text

        # Second (last) page.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&resumptionToken={0}'.format(
                    resumption_token.text))
        tree = etree.fromstring(response.data)
        check_page(tree, 2)
        resumption_token = first_token(tree)
        assert not resumption_token.text
def test_listrecords(app):
    """Exercise the ListRecords verb, including paging and date filtering.

    Twelve records are created, minted and indexed.  Two consecutive pages
    (ten records, then two) are checked together with their resumption
    tokens, followed by a from/until query around the last created
    record's ``updated`` value for both timestamp granularities.
    """
    total = 12
    record_ids = []
    per_record_paths = (
        '/x:OAI-PMH/x:ListRecords/x:record',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
        '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
    )

    def nodes(tree, path):
        """Evaluate *path* on *tree* with the OAI namespaces."""
        return tree.xpath(path, namespaces=NAMESPACES)

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            for idx in range(total):
                new_id = uuid.uuid4()
                metadata = {
                    'title_statement': {'title': 'Test{0}'.format(idx)},
                }
                recid_minter(new_id, metadata)
                oaiid_minter(new_id, metadata)
                # The last created record is reused for the date window.
                record = Record.create(metadata, id_=new_id)
                record_ids.append(new_id)
        db.session.commit()

        for new_id in record_ids:
            indexer.index_by_id(new_id)
        current_search.flush_and_refresh('_all')

        # First page: ten records and a usable resumption token.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)
        assert len(nodes(tree, '/x:OAI-PMH')) == 1
        assert len(nodes(tree, '/x:OAI-PMH/x:ListRecords')) == 1
        for path in per_record_paths:
            assert len(nodes(tree, path)) == 10
        resumption_token = nodes(
            tree, '/x:OAI-PMH/x:ListRecords/x:resumptionToken')[0]
        assert resumption_token.text

        # Second page: the remaining two records and an empty token.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&resumptionToken={0}'.format(
                    resumption_token.text))
        tree = etree.fromstring(response.data)
        assert len(nodes(tree, '/x:OAI-PMH')) == 1
        assert len(nodes(tree, '/x:OAI-PMH/x:ListRecords')) == 1
        for path in per_record_paths:
            assert len(nodes(tree, path)) == 2
        resumption_token = nodes(
            tree, '/x:OAI-PMH/x:ListRecords/x:resumptionToken')[0]
        assert not resumption_token.text

        # Check from:until range
        with app.test_client() as client:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                lower = datetime_to_datestamp(
                    record.updated - timedelta(days=1),
                    day_granularity=granularity)
                upper = datetime_to_datestamp(
                    record.updated + timedelta(days=1),
                    day_granularity=granularity)
                response = client.get(
                    '/oai2d?verb=ListRecords&metadataPrefix=oai_dc'
                    '&from={0}&until={1}'.format(lower, upper))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                assert len(nodes(
                    tree, '/x:OAI-PMH/x:ListRecords/x:record')) == 10
def test_listidentifiers(app):
    """Exercise the ListIdentifiers verb, with and without a date window.

    An OAI set matching ``title_statement.title:Test0`` is stored, one
    matching record is created, minted and indexed, and the identifier and
    datestamp returned by ``/oai2d`` are compared with the minted PID value
    and the record's ``updated`` value.  The same identifier is then
    expected for a from/until query restricted to the set.
    """
    from invenio_oaiserver.models import OAISet

    identifier_path = '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier'

    with app.app_context():
        current_oaiserver.unregister_signals_oaiset()
        # create new OAI Set
        with db.session.begin_nested():
            db.session.add(OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            ))
        db.session.commit()
        run_after_insert_oai_set()

    with app.test_request_context():
        indexer = RecordIndexer()

        # create a new record (inside the OAI Set)
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)
        db.session.commit()

        indexer.index_by_id(new_id)
        current_search.flush_and_refresh('_all')
        pid_value = oai_pid.pid_value

        # get the list of identifiers
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)

        for path in (
            '/x:OAI-PMH',
            '/x:OAI-PMH/x:ListIdentifiers',
            '/x:OAI-PMH/x:ListIdentifiers/x:header',
        ):
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

        identifiers = tree.xpath(identifier_path, namespaces=NAMESPACES)
        assert len(identifiers) == 1
        assert identifiers[0].text == str(pid_value)

        datestamps = tree.xpath(
            '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp',
            namespaces=NAMESPACES)
        assert len(datestamps) == 1
        assert datestamps[0].text == datetime_to_datestamp(record.updated)

        # Check from:until range
        with app.test_client() as client:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                lower = datetime_to_datestamp(
                    record.updated - timedelta(1),
                    day_granularity=granularity)
                upper = datetime_to_datestamp(
                    record.updated + timedelta(1),
                    day_granularity=granularity)
                response = client.get(
                    '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
                    '&from={0}&until={1}&set=test0'.format(lower, upper))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                identifiers = tree.xpath(identifier_path,
                                         namespaces=NAMESPACES)
                assert len(identifiers) == 1
def load_records_with_files(records, upload_dir, max=0, verbose=0,
                            files=True, cache=True, skip=False):
    """Load records with files support.

    For every record that has a ``document`` entry, the record is either
    updated (its ``recid`` already resolves) or created together with a
    ``recid`` PID, a bucket and an OAI identifier.  When ``files`` is true
    each document is fetched with ``upload_file`` and, for every file
    obtained, the file is attached to the record and ``append_thumbnail``
    and ``append_extracted_text`` are called.

    :param records: list of records in JSON format.
    :param upload_dir: directory for temporary files will be used for cache.
    :param max: max records to load (``0`` loads all of them).
    :param verbose: verbose level.
    :param files: attach files if True.
    :param cache: use cache if True.
    :param skip: skip invalid records instead of re-raising.
    :returns: list of touched uuids for indexing.
    :raises ValidationError: for an invalid record when ``skip`` is false.
    """
    Logger.verbose = verbose
    rec_uuids = []
    resolver = Resolver('recid', 'rec', Record.get_record)

    if not files:
        warning('files are ignored')

    # stop if max record is reached
    if max:
        records = records[:max]

    count = len(records)
    click.secho('Starting loading {0} record ...'.format(count), fg='green')

    with click.progressbar(records, length=count) as bar:
        for record in bar:
            # ignore record if does not contains document
            if not record.get('document'):
                if verbose > 1:
                    warning('%s do not contains document' %
                            record.get('recid'))
                continue

            recid = record.get('recid', '-1')
            if recid:
                info('record: %s detected...' % recid)

            update = True
            try:
                # record already exists in db?
                try:
                    pid, rec = resolver.resolve(recid)
                    rec_uuid = pid.object_uuid
                    info('record: %s exists, updating...' % recid)
                    rec.update(record)
                    rec.commit()
                # create new record
                except PIDDoesNotExistError:
                    update = False
                    # generate a new uuid
                    rec_uuid = uuid.uuid4()
                    # create mapping between recid and uuid
                    pid = PersistentIdentifier.create('recid', recid,
                                                      object_type='rec',
                                                      object_uuid=rec_uuid)
                    # create Record
                    rec = Record.create(record, id_=rec_uuid)
                    bucket = Bucket.create()
                    RecordsBuckets.create(record=rec.model, bucket=bucket)
                    pid.register()
                    info('%s record created' % rec.get('recid'))
                    oaiid_minter(rec_uuid, rec)

                if files:
                    rec_upload_dir = os.path.join(upload_dir, recid)
                    try:
                        os.makedirs(rec_upload_dir)
                    except FileExistsError:
                        pass

                    for document in record.get('document'):
                        file_name = upload_file(document.get('url'),
                                                rec_upload_dir,
                                                force=not cache)
                        if file_name:
                            name = document.get('name')
                            # Close the handle once the file is attached;
                            # the original left it open for every document.
                            # NOTE(review): assumes the assignment reads
                            # the stream immediately - confirm.
                            with open(file_name, 'rb') as stream:
                                rec.files[name] = stream
                            rec.files[name]['filetype'] = 'main'
                            append_thumbnail(rec, document, rec_upload_dir,
                                             not cache)
                            append_extracted_text(rec, document,
                                                  rec_upload_dir, not cache)
                # NOTE(review): the collapsed source does not show whether
                # this commit sat inside ``if files:``; running it
                # unconditionally covers both readings - confirm.
                rec.commit()
            except ValidationError as e:
                if not update:
                    # Drop the recid PID created for the rejected record.
                    pid.delete()
                else:
                    info('Record %s untouched' % recid)
                error('Invalid record (%s)' % recid)
                warning('Validation error: %s' % e)
                if not skip:
                    raise
                continue
            else:
                db.session.flush()
                # touched record
                rec_uuids.append(rec_uuid)

    db.session.commit()
    return rec_uuids
def test_listidentifiers(app):
    """Exercise the ListIdentifiers verb, with and without a date window.

    One OAI set and one matching record are created; the plain listing
    must return exactly that record's identifier and datestamp, and a
    from/until query restricted to the set must return one identifier for
    both timestamp granularities.
    """
    from invenio_oaiserver.models import OAISet

    def nodes(tree, path):
        """Evaluate *path* on *tree* with the OAI namespaces."""
        return tree.xpath(path, namespaces=NAMESPACES)

    with app.app_context():
        current_oaiserver.unregister_signals_oaiset()
        # create new OAI Set
        with db.session.begin_nested():
            new_set = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            )
            db.session.add(new_set)
        db.session.commit()
        run_after_insert_oai_set()

    with app.test_request_context():
        indexer = RecordIndexer()

        # create a new record (inside the OAI Set)
        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)
        db.session.commit()

        indexer.index_by_id(new_id)
        current_search.flush_and_refresh('_all')
        pid_value = oai_pid.pid_value

        # get the list of identifiers
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)

        assert len(nodes(tree, '/x:OAI-PMH')) == 1
        assert len(nodes(tree, '/x:OAI-PMH/x:ListIdentifiers')) == 1
        assert len(nodes(tree, '/x:OAI-PMH/x:ListIdentifiers/x:header')) == 1

        identifiers = nodes(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier')
        assert len(identifiers) == 1
        assert identifiers[0].text == str(pid_value)

        datestamps = nodes(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp')
        assert len(datestamps) == 1
        assert datestamps[0].text == datetime_to_datestamp(record.updated)

        # Check from:until range
        with app.test_client() as client:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                lower = datetime_to_datestamp(
                    record.updated - timedelta(1),
                    day_granularity=granularity)
                upper = datetime_to_datestamp(
                    record.updated + timedelta(1),
                    day_granularity=granularity)
                response = client.get(
                    '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
                    '&from={0}&until={1}&set=test0'.format(lower, upper))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                identifiers = nodes(
                    tree,
                    '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier')
                assert len(identifiers) == 1
def test_listidentifiers(app):
    """Exercise the ListIdentifiers verb for a record with an inline schema.

    An OAI set matching ``title:Test0`` and one matching record are
    created; after a fixed pause the listing must return that record's
    identifier and datestamp, and a query carrying ``from_``/``until`` and
    the set must return one identifier.
    """
    schema = {
        'type': 'object',
        'properties': {
            'title': {'type': 'string'},
            'field': {'type': 'boolean'},
        },
        'required': ['title'],
    }
    from invenio_oaiserver.models import OAISet

    identifier_path = '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier'

    with app.app_context():
        with db.session.begin_nested():
            new_set = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title:Test0',
            )
            db.session.add(new_set)
        db.session.commit()

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)
        db.session.commit()

        indexer.index_by_id(new_id)
        # Fixed pause before querying the endpoint.
        sleep(2)
        pid_value = oai_pid.pid_value

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)

        for path in (
            '/x:OAI-PMH',
            '/x:OAI-PMH/x:ListIdentifiers',
            '/x:OAI-PMH/x:ListIdentifiers/x:header',
        ):
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

        identifiers = tree.xpath(identifier_path, namespaces=NAMESPACES)
        assert len(identifiers) == 1
        assert identifiers[0].text == str(pid_value)

        datestamps = tree.xpath(
            '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp',
            namespaces=NAMESPACES)
        assert len(datestamps) == 1
        assert datestamps[0].text == datetime_to_datestamp(record.updated)

        # Check from_:until range
        with app.test_client() as client:
            lower = datetime_to_datestamp(
                record.updated - datetime.timedelta(1))
            upper = datetime_to_datestamp(
                record.updated + datetime.timedelta(1))
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
                '&from_={0}&until={1}&set=test0'.format(lower, upper))
        tree = etree.fromstring(response.data)

        identifiers = tree.xpath(identifier_path, namespaces=NAMESPACES)
        assert len(identifiers) == 1
def pids():
    """Fetch and register PIDs.

    For every stored record this makes sure that

    * the PID returned by ``recid_fetcher`` exists, creating it as
      ``REGISTERED`` when it does not (records for which the lookup raises
      ``KeyError`` are reported and skipped);
    * ``_oai.id`` is set, falling back to ``OAISERVER_ID_PREFIX`` followed
      by the record's ``control_number`` when the record has none;
    * the matching OAI PID exists, minting it when it does not.

    Each record is committed on its own and the session is expunged
    between records.
    """
    from invenio_db import db
    # The fetcher is named ``oaiid_fetcher``; the previous import was
    # misspelled and did not match the call below.
    from invenio_oaiserver.fetchers import oaiid_fetcher
    from invenio_oaiserver.minters import oaiid_minter
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PIDStatus, PersistentIdentifier
    from invenio_pidstore.fetchers import recid_fetcher
    from invenio_records.models import RecordMetadata

    recids = [r.id for r in RecordMetadata.query.all()]
    db.session.expunge_all()

    with click.progressbar(recids) as bar:
        for record_id in bar:
            record = RecordMetadata.query.get(record_id)
            try:
                pid = recid_fetcher(record.id, record.json)
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider
                )
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                db.session.add(
                    PersistentIdentifier.create(
                        pid.pid_type, pid.pid_value,
                        object_type='rec', object_uuid=record.id,
                        status=PIDStatus.REGISTERED
                    )
                )
            except KeyError:
                click.echo('Skiped: {0}'.format(record.id))
                continue

            pid_value = record.json.get('_oai', {}).get('id')
            if pid_value is None:
                assert 'control_number' in record.json
                pid_value = current_app.config.get(
                    'OAISERVER_ID_PREFIX'
                ) + str(record.json['control_number'])

            record.json.setdefault('_oai', {})
            # Store the OAI identifier computed above; ``pid`` still refers
            # to the recid PID here and must not be written into ``_oai``.
            record.json['_oai']['id'] = pid_value

            pid = oaiid_fetcher(record.id, record.json)
            try:
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider
                )
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                pid = oaiid_minter(record.id, record.json)
                db.session.add(pid)

            # The JSON was changed in place, so flag it explicitly.
            flag_modified(record, 'json')
            assert record.json['_oai']['id']
            db.session.add(record)
            db.session.commit()
            db.session.expunge_all()
def test_listidentifiers(app):
    """Exercise the ListIdentifiers verb against ``_oai.updated``.

    An OAI set matching ``title_statement.title:Test0`` and one matching
    record are created; after a fixed pause the listing must return that
    record's identifier, and its datestamp must equal the record's
    ``_oai.updated`` value.  A query carrying ``from_``/``until`` and the
    set must return one identifier.
    """
    from invenio_oaiserver.models import OAISet

    def nodes(tree, path):
        """Evaluate *path* on *tree* with the OAI namespaces."""
        return tree.xpath(path, namespaces=NAMESPACES)

    with app.app_context():
        with db.session.begin_nested():
            new_set = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            )
            db.session.add(new_set)
        db.session.commit()

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oai_pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)
        db.session.commit()

        indexer.index_by_id(new_id)
        # Fixed pause before querying the endpoint.
        sleep(2)
        pid_value = oai_pid.pid_value

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')
        tree = etree.fromstring(response.data)

        assert len(nodes(tree, '/x:OAI-PMH')) == 1
        assert len(nodes(tree, '/x:OAI-PMH/x:ListIdentifiers')) == 1
        assert len(nodes(tree, '/x:OAI-PMH/x:ListIdentifiers/x:header')) == 1

        identifiers = nodes(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier')
        assert len(identifiers) == 1
        assert identifiers[0].text == str(pid_value)

        datestamps = nodes(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp')
        assert len(datestamps) == 1
        assert datestamps[0].text == record['_oai']['updated']

        # Check from_:until range
        with app.test_client() as client:
            lower = datetime_to_datestamp(
                record.updated - datetime.timedelta(1))
            upper = datetime_to_datestamp(
                record.updated + datetime.timedelta(1))
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
                '&from_={0}&until={1}&set=test0'.format(lower, upper))
        tree = etree.fromstring(response.data)

        identifiers = nodes(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier')
        assert len(identifiers) == 1