def add_oai_information(obj, eng):
    """Complete the ``_oai`` section of a stored record and reindex it.

    The record is resolved through the ``recid`` persistent identifier whose
    value is ``obj.data['control_number']``.  Missing OAI data (identifier,
    sets) is filled in, the ``updated`` stamp is set to the current UTC time,
    then the record and the workflow object are saved, the session is
    committed and the record is sent to the indexer.

    :param obj: workflow object; ``obj.data['control_number']`` is read and
        ``obj.save()`` is called.
    :param eng: workflow engine; not used by this step.
    """
    control_number = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', control_number)
    record_uuid = pid.object_uuid
    existing_record = Record.get_record(record_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(record_uuid, existing_record)
        except PIDAlreadyExists:
            # The minter refused because the PID exists already: build the
            # OAI section by hand instead.
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % control_number,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(record_uuid, existing_record)

    # Covers both a missing 'sets' key and an empty value.
    if not existing_record['_oai'].get('sets'):
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    stamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
    existing_record['_oai']['updated'] = stamp

    existing_record.commit()
    obj.save()
    db.session.commit()

    RecordIndexer().index_by_id(record_uuid)
def records():
    """Create one restricted test record, index it and commit.

    A test user is created first through ``create_test_user``.  A registered
    ``recid`` PID with value ``'1'`` and the matching record are created in a
    nested transaction and the record is indexed.  The session is then
    committed and the function sleeps for three seconds.
    """
    import uuid
    from invenio_records.api import Record
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus

    create_test_user()

    indexer = RecordIndexer()

    # Metadata of record 1 (the live record).
    metadata = {
        'title': 'Registered',
        'description': 'This is an awesome description',
        'control_number': '1',
        'access_right': 'restricted',
        'access_conditions': 'fuu',
        'owners': [1, 2],
        'recid': 1
    }

    with db.session.begin_nested():
        record_uuid = uuid.uuid4()
        live_pid = PersistentIdentifier.create(
            'recid', '1',
            object_type='rec',
            object_uuid=record_uuid,
            status=PIDStatus.REGISTERED)
        Record.create(metadata, id_=record_uuid)
        indexer.index_by_id(live_pid.object_uuid)

    db.session.commit()
    sleep(3)
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    """Convert, store and index every raw record of ``chunk``.

    Each raw record is parsed with ``marc_create_record``, converted with
    ``create_record``, stored as a new ``Record`` and given a persistent
    identifier by ``inspire_recid_minter``.  The session is committed after
    each record; indexing is requested once all records are stored.

    :param chunk: iterable of raw records.
    :param broken_output: accepted for interface compatibility; not used here.
    :param dry_run: accepted for interface compatibility; not used here.
    :returns: the UUID (as a string) of the last record created, or ``None``
        when ``chunk`` is empty.
    """
    from invenio_indexer.api import RecordIndexer

    from ..pidstore.minters import inspire_recid_minter

    indexer = RecordIndexer()

    index_queue = []
    # Bound up front so an empty chunk returns None instead of raising
    # UnboundLocalError at the final ``return``.
    rec_uuid = None

    for raw_record in chunk:
        record = marc_create_record(raw_record, keep_singletons=False)
        json_record = create_record(record)
        if '$schema' in json_record:
            # Replace the bare schema name with the URL of the served schema.
            json_record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(json_record['$schema'])
            )
        rec_uuid = str(Record.create(json_record, id_=None).id)

        # Create persistent identifier.
        pid = inspire_recid_minter(rec_uuid, json_record)

        index_queue.append(pid.object_uuid)

        db.session.commit()

    # Request record indexing
    for record_uuid in index_queue:
        indexer.index_by_id(record_uuid)

    # Send task to migrate files.
    return rec_uuid
def store_record(obj, *args, **kwargs):
    """Persist ``obj.data`` as a new record, mint its recid and index it.

    ``obj.data`` must contain a ``$schema`` key.  After the record is created
    and committed, ``obj.data`` is replaced by the record dump, the session
    is committed and the record is indexed.

    :param obj: workflow object carrying the record metadata in ``obj.data``.
    """
    assert "$schema" in obj.data, "No $schema attribute found!"

    # FIXME: Do some preprocessing of obj.data before creating a record so
    # that we're sure that the schema will be validated without touching the
    # full holdingpen stack.
    new_record = Record.create(obj.data, id_=None)

    # The minter receives the record id as a string and the record itself.
    recid_pid = inspire_recid_minter(str(new_record.id), new_record)

    # Save the record, then mirror its dump onto the workflow object.
    new_record.commit()
    obj.data = new_record.dumps()

    # The database commit comes before indexing.
    db.session.commit()

    RecordIndexer().index_by_id(recid_pid.object_uuid)
def records():
    """Load the test fixture: a single restricted, registered record.

    Calls ``create_test_user``, creates PID ``recid``/``'1'`` together with
    its record inside a nested transaction, indexes the record, commits the
    session and finally sleeps for three seconds.
    """
    import uuid
    from invenio_records.api import Record
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus

    create_test_user()

    indexer = RecordIndexer()

    # Record 1 - Live record
    with db.session.begin_nested():
        new_uuid = uuid.uuid4()
        registered_pid = PersistentIdentifier.create(
            'recid',
            '1',
            object_type='rec',
            object_uuid=new_uuid,
            status=PIDStatus.REGISTERED)

        payload = {
            'title': 'Registered',
            'description': 'This is an awesome description',
            'control_number': '1',
            'access_right': 'restricted',
            'access_conditions': 'fuu',
            'owners': [1, 2],
            'recid': 1
        }
        Record.create(payload, id_=new_uuid)

        indexer.index_by_id(registered_pid.object_uuid)

    db.session.commit()
    sleep(3)
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy.

    Pops entries from the ``legacy_records`` Redis list (the Redis URL is
    read from the ``CACHE_REDIS_URL`` application setting) for as long as the
    list is non-empty.  Each entry is decompressed, parsed with
    ``marc_create_record``, converted with ``create_record`` and upserted
    with ``record_upsert`` inside a nested transaction, then indexed.  When
    the upsert raises ``ValidationError`` the error is stored on an
    ``InspireProdRecords`` object that is merged into the session, and the
    record is not indexed.  The session is committed and closed in all cases.
    """
    indexer = RecordIndexer()
    redis_url = current_app.config.get('CACHE_REDIS_URL')
    r = StrictRedis.from_url(redis_url)

    try:
        while r.llen('legacy_records'):
            raw_record = r.lpop('legacy_records')
            if raw_record:
                # FIXME use migrate_and_insert_record(raw_record)
                # The record might be None, in case a parallel
                # continuous_migration task has already consumed the queue.
                raw_record = zlib.decompress(raw_record)
                record = marc_create_record(raw_record, keep_singletons=False)
                # The recid is taken from the first value of field 001.
                recid = int(record['001'][0])
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
                json_record = create_record(record)
                with db.session.begin_nested():
                    try:
                        record = record_upsert(json_record)
                    except ValidationError as e:
                        # Invalid record, will not get indexed
                        errors = "ValidationError: Record {0}: {1}".format(
                            recid, e
                        )
                        prod_record.valid = False
                        prod_record.errors = errors
                        db.session.merge(prod_record)
                        # Move on to the next queue entry without indexing.
                        continue
                indexer.index_by_id(record.id)
    finally:
        # Runs on normal exit and on any exception raised above.
        db.session.commit()
        db.session.close()
def store_record(obj, eng):
    """Normalise the source fields, then create, commit and index the record.

    Sources containing 'Italiana di Fisica' (case-insensitively) in the first
    abstract or in the acquisition source are rewritten to 'Springer/SIF'.
    ``record_creation_year`` is derived from ``record_creation_date``.  The
    record is then created, given a recid, committed and indexed; the new
    control number is copied back to ``obj.data``.  Validation errors and
    already existing PIDs are reported through ``__halt_and_notify``.

    :param obj: workflow object carrying the record metadata in ``obj.data``.
    :param eng: workflow engine, forwarded to ``__halt_and_notify``.
    """
    sif_marker = 'Italiana di Fisica'.lower()

    first_abstract = obj.data['abstracts'][0]
    if sif_marker in first_abstract['source'].lower():
        obj.data['abstracts'][0]['source'] = 'Springer/SIF'

    if sif_marker in obj.data['acquisition_source']['source'].lower():
        obj.data['acquisition_source']['source'] = 'Springer/SIF'

    creation_date = parse_date(obj.data['record_creation_date'])
    obj.data['record_creation_year'] = creation_date.year

    try:
        record = Record.create(obj.data, id_=None)

        # Mint the persistent identifier for the new record.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # The database commit comes before indexing.
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        RecordIndexer().index_by_id(pid.object_uuid)
    except ValidationError as err:
        message = "Validation error: %s. Skipping..." % (err, )
        __halt_and_notify(message, obj, eng)
    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
def test_listrecords(app):
    """Check that ListRecords returns exactly one fully populated record.

    One record is created, minted (recid and OAI id), committed and indexed;
    after a two second pause the ``ListRecords`` response is parsed and each
    expected element must occur exactly once.
    """
    title_schema = {
        'type': 'object',
        'properties': {
            'title': {'type': 'string'},
            'field': {'type': 'boolean'},
        },
        'required': ['title'],
    }
    # Each of these paths must match exactly one element of the response.
    single_match_paths = (
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:ListRecords',
        '/x:OAI-PMH/x:ListRecords/x:record',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:identifier',
        '/x:OAI-PMH/x:ListRecords/x:record/x:header/x:datestamp',
        '/x:OAI-PMH/x:ListRecords/x:record/x:metadata',
    )

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            record_id = uuid.uuid4()
            data = {'title': 'Test0', '$schema': title_schema}
            recid_minter(record_id, data)
            oaiid_minter(record_id, data)
            Record.create(data, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        sleep(2)

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        for path in single_match_paths:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1
def update_authors_recid(record_id, uuid, profile_recid):
    """Point a signature of a record at the given author profile.

    Every author entry of the record whose ``uuid`` equals the given one gets
    its ``recid`` set to ``str(profile_recid)``.  If at least one entry was
    changed, the record is committed and reindexed with the
    ``before_record_index`` receiver disconnected.  The receiver is connected
    again in every case, and a ``StaleDataError`` triggers a task retry.

    :param record_id: A string representing UUID of a given record.
        Example:
            record_id = "a5afb151-8f75-4e91-8dc1-05e7e8e8c0b8"

    :param uuid: A string representing UUID of a given signature.
        Example:
            uuid = "c2f432bd-2f52-4c16-ac66-096f168c762f"

    :param profile_recid: A string representing author profile recid, that
        the updated signature should point to.
        Example:
            profile_recid = "1"
    """
    try:
        record = Record.get_record(record_id)
        changed = False

        for signature in record['authors']:
            if signature['uuid'] != uuid:
                continue
            signature['recid'] = str(profile_recid)
            changed = True

        if changed:
            # Keep the index receiver from firing for this update.
            before_record_index.disconnect(append_updated_record_to_queue)

            # Persist the record, then refresh it in Elasticsearch.
            record.commit()
            db.session.commit()
            RecordIndexer().index_by_id(record.id)
    except StaleDataError as exc:
        raise update_authors_recid.retry(exc=exc)
    finally:
        # Connect the receiver again, whether or not it was disconnected.
        before_record_index.connect(append_updated_record_to_queue)

    # Report.
    logger.info("Updated signature %s with profile %s", uuid, profile_recid)
def index_record(obj, eng):
    """Send the record behind ``obj`` to the indexer.

    It only should be indexed when every other step finished successfully.

    :param obj: workflow object; ``obj.data['control_number']`` is the value
        of the ``recid`` persistent identifier to resolve.
    :param eng: workflow engine; not used by this step.
    """
    control_number = obj.data['control_number']
    record_pid = PersistentIdentifier.get('recid', control_number)
    RecordIndexer().index_by_id(record_pid.object_uuid)
def create_keyword(data):
    """Create, commit and index a keyword fixture.

    :param data: metadata handed to ``Keyword.create``.
    :returns: the created keyword.
    """
    with db.session.begin_nested():
        new_keyword = Keyword.create(data)
    db.session.commit()

    # Index only once the keyword has been committed.
    RecordIndexer().index_by_id(new_keyword.id)
    return new_keyword
def test_listidentifiers(app):
    """Check the ListIdentifiers response for one indexed record.

    The single header must carry the minted OAI identifier and a datestamp
    equal to ``datetime_to_datestamp(record.updated)``.
    """
    title_schema = {
        'type': 'object',
        'properties': {
            'title': {'type': 'string'},
            'field': {'type': 'boolean'},
        },
        'required': ['title'],
    }

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            record_id = uuid.uuid4()
            data = {'title': 'Test0', '$schema': title_schema}
            recid_minter(record_id, data)
            oai_pid = oaiid_minter(record_id, data)
            record = Record.create(data, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        sleep(1)

        pid_value = oai_pid.pid_value

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        namespaces = {'x': NS_OAIPMH}

        def select(path):
            """Evaluate ``path`` against the response tree."""
            return tree.xpath(path, namespaces=namespaces)

        header_path = '/x:OAI-PMH/x:ListIdentifiers/x:header'

        assert len(select('/x:OAI-PMH')) == 1
        assert len(select('/x:OAI-PMH/x:ListIdentifiers')) == 1
        assert len(select(header_path)) == 1

        identifier = select(header_path + '/x:identifier')
        assert len(identifier) == 1
        assert identifier[0].text == str(pid_value)

        datestamp = select(header_path + '/x:datestamp')
        assert len(datestamp) == 1
        assert datestamp[0].text == datetime_to_datestamp(record.updated)
def create_category(api_app, db, data):
    """Create, commit and index a category fixture.

    :param api_app: application fixture; not used in the body.
    :param db: database object whose session is used.
    :param data: metadata handed to ``catid_minter`` and ``Category.create``.
    :returns: the created category.
    """
    with db.session.begin_nested():
        minted_for = uuid.uuid4()
        # NOTE(review): the PID is minted for this UUID, but the UUID is not
        # passed on to Category.create - confirm this is intended.
        catid_minter(minted_for, data)
        new_category = Category.create(data)
    db.session.commit()

    # Index only once the category has been committed.
    RecordIndexer().index_by_id(new_category.id)
    return new_category
def store_record(obj, *args, **kwargs):
    """Store ``obj.data`` as a new record, mint its recid and index it.

    When ``obj.data`` has a ``$schema`` value, it is first replaced by the
    URL that ``url_for`` builds for ``records/<value>``.

    :param obj: workflow object carrying the record metadata in ``obj.data``.
    """
    if '$schema' in obj.data:
        schema_path = "records/{0}".format(obj.data['$schema'])
        obj.data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema', schema_path=schema_path)

    created = Record.create(obj.data, id_=None)
    rec_uuid = str(created.id)

    # The minter receives the record UUID as a string and the metadata.
    recid_pid = inspire_recid_minter(rec_uuid, obj.data)
    db.session.commit()

    RecordIndexer().index_by_id(recid_pid.object_uuid)
def test_listrecords(app):
    """Verify the ListRecords verb against a single indexed record.

    Every structural element of the response (record, header, identifier,
    datestamp, metadata) must be present exactly once.
    """
    schema = {
        'type': 'object',
        'properties': {
            'title': {'type': 'string'},
            'field': {'type': 'boolean'},
        },
        'required': ['title'],
    }

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title': 'Test0', '$schema': schema}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)

        db.session.commit()

        indexer.index_by_id(new_id)
        sleep(2)

        with app.test_client() as c:
            result = c.get('/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(result.data)

        def count(path):
            """Return how many elements of the response match ``path``."""
            return len(tree.xpath(path, namespaces=NAMESPACES))

        record_path = '/x:OAI-PMH/x:ListRecords/x:record'

        assert count('/x:OAI-PMH') == 1
        assert count('/x:OAI-PMH/x:ListRecords') == 1
        assert count(record_path) == 1
        assert count(record_path + '/x:header') == 1
        assert count(record_path + '/x:header/x:identifier') == 1
        assert count(record_path + '/x:header/x:datestamp') == 1
        assert count(record_path + '/x:metadata') == 1
def store_record(obj, *args, **kwargs):
    """Create a record from ``obj.data``, mint its recid and index it.

    ``obj.data`` must contain a ``$schema`` key.  Once the record has been
    created and committed, its dump replaces ``obj.data``; the session is
    then committed and the record is indexed.

    :param obj: workflow object carrying the record metadata in ``obj.data``.
    """
    assert "$schema" in obj.data, "No $schema attribute found!"

    stored = Record.create(obj.data, id_=None)
    minted_pid = inspire_recid_minter(str(stored.id), stored)

    # Save the record and copy its dump onto the workflow object.
    stored.commit()
    obj.data = stored.dumps()

    # The session is committed before the record is indexed.
    db.session.commit()
    RecordIndexer().index_by_id(minted_pid.object_uuid)
def test_listidentifiers(app):
    """Verify the ListIdentifiers verb against a single indexed record.

    The only header returned must hold the OAI identifier minted for the
    record and the datestamp derived from ``record.updated``.
    """
    schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "field": {"type": "boolean"},
        },
        "required": ["title"],
    }
    list_path = "/x:OAI-PMH/x:ListIdentifiers"
    header_path = "/x:OAI-PMH/x:ListIdentifiers/x:header"
    identifier_path = "/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier"
    datestamp_path = "/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp"

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {"title": "Test0", "$schema": schema}
            recid_minter(new_id, metadata)
            pid = oaiid_minter(new_id, metadata)
            record = Record.create(metadata, id_=new_id)

        db.session.commit()

        indexer.index_by_id(new_id)
        sleep(1)

        expected_identifier = str(pid.pid_value)

        with app.test_client() as c:
            result = c.get(
                "/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc")

        tree = etree.fromstring(result.data)
        namespaces = {"x": NS_OAIPMH}

        for path in ("/x:OAI-PMH", list_path, header_path):
            assert len(tree.xpath(path, namespaces=namespaces)) == 1

        identifier = tree.xpath(identifier_path, namespaces=namespaces)
        assert len(identifier) == 1
        assert identifier[0].text == expected_identifier

        datestamp = tree.xpath(datestamp_path, namespaces=namespaces)
        assert len(datestamp) == 1
        assert datestamp[0].text == datetime_to_datestamp(record.updated)
def test_listrecords(app):
    """Check ListRecords for one record that has a ``title_statement``.

    After creating, committing and indexing the record, the response must
    contain each expected element exactly once.
    """
    record_path = '/x:OAI-PMH/x:ListRecords/x:record'
    # Each of these paths must match exactly one element of the response.
    expected_once = [
        '/x:OAI-PMH',
        '/x:OAI-PMH/x:ListRecords',
        record_path,
        record_path + '/x:header',
        record_path + '/x:header/x:identifier',
        record_path + '/x:header/x:datestamp',
        record_path + '/x:metadata',
    ]

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            new_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(new_id, metadata)
            oaiid_minter(new_id, metadata)
            Record.create(metadata, id_=new_id)

        db.session.commit()

        indexer.index_by_id(new_id)
        sleep(2)

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        for path in expected_once:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1
from dojson.contrib.marc21.utils import create_record, split_stream
from scoap3.hep.model import hep
from invenio_records import Record
from invenio_db import db
from invenio_indexer.api import RecordIndexer
from scoap3.modules.pidstore.minters import scoap3_recid_minter

# Convert every record of the MARCXML export with the HEP model.  The file
# is read inside a ``with`` block so the handle is closed after parsing.
with open('../data/scoap3export.xml', 'r') as export_file:
    recs = [
        hep.do(create_record(data))
        for data in split_stream(export_file)
    ]

# One indexer instance is enough for the whole run.
indexer = RecordIndexer()

for i, obj in enumerate(recs, start=1):
    print("Creating record {}/{}".format(i, len(recs)))

    # Store the converted record of this iteration.  The comprehension
    # variable ``data`` must not be used here: it is the raw, unconverted
    # input and is not visible outside the comprehension on Python 3.
    record = Record.create(obj, id_=None)
    print(record)

    # Create persistent identifier.
    pid = scoap3_recid_minter(str(record.id), record)
    print(pid.object_uuid)

    # Commit any changes to record
    record.commit()

    # Commit to DB before indexing
    db.session.commit()

    # Index record
    indexer.index_by_id(pid.object_uuid)
def test_listidentifiers(app):
    """Check ListIdentifiers, plain and restricted by date range and set.

    An OAI set ``test0`` is stored first.  One record is then created,
    committed and indexed.  The plain response must list its identifier and
    the datestamp stored in ``record['_oai']['updated']``; the request
    restricted to +/- one day around ``record.updated`` and to set ``test0``
    must return one identifier as well.
    """
    from invenio_oaiserver.models import OAISet

    header_path = '/x:OAI-PMH/x:ListIdentifiers/x:header'
    identifier_path = header_path + '/x:identifier'

    with app.app_context():
        with db.session.begin_nested():
            test_set = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            )
            db.session.add(test_set)
        db.session.commit()

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            record_id = uuid.uuid4()
            data = {'title_statement': {'title': 'Test0'}}
            recid_minter(record_id, data)
            pid = oaiid_minter(record_id, data)
            record = Record.create(data, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        sleep(2)

        pid_value = pid.pid_value

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)

        for path in ('/x:OAI-PMH', '/x:OAI-PMH/x:ListIdentifiers',
                     header_path):
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

        identifier = tree.xpath(identifier_path, namespaces=NAMESPACES)
        assert len(identifier) == 1
        assert identifier[0].text == str(pid_value)

        datestamp = tree.xpath(
            header_path + '/x:datestamp', namespaces=NAMESPACES)
        assert len(datestamp) == 1
        assert datestamp[0].text == record['_oai']['updated']

        # Check from_:until range
        one_day = datetime.timedelta(1)
        range_url = (
            '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
            '&from_={0}&until={1}&set=test0'
        ).format(
            datetime_to_datestamp(record.updated - one_day),
            datetime_to_datestamp(record.updated + one_day),
        )
        with app.test_client() as client:
            response = client.get(range_url)

        tree = etree.fromstring(response.data)
        identifier = tree.xpath(identifier_path, namespaces=NAMESPACES)
        assert len(identifier) == 1
def test_listrecords(app):
    """Check ListRecords paging over twelve records.

    The first response must hold ten records and a non-empty resumption
    token; the request made with that token must return the remaining two
    records and an empty token.
    """
    total = 12
    record_ids = []

    record_path = '/x:OAI-PMH/x:ListRecords/x:record'
    per_record_paths = (
        record_path,
        record_path + '/x:header',
        record_path + '/x:header/x:identifier',
        record_path + '/x:header/x:datestamp',
        record_path + '/x:metadata',
    )

    def check_page(tree, expected):
        """Assert the page holds ``expected`` records; return its token."""
        assert len(tree.xpath('/x:OAI-PMH', namespaces=NAMESPACES)) == 1
        assert len(tree.xpath('/x:OAI-PMH/x:ListRecords',
                              namespaces=NAMESPACES)) == 1
        for path in per_record_paths:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == expected
        return tree.xpath(
            '/x:OAI-PMH/x:ListRecords/x:resumptionToken',
            namespaces=NAMESPACES
        )[0]

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            for idx in range(total):
                record_id = uuid.uuid4()
                data = {'title_statement': {'title': 'Test{0}'.format(idx)}}
                recid_minter(record_id, data)
                oaiid_minter(record_id, data)
                Record.create(data, id_=record_id)
                record_ids.append(record_id)

        db.session.commit()

        for record_id in record_ids:
            indexer.index_by_id(record_id)

        sleep(5)

        # First page: ten records and a token pointing to the rest.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        resumption_token = check_page(etree.fromstring(response.data), 10)
        assert resumption_token.text

        # Second page: the two remaining records and an empty token.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&resumptionToken={0}'.format(
                    resumption_token.text
                )
            )

        resumption_token = check_page(etree.fromstring(response.data), 2)
        assert not resumption_token.text
from dojson.contrib.marc21.utils import create_record, split_stream
from scoap3.dojson.hep.model import hep
from invenio_records import Record
from invenio_db import db
from invenio_indexer.api import RecordIndexer
from scoap3.modules.pidstore.minters import scoap3_recid_minter

# Parse the MARCXML export and convert each record with the HEP model.  The
# ``with`` block closes the file handle once all records have been read.
with open('../data/scoap3export.xml', 'r') as marcxml_file:
    recs = [hep.do(create_record(data))
            for data in split_stream(marcxml_file)]

# The indexer does not depend on the record, so it is built only once.
indexer = RecordIndexer()

for i, obj in enumerate(recs, start=1):
    print("Creating record {}/{}".format(i, len(recs)))

    # ``obj`` is the converted record of this iteration.  The original code
    # passed ``data``, the comprehension variable, which is the raw input of
    # the last record on Python 2 and undefined on Python 3.
    record = Record.create(obj, id_=None)
    print(record)

    # Create persistent identifier.
    pid = scoap3_recid_minter(str(record.id), record)
    print(pid.object_uuid)

    # Commit any changes to record
    record.commit()

    # Commit to DB before indexing
    db.session.commit()

    # Index record
    indexer.index_by_id(pid.object_uuid)
def test_listrecords(app):
    """Check ListRecords paging and the from/until filter.

    Twelve records are created and indexed.  The first page must hold ten
    records plus a resumption token, the second page the remaining two and
    an empty token.  A request limited to +/- one day around the update time
    of the last created record must succeed and return ten records, for both
    datestamp granularities.
    """
    total = 12
    record_ids = []

    record_path = '/x:OAI-PMH/x:ListRecords/x:record'
    per_record_paths = (
        record_path,
        record_path + '/x:header',
        record_path + '/x:header/x:identifier',
        record_path + '/x:header/x:datestamp',
        record_path + '/x:metadata',
    )

    def check_page(tree, expected):
        """Assert the page holds ``expected`` records; return its token."""
        assert len(tree.xpath('/x:OAI-PMH', namespaces=NAMESPACES)) == 1
        assert len(tree.xpath('/x:OAI-PMH/x:ListRecords',
                              namespaces=NAMESPACES)) == 1
        for path in per_record_paths:
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == expected
        return tree.xpath(
            '/x:OAI-PMH/x:ListRecords/x:resumptionToken',
            namespaces=NAMESPACES
        )[0]

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            for idx in range(total):
                record_id = uuid.uuid4()
                data = {'title_statement': {'title': 'Test{0}'.format(idx)}}
                recid_minter(record_id, data)
                oaiid_minter(record_id, data)
                # ``record`` keeps the last created record for the range
                # check further down.
                record = Record.create(data, id_=record_id)
                record_ids.append(record_id)

        db.session.commit()

        for record_id in record_ids:
            indexer.index_by_id(record_id)

        current_search.flush_and_refresh('_all')

        # First page: ten records and a token pointing to the rest.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        resumption_token = check_page(etree.fromstring(response.data), 10)
        assert resumption_token.text

        # Second page: the two remaining records and an empty token.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&resumptionToken={0}'.format(
                    resumption_token.text
                )
            )

        resumption_token = check_page(etree.fromstring(response.data), 2)
        assert not resumption_token.text

        # Check from:until range
        range_template = (
            '/oai2d?verb=ListRecords&metadataPrefix=oai_dc'
            '&from={0}&until={1}'
        )
        with app.test_client() as client:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                lower = datetime_to_datestamp(
                    record.updated - timedelta(days=1),
                    day_granularity=granularity)
                upper = datetime_to_datestamp(
                    record.updated + timedelta(days=1),
                    day_granularity=granularity)

                response = client.get(range_template.format(lower, upper))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                assert len(tree.xpath(record_path,
                                      namespaces=NAMESPACES)) == 10
def test_listidentifiers(app):
    """Check ListIdentifiers, plain and restricted by date range and set.

    An OAI set ``test0`` is stored with the OAI set signals unregistered,
    and ``run_after_insert_oai_set`` is called afterwards.  One matching
    record is then created, committed and indexed.  The plain response must
    list its identifier and datestamp; the request restricted to +/- one day
    around ``record.updated`` and to set ``test0`` must succeed and return
    one identifier, for both datestamp granularities.
    """
    from invenio_oaiserver.models import OAISet

    header_path = '/x:OAI-PMH/x:ListIdentifiers/x:header'
    identifier_path = header_path + '/x:identifier'

    with app.app_context():
        current_oaiserver.unregister_signals_oaiset()
        # create new OAI Set
        with db.session.begin_nested():
            test_set = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            )
            db.session.add(test_set)
        db.session.commit()

    run_after_insert_oai_set()

    with app.test_request_context():
        indexer = RecordIndexer()

        # create a new record (inside the OAI Set)
        with db.session.begin_nested():
            record_id = uuid.uuid4()
            data = {'title_statement': {'title': 'Test0'}}
            recid_minter(record_id, data)
            pid = oaiid_minter(record_id, data)
            record = Record.create(data, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        current_search.flush_and_refresh('_all')

        pid_value = pid.pid_value

        # get the list of identifiers
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)

        for path in ('/x:OAI-PMH', '/x:OAI-PMH/x:ListIdentifiers',
                     header_path):
            assert len(tree.xpath(path, namespaces=NAMESPACES)) == 1

        identifier = tree.xpath(identifier_path, namespaces=NAMESPACES)
        assert len(identifier) == 1
        assert identifier[0].text == str(pid_value)

        datestamp = tree.xpath(
            header_path + '/x:datestamp', namespaces=NAMESPACES)
        assert len(datestamp) == 1
        assert datestamp[0].text == datetime_to_datestamp(record.updated)

        # Check from:until range
        range_template = (
            '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
            '&from={0}&until={1}&set=test0'
        )
        with app.test_client() as client:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                lower = datetime_to_datestamp(
                    record.updated - timedelta(1),
                    day_granularity=granularity)
                upper = datetime_to_datestamp(
                    record.updated + timedelta(1),
                    day_granularity=granularity)

                response = client.get(range_template.format(lower, upper))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                identifier = tree.xpath(
                    identifier_path, namespaces=NAMESPACES)
                assert len(identifier) == 1
def records():
    """Load the test fixture: PIDs in various states plus example records.

    Seven ``recid`` PIDs (live, deleted with and without a record, registered
    without a record, two redirects, reserved) are created in one nested
    transaction, followed by one registered PID and record per entry of
    ``record_examples``, numbered from 8.  After the commit, the live record
    and the example records are indexed.
    """
    import uuid
    from invenio_records.api import Record
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus

    indexer = RecordIndexer()
    to_index = []

    with db.session.begin_nested():
        # Record 1 - Live record
        live_uuid = uuid.uuid4()
        pid1 = PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=live_uuid,
            status=PIDStatus.REGISTERED)
        live_metadata = {
            'title': 'Registered',
            'description': 'This is an awesome description',
            # "mint" the record as recid minter does
            'control_number': '1',
        }
        Record.create(live_metadata, id_=live_uuid)
        to_index.append(pid1.object_uuid)

        # Record 2 - Deleted PID with record
        deleted_uuid = uuid.uuid4()
        deleted_pid = PersistentIdentifier.create(
            'recid', '2', object_type='rec', object_uuid=deleted_uuid,
            status=PIDStatus.REGISTERED)
        Record.create({
            'title': 'Live ',
            'control_number': '2',
        }, id_=deleted_uuid)
        deleted_pid.delete()

        # Record 3 - Deleted PID without a record
        PersistentIdentifier.create(
            'recid', '3', status=PIDStatus.DELETED)

        # Record 4 - Registered PID without a record
        PersistentIdentifier.create(
            'recid', '4', status=PIDStatus.REGISTERED)

        # Record 5 - Redirected PID
        redirected = PersistentIdentifier.create(
            'recid', '5', status=PIDStatus.REGISTERED)
        redirected.redirect(pid1)

        # Record 6 - Redirected non existing endpoint
        doi = PersistentIdentifier.create(
            'doi', '10.1234/foo', status=PIDStatus.REGISTERED)
        redirected = PersistentIdentifier.create(
            'recid', '6', status=PIDStatus.REGISTERED)
        redirected.redirect(doi)

        # Record 7 - Unregistered PID
        PersistentIdentifier.create(
            'recid', '7', status=PIDStatus.RESERVED)

        # Example records are numbered from 8 onwards.
        for rec_pid, example in enumerate(record_examples, start=8):
            example_uuid = uuid.uuid4()
            PersistentIdentifier.create(
                'recid', str(rec_pid), object_type='rec',
                object_uuid=example_uuid, status=PIDStatus.REGISTERED)
            # "mint" the record as recid minter does
            record = dict(example)
            record['control_number'] = str(rec_pid)
            # create the record
            Record.create(record, id_=example_uuid)
            to_index.append(example_uuid)

    db.session.commit()

    for record_uuid in to_index:
        indexer.index_by_id(record_uuid)
def create_author(profile):
    """Create a new author profile based on a given signature.

    The method receives a dictionary representing an author.
    Based on the values, it creates a dictionary in the invenio_records
    format.  The record is then created, given an Inspire recid, committed
    and indexed.

    While the record is created, the ``after_record_insert`` receiver
    ``append_new_record_to_queue`` is disconnected.  It is connected again in
    a ``finally`` clause, so a failure during creation, minting, commit or
    indexing does not leave the receiver disconnected.

    :param profile: A signature representing an author to be created as a
        profile.
        Example:
            profile = {u'affiliations': [{u'value': u'Yerevan Phys. Inst.'}],
                       u'alternative_name': None,
                       u'curated_relation': False,
                       u'email': None,
                       u'full_name': u'Chatrchyan, Serguei',
                       u'inspire_id': None,
                       u'orcid': None,
                       u'profile': u'',
                       u'recid': None,
                       u'role': None,
                       u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c'}

    :return: A recid, where the new profile can be accessed.
        Example:
            "1234"
    """
    name = profile.get('full_name')

    # Template of an initial record.
    record = {'collections': [{'primary': 'HEPNAMES'}],
              'name': {'value': name},
              '$schema': _get_author_schema()}

    # The author's email address.
    # Unfortunately the method will not correlate a given e-mail address
    # with an affiliation.
    if 'email' in profile:
        record['positions'] = [{'email': profile.get('email')}]

    # The author can be a member of more than one affiliation.
    if 'affiliations' in profile:
        positions = record.setdefault('positions', [])

        for affiliation in profile.get('affiliations'):
            institution = {'name': affiliation.get('value')}
            affiliation_recid = affiliation.get('recid', None)
            # The institution recid is only stored when it is set.
            if affiliation_recid:
                institution['recid'] = affiliation_recid
            positions.append({'institution': institution})

    # FIXME: The method should also collect the useful data
    #        from the publication, like category field, subject,
    #        etc.

    # Disconnect the signal on insert of a new record.
    after_record_insert.disconnect(append_new_record_to_queue)

    try:
        # Create a new author profile.
        record = Record.create(record, id_=None)

        # Create Inspire recid.
        record_pid = inspire_recid_minter(record.id, record)

        # Extend the new record with Inspire recid and self key.
        record['control_number'] = record_pid.pid_value
        record['self'] = inspire_dojson_utils.get_record_ref(
            record_pid.pid_value, 'authors')

        # Apply the changes.
        record.commit()
        db.session.commit()

        # Add the record to Elasticsearch.
        indexer = RecordIndexer()
        indexer.index_by_id(record_pid.object_uuid)
    finally:
        # Reconnect the disconnected signal, also when a step above failed.
        after_record_insert.connect(append_new_record_to_queue)

    # Report.
    logger.info("Created profile: %s", record_pid.pid_value)

    # Return the recid of new profile to which signatures will point to.
    return record_pid.pid_value
def test_listidentifiers(app):
    """Check ListIdentifiers, plain and restricted by date range and set.

    An OAI set ``test0`` matching ``title:Test0`` is stored, then one record
    with that title is created, committed and indexed.  The plain response
    must list its identifier and datestamp; the request restricted to +/- one
    day around ``record.updated`` and to set ``test0`` must return one
    identifier.
    """
    title_schema = {
        'type': 'object',
        'properties': {
            'title': {'type': 'string'},
            'field': {'type': 'boolean'},
        },
        'required': ['title'],
    }
    from invenio_oaiserver.models import OAISet

    header_path = '/x:OAI-PMH/x:ListIdentifiers/x:header'
    identifier_path = header_path + '/x:identifier'

    with app.app_context():
        with db.session.begin_nested():
            test_set = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title:Test0',
            )
            db.session.add(test_set)
        db.session.commit()

    with app.test_request_context():
        indexer = RecordIndexer()

        with db.session.begin_nested():
            record_id = uuid.uuid4()
            data = {'title': 'Test0', '$schema': title_schema}
            recid_minter(record_id, data)
            pid = oaiid_minter(record_id, data)
            record = Record.create(data, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        sleep(2)

        pid_value = pid.pid_value

        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)

        def select(path):
            """Evaluate ``path`` against the current response tree."""
            return tree.xpath(path, namespaces=NAMESPACES)

        assert len(select('/x:OAI-PMH')) == 1
        assert len(select('/x:OAI-PMH/x:ListIdentifiers')) == 1
        assert len(select(header_path)) == 1

        identifier = select(identifier_path)
        assert len(identifier) == 1
        assert identifier[0].text == str(pid_value)

        datestamp = select(header_path + '/x:datestamp')
        assert len(datestamp) == 1
        assert datestamp[0].text == datetime_to_datestamp(record.updated)

        # Check from_:until range
        one_day = datetime.timedelta(1)
        range_url = (
            '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
            '&from_={0}&until={1}&set=test0'
        ).format(
            datetime_to_datestamp(record.updated - one_day),
            datetime_to_datestamp(record.updated + one_day),
        )
        with app.test_client() as client:
            response = client.get(range_url)

        tree = etree.fromstring(response.data)
        identifier = select(identifier_path)
        assert len(identifier) == 1
def test_listidentifiers(app):
    """Check ``ListIdentifiers`` for one record that belongs to one OAI set.

    After storing the ``test0`` set and a record whose
    ``title_statement.title`` is ``Test0``, the unfiltered listing must hold
    exactly one header carrying the minted OAI pid value and the datestamp
    rendering of ``record.updated``.  The listing is then requested with
    ``from``/``until``/``set=test0`` one day either side of
    ``record.updated``, once per value of ``day_granularity``; each response
    must be HTTP 200 and contain exactly one identifier.
    """
    from invenio_oaiserver.models import OAISet

    def select(root, path):
        """Evaluate ``path`` on ``root`` using the ``NAMESPACES`` mapping."""
        return root.xpath(path, namespaces=NAMESPACES)

    with app.app_context():
        current_oaiserver.unregister_signals_oaiset()
        # Store the set whose search pattern names the title used below.
        with db.session.begin_nested():
            db.session.add(OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            ))
        db.session.commit()

    # Invoked explicitly once the set is committed (the OAI set signals
    # were unregistered above).
    run_after_insert_oai_set()

    with app.test_request_context():
        indexer = RecordIndexer()

        # Create the record; both minters receive the metadata dict first.
        with db.session.begin_nested():
            record_id = uuid.uuid4()
            metadata = {'title_statement': {'title': 'Test0'}}
            recid_minter(record_id, metadata)
            oai_pid = oaiid_minter(record_id, metadata)
            record = Record.create(metadata, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        current_search.flush_and_refresh('_all')

        pid_value = oai_pid.pid_value

        # Unfiltered listing.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)

        assert len(select(tree, '/x:OAI-PMH')) == 1
        assert len(select(tree, '/x:OAI-PMH/x:ListIdentifiers')) == 1
        headers = select(tree, '/x:OAI-PMH/x:ListIdentifiers/x:header')
        assert len(headers) == 1

        identifiers = select(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier')
        assert len(identifiers) == 1
        assert identifiers[0].text == str(pid_value)

        datestamps = select(
            tree, '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp')
        assert len(datestamps) == 1
        assert datestamps[0].text == datetime_to_datestamp(record.updated)

        # Listing restricted by set and by a from/until window, requested
        # with ``day_granularity`` False and then True.
        with app.test_client() as client:
            url_template = (
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
                '&from={0}&until={1}&set=test0'
            )
            for by_day in (False, True):
                window_start = datetime_to_datestamp(
                    record.updated - timedelta(1), day_granularity=by_day)
                window_end = datetime_to_datestamp(
                    record.updated + timedelta(1), day_granularity=by_day)
                response = client.get(
                    url_template.format(window_start, window_end))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                identifiers = select(
                    tree,
                    '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier')
                assert len(identifiers) == 1
def test_listrecords(app):
    """Check the ``ListRecords`` verb, its paging and its date filtering.

    Twelve records are created and indexed.  The first response must hold
    ten records and a non-empty resumption token; the request made with that
    token must hold the remaining two records and an empty token.  Finally
    the verb is queried with a ``from``/``until`` window of one day either
    side of the last created record's ``updated`` value, once per value of
    ``day_granularity``; each response must be HTTP 200 with ten records.
    """
    total = 12
    record_ids = []

    def select(root, path):
        """Evaluate ``path`` on ``root`` using the ``NAMESPACES`` mapping."""
        return root.xpath(path, namespaces=NAMESPACES)

    def check_page(root, expected):
        """Assert ``root`` is one ListRecords page of ``expected`` records."""
        assert len(select(root, '/x:OAI-PMH')) == 1
        assert len(select(root, '/x:OAI-PMH/x:ListRecords')) == 1
        assert len(
            select(root, '/x:OAI-PMH/x:ListRecords/x:record')) == expected
        assert len(
            select(root, '/x:OAI-PMH/x:ListRecords/x:record/x:header')
        ) == expected
        assert len(
            select(
                root,
                '/x:OAI-PMH/x:ListRecords/x:record/x:header'
                '/x:identifier')) == expected
        assert len(
            select(
                root,
                '/x:OAI-PMH/x:ListRecords/x:record/x:header'
                '/x:datestamp')) == expected
        assert len(
            select(root, '/x:OAI-PMH/x:ListRecords/x:record/x:metadata')
        ) == expected

    with app.test_request_context():
        indexer = RecordIndexer()

        # Create all records in one nested transaction; ``last_record``
        # ends up bound to the final one and anchors the date window below.
        with db.session.begin_nested():
            for idx in range(total):
                record_id = uuid.uuid4()
                title = 'Test{0}'.format(idx)
                metadata = {'title_statement': {'title': title}}
                recid_minter(record_id, metadata)
                oaiid_minter(record_id, metadata)
                last_record = Record.create(metadata, id_=record_id)
                record_ids.append(record_id)

        db.session.commit()

        for record_id in record_ids:
            indexer.index_by_id(record_id)

        current_search.flush_and_refresh('_all')

        # First page: ten records and a token pointing at the rest.
        with app.test_client() as client:
            response = client.get(
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc')

        tree = etree.fromstring(response.data)
        check_page(tree, 10)

        resumption_token = select(
            tree, '/x:OAI-PMH/x:ListRecords/x:resumptionToken')[0]
        assert resumption_token.text

        # Second page, fetched through the token: the two remaining records
        # and a token element with no text.
        with app.test_client() as client:
            next_url = '/oai2d?verb=ListRecords&resumptionToken={0}'.format(
                resumption_token.text)
            response = client.get(next_url)

        tree = etree.fromstring(response.data)
        check_page(tree, 2)

        resumption_token = select(
            tree, '/x:OAI-PMH/x:ListRecords/x:resumptionToken')[0]
        assert not resumption_token.text

        # from/until window, requested with ``day_granularity`` False and
        # then True.
        with app.test_client() as client:
            url_template = (
                '/oai2d?verb=ListRecords&metadataPrefix=oai_dc'
                '&from={0}&until={1}'
            )
            for by_day in (False, True):
                window_start = datetime_to_datestamp(
                    last_record.updated - timedelta(days=1),
                    day_granularity=by_day)
                window_end = datetime_to_datestamp(
                    last_record.updated + timedelta(days=1),
                    day_granularity=by_day)
                response = client.get(
                    url_template.format(window_start, window_end))
                assert response.status_code == 200

                tree = etree.fromstring(response.data)
                assert len(
                    select(tree, '/x:OAI-PMH/x:ListRecords/x:record')
                ) == 10