def test_save_roots(workflow_app):
    """save_roots should union roots: head keeps the freshest root per source."""
    head = InspireRecord.create_or_update(fake_record('title1', 123), skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(fake_record('title2', 456), skip_files=False)
    update.commit()

    workflow = workflow_object_class.create(data={}, data_type='hep')
    workflow.extra_data['head_uuid'] = str(head.id)
    workflow.extra_data['update_uuid'] = str(update.id)
    workflow.save()

    # Union: keep the most recently created/updated root from each source.
    insert_wf_record_source(json={'version': 'original'}, record_uuid=head.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update.id, source='publisher')

    save_roots(workflow, None)

    root_from_arxiv = read_wf_record_source(head.id, 'arxiv')
    assert root_from_arxiv.json == {'version': 'updated'}

    root_from_publisher = read_wf_record_source(head.id, 'publisher')
    assert root_from_publisher.json == {'version': 'updated'}

    # All roots were moved off the update record.
    assert not read_wf_record_source(update.id, 'arxiv')
    assert not read_wf_record_source(update.id, 'publisher')
def record_with_two_revisions(app):
    """Create a literature record with two revisions, then clean it up."""
    initial = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }

    # Revision 0.
    with db.session.begin_nested():
        rec = InspireRecord.create_or_update(initial)
        rec.commit()
    db.session.commit()

    # Revision 1: same record, new title.
    rec['titles'][0]['title'] = 'record rev1'
    with db.session.begin_nested():
        rec = InspireRecord.create_or_update(rec)
        rec.commit()
    db.session.commit()

    yield

    _delete_record('lit', 111)
def test_save_roots(workflow_app):
    """Roots of the update record are merged into the head without overwriting."""
    head = InspireRecord.create_or_update(fake_record('title1', 123), skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(fake_record('title2', 456), skip_files=False)
    update.commit()

    workflow = workflow_object_class.create(data={}, data_type='hep')
    workflow.extra_data['head_uuid'] = str(head.id)
    workflow.extra_data['update_uuid'] = str(update.id)
    workflow.save()

    insert_wf_record_source(json={}, record_uuid=head.id, source='a')
    insert_wf_record_source(json={}, record_uuid=head.id, source='b')
    # The update's `a` root is dropped: the head already has a root for that source.
    insert_wf_record_source(json={}, record_uuid=update.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update.id, source='c')

    save_roots(workflow, None)

    for source in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head.id), source)
def record_with_two_revisions(app):
    """Yield after storing two revisions of record 111; delete it on teardown."""
    record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }

    for revision_title in ('record rev0', 'record rev1'):
        record['titles'][0]['title'] = revision_title
        with db.session.begin_nested():
            record = InspireRecord.create_or_update(record)
            record.commit()
        db.session.commit()

    yield

    _delete_record('lit', 111)
def test_manual_merge_existing_records(workflow_app):
    """End-to-end check of the manual-merge workflow over two existing records."""
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # Retrieve it again, otherwise Detached Instance Error.
    obj = workflow_object_class.get(obj_id)

    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False

    # No root present before the merge, for either record.
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None

    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None

    # Check that head's content has been replaced by the merged record.
    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)
    assert deleted_record.json['deleted'] is True

    # Check the deleted record is linked in the latest one.
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']

    # Check the merged record is linked in the deleted one.
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']

    del latest_record['deleted_records']
    assert latest_record == obj.data  # -> resulted merged record
def test_manual_merge_existing_records(workflow_app):
    """End-to-end check of the manual-merge workflow over two existing records."""
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)

    # Retrieve it again, otherwise Detached Instance Error.
    obj = workflow_object_class.get(obj_id)

    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False

    # No root present before the merge, for either record.
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None

    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None

    # Check that head's content has been replaced by the merged record.
    deleted_record = RecordMetadata.query.filter_by(id=update_id).one()
    latest_record = get_db_record('lit', 1)
    assert deleted_record.json['deleted'] is True

    # Check the deleted record is linked in the latest one.
    deleted_rec_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [deleted_rec_ref] == latest_record['deleted_records']

    # Check the merged record is linked in the deleted one.
    new_record_metadata = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert new_record_metadata == deleted_record.json['new_record']

    del latest_record['deleted_records']
    assert latest_record == obj.data  # -> resulted merged record
def book_with_another_document_type(app):
    """Temporarily add another document type to a book record."""
    # Setup: extend the document types of the book record.
    rec = get_db_record('lit', 1373790)
    rec['document_type'] = ['book', 'proceedings']
    rec = InspireRecord.create_or_update(rec)
    rec.commit()

    yield

    # Teardown: restore the record to a plain book.
    rec = get_db_record('lit', 1373790)
    rec['document_type'] = ['book']
    rec = InspireRecord.create_or_update(rec)
    rec.commit()
def jhep_with_malformed_title(app):
    """Temporarily add a malformed title to the JHEP record."""
    # Setup: append a junk title variant.
    journal = get_db_record('jou', 1213103)
    journal['title_variants'].append('+++++')
    journal = InspireRecord.create_or_update(journal)
    journal.commit()

    yield

    # Teardown: drop the variant that was appended above.
    journal = get_db_record('jou', 1213103)
    journal['title_variants'] = journal['title_variants'][:-1]
    journal = InspireRecord.create_or_update(journal)
    journal.commit()
def book_with_another_document_type(app):
    """Temporarily add another document type to a book record."""
    def _set_document_types(types):
        # Re-save record 1373790 with the given document types.
        rec = get_db_record('lit', 1373790)
        rec['document_type'] = types
        rec = InspireRecord.create_or_update(rec)
        rec.commit()

    _set_document_types(['book', 'proceedings'])
    yield
    _set_document_types(['book'])
def jhep_with_malformed_title(app):
    """Temporarily add a malformed title to the JHEP record."""
    rec = get_db_record('jou', 1213103)
    rec['title_variants'].append('+++++')
    rec = InspireRecord.create_or_update(rec)
    rec.commit()

    yield

    # Remove the malformed variant added during setup.
    rec = get_db_record('jou', 1213103)
    rec['title_variants'] = rec['title_variants'][:-1]
    rec = InspireRecord.create_or_update(rec)
    rec.commit()
def cern_with_hal_id(app):
    """Temporarily add the HAL id to the CERN record."""
    institution = get_db_record('ins', 902725)
    institution['external_system_identifiers'] = [
        {'schema': 'HAL', 'value': '300037'},
    ]
    institution = InspireRecord.create_or_update(institution)
    institution.commit()
    es.indices.refresh('records-institutions')

    yield

    # Teardown: remove the identifier and refresh the index again.
    institution = get_db_record('ins', 902725)
    del institution['external_system_identifiers']
    institution = InspireRecord.create_or_update(institution)
    institution.commit()
    es.indices.refresh('records-institutions')
def author_in_isolated_app(isolated_app):
    """Insert a minimal valid author record and yield its control number."""
    author = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        '_collections': ['Authors'],
        # FIXME remove when there is an easy way to insert new records
        'control_number': 123456789,
        'ids': [
            {'schema': 'INSPIRE BAI', 'value': 'J.Smith.1'},
            {'schema': 'ORCID', 'value': '0000-0002-1825-0097'},
        ],
        'name': {'value': 'Smith, John'},
    }
    assert validate(author, 'authors') is None

    stored = InspireRecord.create_or_update(author)
    stored.commit()

    yield stored['control_number']
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_orcid_id(isolated_app):
    """Lookup by ORCID raises NoResultFound once the author only has a BAI."""
    author = get_db_record('aut', 1061000)
    author['ids'] = [{'schema': 'INSPIRE BAI', 'value': 'Maurizio.Martinelli.1'}]
    author = InspireRecord.create_or_update(author)
    author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_ids(isolated_app):
    """Lookup by ORCID raises NoResultFound after the author loses all ids."""
    author = get_db_record('aut', 1061000)
    del author['ids']
    author = InspireRecord.create_or_update(author)
    author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_get_literature_recids_for_orcid_raises_if_two_authors_are_found(isolated_app):
    """Duplicating the author under a new control number breaks the lookup."""
    duplicate = get_db_record('aut', 1061000)
    duplicate['control_number'] = 1061001
    duplicate = InspireRecord.create_or_update(duplicate)
    duplicate.commit()

    with pytest.raises(MultipleResultsFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def _create_record(record_json):
    """Store a record, refresh the search index, and return the input JSON."""
    with db.session.begin_nested():
        stored = InspireRecord.create_or_update(record_json)
        stored.commit()
    db.session.commit()
    es.indices.refresh()
    # Note: the original dict is returned, not the InspireRecord instance.
    return record_json
def _create_record(record_json):
    """Persist a record and refresh ES; hands back the caller's JSON dict."""
    with db.session.begin_nested():
        rec = InspireRecord.create_or_update(record_json)
        rec.commit()
    db.session.commit()
    es.indices.refresh()
    return record_json
def test_get_literature_recids_for_orcid_raises_if_two_authors_are_found(
        isolated_app):
    """A second author with the same ORCID makes the lookup ambiguous."""
    clone = get_db_record('aut', 1061000)
    clone['control_number'] = 1061001
    clone = InspireRecord.create_or_update(clone)
    clone.commit()

    with pytest.raises(MultipleResultsFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_ids(
        isolated_app):
    """Removing every id from the author makes the ORCID lookup fail cleanly."""
    author = get_db_record('aut', 1061000)
    del author['ids']
    author = InspireRecord.create_or_update(author)
    author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def cern_with_hal_id(app):
    """Temporarily add the HAL id to the CERN record."""
    def _refresh():
        es.indices.refresh('records-institutions')

    rec = get_db_record('ins', 902725)
    rec['external_system_identifiers'] = [{'schema': 'HAL', 'value': '300037'}]
    rec = InspireRecord.create_or_update(rec)
    rec.commit()
    _refresh()

    yield

    rec = get_db_record('ins', 902725)
    del rec['external_system_identifiers']
    rec = InspireRecord.create_or_update(rec)
    rec.commit()
    _refresh()
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_orcid_id(
        isolated_app):
    """Replacing the ids with a lone BAI removes the ORCID, so lookup fails."""
    author = get_db_record('aut', 1061000)
    author['ids'] = [
        {'schema': 'INSPIRE BAI', 'value': 'Maurizio.Martinelli.1'},
    ]
    author = InspireRecord.create_or_update(author)
    author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_manual_merge_existing_records(mock_put_record_to_hep, mock_store_records, workflow_app):
    """Manual-merge workflow over two existing records, with HEP PUT mocked out."""
    json_head = fake_record('This is the HEAD', 1)
    json_update = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    json_head['core'] = True
    json_update['core'] = False

    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(json_update, skip_files=False)
    update.commit()
    head_id = head.id
    update_id = update.id

    obj_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )

    do_resolve_manual_merge_wf(workflow_app, obj_id)
    # The merged result must have been pushed to HEP.
    mock_put_record_to_hep.assert_called()

    # Retrieve it again, otherwise Detached Instance Error.
    obj = workflow_object_class.get(obj_id)

    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data['approved'] is True
    assert obj.extra_data['auto-approved'] is False

    # No root present before the merge, for either record.
    last_root = read_wf_record_source(head_id, 'arxiv')
    assert last_root is None

    update_source = LiteratureReader(update).source
    root_update = read_wf_record_source(update_id, update_source)
    assert root_update is None
def test_manual_merge_with_none_record(workflow_app):
    """Merging against a non-existing update id raises RecordGetterError."""
    head = InspireRecord.create_or_update(
        fake_record('This is the HEAD', 1), skip_files=False)
    head.commit()

    non_existing_id = 123456789
    with pytest.raises(RecordGetterError):
        start_merger(
            head_id=1,
            update_id=non_existing_id,
            current_user_id=1,
        )
def test_manual_merge_with_none_record(workflow_app):
    """start_merger must fail fast when the update record does not exist."""
    json_head = fake_record('This is the HEAD', 1)
    head = InspireRecord.create_or_update(json_head, skip_files=False)
    head.commit()

    missing_record_id = 123456789

    with pytest.raises(RecordGetterError):
        start_merger(
            head_id=1,
            update_id=missing_record_id,
            current_user_id=1,
        )
def test_save_roots(workflow_app):
    """After save_roots, all roots live on the head and none on the update."""
    head = InspireRecord.create_or_update(fake_record('title1', 123), skip_files=False)
    head.commit()
    update = InspireRecord.create_or_update(fake_record('title2', 456), skip_files=False)
    update.commit()

    obj = workflow_object_class.create(data={}, data_type='hep')
    obj.extra_data['head_uuid'] = str(head.id)
    obj.extra_data['update_uuid'] = str(update.id)
    obj.save()

    # Union: keep the most recently created/updated root from each source.
    insert_wf_record_source(json={'version': 'original'}, record_uuid=head.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update.id, source='arxiv')
    insert_wf_record_source(json={'version': 'updated'}, record_uuid=update.id, source='publisher')

    save_roots(obj, None)

    # The head ends up with the newer root for every source...
    assert read_wf_record_source(head.id, 'arxiv').json == {'version': 'updated'}
    assert read_wf_record_source(head.id, 'publisher').json == {'version': 'updated'}
    # ...and the update record keeps none.
    assert not read_wf_record_source(update.id, 'arxiv')
    assert not read_wf_record_source(update.id, 'publisher')
def insert_journals_in_db(workflow_app):
    """Temporarily add few journals in the DB"""
    def _load_fixture(filename):
        # Read a JSON journal fixture bundled next to this test module.
        return json.loads(pkg_resources.resource_string(
            __name__, os.path.join('fixtures', filename)))

    journal_no_pro_and_ref = _load_fixture('jou_record_refereed.json')
    journal_pro_and_ref = _load_fixture('jou_record_refereed_and_proceedings.json')

    with db.session.begin_nested():
        journal_no_pro_and_ref = InspireRecord.create_or_update(
            journal_no_pro_and_ref, skip_files=False)
        journal_no_pro_and_ref.commit()
        journal_pro_and_ref = InspireRecord.create_or_update(
            journal_pro_and_ref, skip_files=False)
        journal_pro_and_ref.commit()
    db.session.commit()
    es.indices.refresh('records-journals')

    yield

    _delete_record('jou', 1936475)
    _delete_record('jou', 1936476)
    es.indices.refresh('records-journals')
def test_get_head_source_return_arxiv_when_one_arxive_source_present(
        app, simple_record):
    """An arxiv root wins over a publisher root for the same record."""
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.records.api import InspireRecord

    stored = InspireRecord.create_or_update(simple_record)
    stored.commit()
    record_uuid = stored.id

    # Two sources for the same record: publisher-type first...
    insert_wf_record_source(json=simple_record, record_uuid=record_uuid, source='ejl')
    assert get_head_source(record_uuid) == 'publisher'

    # ...then arxiv, which takes precedence.
    insert_wf_record_source(json=simple_record, record_uuid=record_uuid, source='arxiv')
    assert get_head_source(record_uuid) == 'arxiv'
def migrate_record_from_mirror(prod_record, skip_files=False):
    """Migrate a mirrored legacy record into an Inspire record.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        skip_files(bool): flag indicating whether the files in the record
            metadata should be copied over from legacy and attach to the
            record.

    Returns:
        dict: the migrated record metadata, which is also inserted into the
            database, or ``None`` if the migration failed.
    """
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        LOGGER.exception('Migrator DoJSON Error')
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if '$schema' in json_record:
        ensure_valid_schema(json_record)

    # Bug fix: `record` was previously assigned only inside the `try`, so if
    # `create_or_update` raised (the usual ValidationError path), the final
    # `return record` blew up with UnboundLocalError instead of returning the
    # failure. Initialize it so a failed migration returns None.
    record = None
    try:
        with db.session.begin_nested():
            record = InspireRecord.create_or_update(json_record, skip_files=skip_files)
            record.commit()
    except ValidationError as exc:
        # Lazy %-style args keep formatting off the hot path (see logging docs).
        pattern = u'Migrator Validator Error: {}, Value: %r, Record: %r'
        LOGGER.error(pattern.format('.'.join(exc.schema_path)), exc.instance, prod_record.recid)
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        LOGGER.exception('Migrator Record Insert Error')
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        # Only mark the mirror row valid when insertion fully succeeded.
        prod_record.valid = True
        db.session.merge(prod_record)
    return record
def migrate_record_from_mirror(prod_record, skip_files=False):
    """Migrate a mirrored legacy record into an Inspire record.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        skip_files(bool): flag indicating whether the files in the record
            metadata should be copied over from legacy and attach to the
            record.

    Returns:
        dict: the migrated record metadata, which is also inserted into the
            database, or ``None`` if the migration failed.
    """
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        LOGGER.exception('Migrator DoJSON Error')
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if '$schema' in json_record:
        ensure_valid_schema(json_record)

    # Bug fix: `record` was previously assigned only inside the `try`, so if
    # `create_or_update` raised (the usual ValidationError path), the final
    # `return record` blew up with UnboundLocalError instead of returning the
    # failure. Initialize it so a failed migration returns None.
    record = None
    try:
        with db.session.begin_nested():
            record = InspireRecord.create_or_update(json_record, skip_files=skip_files)
            record.commit()
    except ValidationError as exc:
        # Lazy %-style args keep formatting off the hot path (see logging docs).
        pattern = u'Migrator Validator Error: {}, Value: %r, Record: %r'
        LOGGER.error(pattern.format('.'.join(exc.schema_path)), exc.instance, prod_record.recid)
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        LOGGER.exception('Migrator Record Insert Error')
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        # Only mark the mirror row valid when insertion fully succeeded.
        prod_record.valid = True
        db.session.merge(prod_record)
    return record
def author_in_isolated_app(isolated_app):
    """Create a valid author record in the isolated app; yield its recid."""
    record = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        '_collections': ['Authors'],
        # FIXME remove when there is an easy way to insert new records
        'control_number': 123456789,
        'ids': [
            {
                'schema': 'INSPIRE BAI',
                'value': 'J.Smith.1',
            },
            {
                'schema': 'ORCID',
                'value': '0000-0002-1825-0097',
            },
        ],
        'name': {'value': 'Smith, John'},
    }
    # Guard: the fixture itself must be schema-valid.
    assert validate(record, 'authors') is None

    record = InspireRecord.create_or_update(record)
    record.commit()

    yield record['control_number']
def _create_record(json):
    """Insert or replace a record."""
    created = InspireRecord.create_or_update(json, skip_files=False)
    created.commit()
    return created
def _create_record(json):
    """Insert or replace a record."""
    rec = InspireRecord.create_or_update(json, skip_files=False)
    rec.commit()
    return rec