def test_create_with_source_record_with_different_control_number(isolated_app):
    """Check that documents taken from a source record get a key for the new recid.

    A first record (control number 1) is created with one document; a second
    record (control number 2) is then created with a copy of the first
    record's ``documents`` and with the first record as files source.
    """
    expected_file_content = 'dummy body'
    rec1_expected_key = '1_Fulltext.pdf'
    rec2_expected_key = '2_Fulltext.pdf'
    record1_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
        }],
    }
    record2_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 2,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }

    record1 = InspireRecord.create(record1_json)
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(record1.files[rec1_expected_key].obj.file.uri) as rec1_file:
        rec1_file_content = rec1_file.read()
    assert rec1_file_content == expected_file_content

    # The second record reuses the (already processed) documents of the first.
    record2_json['documents'] = copy.deepcopy(record1['documents'])
    record2 = InspireRecord.create(record2_json, files_src_records=[record1])

    assert len(record2.files) == len(record2_json['documents'])
    assert len(record2['documents']) == len(record2_json['documents'])
    assert record2['documents'][0]['url'] != record1['documents'][0]['url']

    with open(record2.files[rec2_expected_key].obj.file.uri) as rec2_file:
        rec2_file_content = rec2_file.read()
    assert rec2_file_content == expected_file_content
def store_record(obj, eng):
    """Create a new record from the workflow object, or update an existing one.

    Args:
        obj: workflow object; its ``data`` holds the record metadata and its
            ``extra_data`` holds the ``is-update`` flag and ``head_uuid``.
        eng: workflow engine; only ``workflow_definition.data_type`` is read.

    Returns:
        None. When the object is an update of a non-author record and
        ``FEATURE_FLAG_ENABLE_MERGER`` is off, nothing is stored.
    """
    updating = obj.extra_data.get('is-update')
    authors_workflow = eng.workflow_definition.data_type == 'authors'

    if not updating:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])
        obj.data['control_number'] = record['control_number']
        # Keep the head uuid around so the root can be stored later.
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        merger_or_authors = authors_workflow or current_app.config.get(
            'FEATURE_FLAG_ENABLE_MERGER', False
        )
        if not merger_or_authors:
            obj.log.info(
                'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
            )
            return

        record = InspireRecord.get_record(obj.extra_data['head_uuid'])
        obj.data['control_number'] = record['control_number']
        # Replace the whole stored content with the workflow data.
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
def test_literature_citations_api_without_results(api_client):
    """The citations endpoint of a record without citations returns an empty list."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    created = InspireRecord.create(record_json)
    created.commit()
    es.indices.refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'},
    )
    payload = json.loads(response.get_data(as_text=True))

    assert response.status_code == 200
    assert {"citation_count": 0, "citations": []} == payload['metadata']

    # Remove the record created for this test.
    _delete_record('lit', 111)
def test_create_handles_figures(isolated_app):
    """Creating a record with a figure attaches exactly one file for it."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        'figures': [{
            'key': 'graph.png',
            'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/graph.png;1',
        }]  # record/1628455/export/xme
    }

    record = InspireRecord.create(record_json)

    expected_file_content = 'dummy body'
    expected_key = '1_graph.png'

    assert expected_key in record.files.keys
    assert len(record.files) == 1
    assert len(record['figures']) == len(record_json['figures'])

    # Close the attached file explicitly rather than leaking the handle.
    with open(record.files[expected_key].obj.file.uri) as attached_file:
        file_content = attached_file.read()
    assert file_content == expected_file_content
def test_create_with_skip_files_param_overrides_records_skip_files_conf_and_does_not_add_documents_or_figures(isolated_app):
    """``skip_files=True`` wins over ``RECORDS_SKIP_FILES = False``: no files attached."""
    figure = {
        'key': 'graph.png',
        'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
    }
    document = {
        'key': 'arXiv:1710.01187.pdf',
        'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/content.pdf;1',
    }
    # record/1628455/export/xme -- with some modification
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
        'figures': [figure],
        'documents': [document],
    }

    config_override = {'RECORDS_SKIP_FILES': False}
    with patch.dict(isolated_app.config, config_override):
        record = InspireRecord.create(record_json, skip_files=True)

    # Nothing was attached and the metadata was left as given.
    assert len(record.files) == 0
    assert record['documents'] == record_json['documents']
    assert record['figures'] == record_json['figures']
def test_record_with_non_valid_content_is_cleaned_and_created_properly(
        isolated_app):
    """An invalid payload is rejected by ``validate`` but yields a valid record.

    The raw JSON must fail validation, while the record returned by
    ``InspireRecord.create`` for that same JSON must pass it.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # these two fields make the record not valid
        'documents': [],
        'urls': [
            {'url': ''},
        ],  # record/1628455/export/xme -- with some modification
    }

    # Precondition: the payload is not valid as given. ``pytest.raises``
    # replaces the manual try/except/flag bookkeeping and fails the test with
    # a clear message if no ValidationError is raised.
    with pytest.raises(ValidationError):
        validate(record_json)

    record = InspireRecord.create(record_json)

    # Raises if the created record is still not valid.
    validate(record)
def test_new_record(self):
    """Committing a new record schedules exactly one ORCID push task."""
    recid = 9999912587
    cited = {'$ref': 'http://localhost:5000/api/literature/1498589'}
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': recid,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [{'record': cited}],
    }
    inspire_record = InspireRecord.create(record_json)

    orcid_config = override_config(
        FEATURE_FLAG_ENABLE_ORCID_PUSH=True,
        FEATURE_FLAG_ORCID_PUSH_WHITELIST_REGEX='.*',
        ORCID_APP_CREDENTIALS={'consumer_key': '0000-0001-8607-8906'},
    )
    with orcid_config:
        with mock.patch('inspirehep.modules.records.receivers.push_access_tokens') as mock_push_access_tokens:
            with mock.patch('inspirehep.modules.orcid.tasks.orcid_push.apply_async') as mock_apply_async:
                mock_push_access_tokens.get_access_tokens.return_value = [('myorcid', 'mytoken')]
                inspire_record.commit()

                expected_task_kwargs = {
                    'orcid': 'myorcid',
                    'oauth_token': 'mytoken',
                    'kwargs_to_pusher': {
                        'record_db_version': inspire_record.model.version_id,
                    },
                    'rec_id': recid,
                }
                mock_apply_async.assert_called_once_with(
                    kwargs=expected_task_kwargs,
                    queue='orcid_push',
                )

    _delete_record('lit', recid)
def test_creating_deleted_record_and_undeleting_created_record_in_es(app):
    """A record created as deleted is absent from ES until it is undeleted."""
    search = LiteratureSearch()
    deleted_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        'deleted': True,
        '_collections': ['Literature'],
    }

    # Created with ``deleted: True``: the lookup in ES must fail.
    record = InspireRecord.create(deleted_json)
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)

    # Flipping the flag back: the lookup in ES must now succeed.
    record['deleted'] = False
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    record._delete(force=True)
def test_deleting_record_triggers_delete_in_es(app):
    """Flagging a stored record as deleted makes it disappear from ES."""
    search = LiteratureSearch()
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # After creation and commit the record can be fetched from ES.
    record = InspireRecord.create(record_json)
    record.commit()
    db.session.commit()
    search.get_source(record.id)

    # After setting ``deleted`` and committing, the ES lookup must fail.
    record['deleted'] = True
    record.commit()
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def _create_and_index_record(record):
    """Create a record from JSON, mint its recid, commit and refresh the index.

    Args:
        record: JSON content handed to ``InspireRecord.create``.

    Returns:
        The created ``InspireRecord``.
    """
    created = InspireRecord.create(record)
    inspire_recid_minter(created.id, created)
    db.session.commit()
    # Refresh so the new record is visible to searches issued right after.
    es.indices.refresh('records-hep')
    return created
def test_that_db_changes_are_mirrored_in_es(app):
    """Create, update and delete of a record are each visible through ES."""
    search = LiteratureSearch()
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
    }

    # Creation: the record can be read back from ES with its title.
    record = InspireRecord.create(record_json)
    assert get_title(search.get_source(record.id)) == 'foo'

    # Update: the new title is what ES returns after the commit.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    assert get_title(search.get_source(record.id)) == 'bar'

    # Deletion: the ES lookup must fail afterwards.
    record._delete(force=True)
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def test_download_local_file(isolated_app):
    """A document given as a ``file://`` URL ends up as one document and one file."""
    with NamedTemporaryFile(suffix=';1') as temp_file:
        # The temporary file name ends in ';1'; it is quoted for use in the URL.
        file_name = os.path.basename(temp_file.name)
        file_location = 'file://{0}'.format(quote(temp_file.name))
        local_document = {
            'key': file_name,
            'url': file_location,
        }
        data = {
            '$schema': 'http://localhost:5000/schemas/records/hep.json',
            '_collections': ['Literature'],
            'document_type': ['article'],
            'titles': [{'title': 'h'}],
            'documents': [local_document],
        }

        record = InspireRecord.create(data)

        assert 1 == len(record['documents'])
        assert 1 == len(record['_files'])
def record_insert_or_replace(json, skip_files=False):
    """Update the record identified by ``json['control_number']`` or create it.

    Args:
        json: record content; ``$schema`` and ``control_number`` are required.
        skip_files: forwarded to ``InspireRecord.update`` / ``create``.

    Returns:
        The updated or newly created ``InspireRecord``.
    """
    def _apply_legacy_creation_date(target):
        # Carry over the legacy creation date, when the payload has one.
        legacy_date = json.get('legacy_creation_date')
        if legacy_date:
            target.model.created = datetime.strptime(legacy_date, '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(json['$schema'])
    control_number = json['control_number']

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        # Replace the stored content wholesale with the incoming JSON.
        record.clear()
        record.update(json, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        inspire_recid_minter(str(record.id), json)

    # A record flagged deleted is only deleted here when no recid can be
    # extracted from its ``new_record`` reference.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def test_create_does_not_save_zombie_identifiers_if_record_creation_fails(isolated_app):
    """A failed ``create`` leaves neither a RecordIdentifier nor a PID behind."""
    invalid_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1936477,
    }

    with pytest.raises(ValidationError):
        InspireRecord.create(invalid_record)

    leftover_recid = RecordIdentifier.query.filter_by(recid=1936477).one_or_none()
    leftover_pid = PersistentIdentifier.query.filter_by(pid_value='1936477').one_or_none()

    assert not leftover_recid
    assert not leftover_pid
def test_index_after_commit_indexes_raises_if_cited_records_are_not_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Indexing modified citations raises when a referenced record is not in the DB."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    smith = {'authors': [{'full_name': 'Smith, J.'}]}
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{"reference": smith}],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    first_call_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*first_call_args)
    # execute mocked task
    index_modified_citations_from_record(*first_call_args)

    # Point the reference at literature record 9999 and store the new version.
    linked_reference = {
        "curated_relation": False,
        "record": {"$ref": "http://localhost:5000/api/literature/9999"},
        "reference": {'authors': [{'full_name': 'Smith, J.'}]},
    }
    citing_json.update({'references': [linked_reference]})
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    second_call_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*second_call_args)
    # execute mocked task
    with pytest.raises(MissingCitedRecordError):
        index_modified_citations_from_record(*second_call_args)

    _delete_record('lit', 8888)
def test_selecting_2_facets_generates_search_with_must_query(api_client):
    """Two ``author`` facet values must both match for a record to be returned."""
    def _recids_for(url):
        # Control numbers of all hits returned for the given search URL.
        response = api_client.get(url)
        data = json.loads(response.data)
        return [hit['metadata']['control_number'] for hit in data['hits']['hits']]

    record_json = {
        'control_number': 843386527,
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Article 1'}],
        '_collections': ['Literature'],
        'authors': [{'full_name': 'John Doe'}],
    }
    rec = InspireRecord.create(data=record_json)
    rec.commit()

    record_json2 = {
        'control_number': 843386521,
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Article 2'}],
        '_collections': ['Literature'],
        'authors': [{'full_name': 'John Doe'}, {'full_name': 'John Doe2'}],
    }
    rec2 = InspireRecord.create(data=record_json2)
    rec2.commit()

    db.session.commit()
    es.indices.refresh('records-hep')

    # One facet value: both records are expected.
    single_facet_recids = _recids_for('/literature?q=&author=BAI_John%20Doe')
    assert rec['control_number'] in single_facet_recids
    assert rec2['control_number'] in single_facet_recids

    # Two facet values: only the record with both authors is expected.
    double_facet_recids = _recids_for(
        '/literature?q=&author=BAI_John%20Doe&author=BAI_John%20Doe2'
    )
    assert rec['control_number'] not in double_facet_recids
    assert rec2['control_number'] in double_facet_recids

    for created_recid in (843386527, 843386521):
        _delete_record('lit', created_recid)
    db.session.commit()
def dummy_record(workflow_app):
    """Yield a minimal thesis record and force-delete it afterwards."""
    minimal_thesis = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['thesis'],
        'titles': [{'title': 'foo'}],
    }
    record = InspireRecord.create(minimal_thesis)

    yield record

    # Teardown: remove the record created above.
    record._delete(force=True)
def record_to_merge(workflow_app):
    """Yield a committed thesis record, then remove its PID and delete it."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [{'full_name': 'Jessica, Jones'}],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [{'title': 'Alias Investigations'}],
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }

    record = InspireRecord.create(record_json, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: fetch the record again by uuid before cleaning it up.
    record = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=record['control_number'],
    )
    lit_pid.unassign()
    lit_pid.delete()
    record.delete()
    record.commit()
def dummy_record(workflow_app):
    """Yield a minimal thesis record and force-delete it afterwards."""
    minimal_thesis = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['thesis'],
        'titles': [{'title': 'foo'}],
    }
    record = InspireRecord.create(minimal_thesis)

    yield record

    # Teardown: remove the record created above.
    record._delete(force=True)
def test_create_with_skip_files_param_overrides_records_skip_files_conf_and_does_add_documents_or_figures(app):
    """``skip_files=False`` wins over ``RECORDS_SKIP_FILES = True``: files are attached.

    Both the document and the figure must end up in ``record.files`` with
    the mocked content.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
        'documents': [{
            'key': 'arXiv:1710.01187.pdf',
            'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/content.pdf;1',
        }]  # record/1628455/export/xme -- with some modification
    }

    expected_document_file_content = 'dummy body'
    # The document key used to be a copy of the figure key, so the document
    # assertions below silently re-checked the figure. The value assumes the
    # same '<control_number>_<key>' naming the figure key shows.
    # NOTE(review): confirm the stored key keeps the ':' of the original key.
    expected_document_key = '1_arXiv:1710.01187.pdf'
    expected_figure_file_content = 'dummy body'
    expected_figure_key = '1_graph.png'

    with patch.dict(app.config, {'RECORDS_SKIP_FILES': True}):
        with patch(
            'inspirehep.modules.records.api.fsopen',
            mock_open(read_data=expected_figure_file_content),
        ):
            record = InspireRecord.create(record_json, skip_files=False)

    assert len(record.files) == 2

    assert expected_document_key in record.files.keys
    assert len(record['documents']) == len(record_json['documents'])
    # Context managers close the handles instead of leaking them.
    with open(record.files[expected_document_key].obj.file.uri) as document_file:
        document_file_content = document_file.read()
    assert document_file_content == expected_document_file_content

    assert expected_figure_key in record.files.keys
    assert len(record['figures']) == len(record_json['figures'])
    with open(record.files[expected_figure_key].obj.file.uri) as figure_file:
        figure_file_content = figure_file.read()
    assert figure_file_content == expected_figure_file_content
def test_create_with_skip_files_param_overrides_records_skip_files_conf_and_does_add_documents_or_figures(isolated_app):
    """``skip_files=False`` wins over ``RECORDS_SKIP_FILES = True``: files are attached.

    Both the document and the figure must end up in ``record.files`` with
    the mocked content.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
        'documents': [{
            'key': 'arXiv:1710.01187.pdf',
            'url': '/afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037619/content.pdf;1',
        }]  # record/1628455/export/xme -- with some modification
    }

    expected_document_file_content = 'dummy body'
    # The document key used to be a copy of the figure key, so the document
    # assertions below silently re-checked the figure. The value assumes the
    # same '<control_number>_<key>' naming the figure key shows.
    # NOTE(review): confirm the stored key keeps the ':' of the original key.
    expected_document_key = '1_arXiv:1710.01187.pdf'
    expected_figure_file_content = 'dummy body'
    expected_figure_key = '1_graph.png'

    with patch.dict(isolated_app.config, {'RECORDS_SKIP_FILES': True}):
        with patch(
            'inspirehep.modules.records.api.fsopen',
            mock_open(read_data=expected_figure_file_content),
        ):
            record = InspireRecord.create(record_json, skip_files=False)

    assert len(record.files) == 2

    assert expected_document_key in record.files.keys
    assert len(record['documents']) == len(record_json['documents'])
    # Context managers close the handles instead of leaking them.
    with open(record.files[expected_document_key].obj.file.uri) as document_file:
        document_file_content = document_file.read()
    assert document_file_content == expected_document_file_content

    assert expected_figure_key in record.files.keys
    assert len(record['figures']) == len(record_json['figures'])
    with open(record.files[expected_figure_key].obj.file.uri) as figure_file:
        figure_file_content = figure_file.read()
    assert figure_file_content == expected_figure_file_content
def record_from_db(workflow_app):
    """Yield a committed article record (recid 1234), then remove its PID and delete it."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'Fancy title for a new record'}],
        'arxiv_eprints': [{'categories': ['hep-th'], 'value': '1407.7587'}],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [{'source': 'arxiv', 'value': 'A basic abstract.'}],
        'report_numbers': [{'value': 'DESY-17-036'}],
    }

    record = InspireRecord.create(record_json, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: fetch the record again by uuid before cleaning it up.
    record = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=record['control_number'],
    )
    lit_pid.unassign()
    lit_pid.delete()
    record.delete()
    record.commit()
def store_record(obj, eng):
    """Create a new record from the workflow object, or update an existing one.

    Args:
        obj: workflow object; ``data`` holds the record metadata, ``extra_data``
            holds ``is-update`` and either ``head_uuid`` or approved matches.
        eng: workflow engine; only ``workflow_definition.data_type`` is read.

    Returns:
        None. When the object is an update of a non-author record and
        ``FEATURE_FLAG_ENABLE_MERGER`` is off, nothing is stored.
    """
    def _get_updated_record(obj):
        """TODO: use only head_uuid once we have the merger."""
        if 'head_uuid' not in obj.extra_data:
            # No head uuid yet: resolve the record through the approved match.
            pid_type = get_pid_type_from_schema(obj.data['$schema'])
            approved_recid = obj.extra_data['matches']['approved']
            return get_db_record(pid_type, approved_recid)
        return InspireRecord.get_record(obj.extra_data['head_uuid'])

    updating = obj.extra_data.get('is-update')
    authors_workflow = eng.workflow_definition.data_type == 'authors'

    if not updating:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])
        obj.data['control_number'] = record['control_number']
        # Keep the head uuid around so the root can be stored later.
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        merger_or_authors = authors_workflow or current_app.config.get(
            'FEATURE_FLAG_ENABLE_MERGER', False
        )
        if not merger_or_authors:
            obj.log.info(
                'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
            )
            return

        record = _get_updated_record(obj)
        obj.data['control_number'] = record['control_number']
        # Replace the whole stored content with the workflow data.
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
def test_update_handles_figures(app):
    """Updating a record with a figure attaches one file with the mocked body."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {
                'title': 'foo'
            },
        ],
        '_collections': ['Literature'],  # DESY harvest
    }
    update_to_record = {
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
    }
    expected_file_content = 'dummy body'
    expected_key = '1_graph.png'

    record = InspireRecord.create(record_json)
    assert not len(record.files)

    record.clear()
    # NOTE: ``updated_json`` is the same dict object as ``record_json``.
    updated_json = record_json
    updated_json.update(copy.deepcopy(update_to_record))

    mocked_addresses = [{
        'method': 'GET',
        'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        'body': StringIO.StringIO(expected_file_content),
    }]
    with mock_addresses(mocked_addresses):
        record.update(updated_json)

    assert expected_key in record.files.keys
    assert len(record.files) == len(update_to_record['figures'])
    assert len(record['figures']) == len(update_to_record['figures'])

    # Close the attached file explicitly rather than leaking the handle.
    with open(record.files[expected_key].obj.file.uri) as attached_file:
        file_content = attached_file.read()
    assert file_content == expected_file_content
def test_records_files_attached_correctly(app):
    """A file assigned to ``record.files`` is still listed after a commit."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(record_json)

    # Attach an empty in-memory stream under the given key.
    empty_stream = StringIO.StringIO()
    record.files['fulltext.pdf'] = empty_stream
    record.commit()

    assert 'fulltext.pdf' in record.files
def test_receive_after_model_commit(app):
    """Test if records are correctly synced with ElasticSearch."""
    record_json = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "Hello": "World",
    }

    record = InspireRecord.create(record_json)
    search = LiteratureSearch()

    # The created record is readable from ES with its original value.
    assert search.get_source(record.id)["Hello"] == "World"

    # After a change and commit, ES returns the new value.
    record["Hello"] = "INSPIRE"
    record.commit()
    assert search.get_source(record.id)["Hello"] == "INSPIRE"

    # After a forced delete, the ES lookup must fail.
    record._delete(force=True)
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def test_update_handles_documents(app):
    """Updating a record with a document attaches one file with the mocked body."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }
    update_to_record = {
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf',
        }],
    }
    expected_file_content = 'dummy body'
    expected_key = '1_Fulltext.pdf'

    record = InspireRecord.create(record_json)
    assert not len(record.files)

    record.clear()
    # NOTE: ``updated_json`` is the same dict object as ``record_json``.
    updated_json = record_json
    updated_json.update(copy.deepcopy(update_to_record))

    # ``fsopen`` in the records API is replaced so no real download happens.
    with patch(
        'inspirehep.modules.records.api.fsopen',
        mock_open(read_data=expected_file_content),
    ):
        record.update(updated_json)

    assert expected_key in record.files.keys
    assert len(record.files) == len(update_to_record['documents'])
    assert len(record['documents']) == len(update_to_record['documents'])

    # Close the attached file explicitly rather than leaking the handle.
    with open(record.files[expected_key].obj.file.uri) as attached_file:
        file_content = attached_file.read()
    assert file_content == expected_file_content
def test_update_handles_figures(isolated_app):
    """Updating a record with a figure attaches one file with the mocked body."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],  # DESY harvest
    }
    update_to_record = {
        'figures': [{
            'key': 'graph.png',
            'url': 'http://www.mdpi.com/2218-1997/3/1/24/png',
        }],
    }
    expected_file_content = 'dummy body'
    expected_key = '1_graph.png'

    record = InspireRecord.create(record_json)
    assert not len(record.files)

    record.clear()
    # NOTE: ``updated_json`` is the same dict object as ``record_json``.
    updated_json = record_json
    updated_json.update(copy.deepcopy(update_to_record))

    # ``fsopen`` in the records API is replaced so no real download happens.
    with patch(
        'inspirehep.modules.records.api.fsopen',
        mock_open(read_data=expected_file_content),
    ):
        record.update(updated_json)

    assert expected_key in record.files.keys
    assert len(record.files) == len(update_to_record['figures'])
    assert len(record['figures']) == len(update_to_record['figures'])

    # Close the attached file explicitly rather than leaking the handle.
    with open(record.files[expected_key].obj.file.uri) as attached_file:
        file_content = attached_file.read()
    assert file_content == expected_file_content
def test_records_files_attached_correctly(isolated_app):
    """A file assigned to ``record.files`` is still listed after a commit."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(record_json)

    # Attach an empty in-memory stream under the given key.
    empty_stream = StringIO.StringIO()
    record.files['fulltext.pdf'] = empty_stream
    record.commit()

    assert 'fulltext.pdf' in record.files
def test_new_record(self):
    """Committing a new record schedules exactly one ORCID push task."""
    recid = 9999912587
    cited = {'$ref': 'http://localhost:5000/api/literature/1498589'}
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': recid,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [{'record': cited}],
    }
    inspire_record = InspireRecord.create(record_json)

    orcid_config = override_config(
        FEATURE_FLAG_ENABLE_ORCID_PUSH=True,
        FEATURE_FLAG_ORCID_PUSH_WHITELIST_REGEX='.*',
        ORCID_APP_CREDENTIALS={'consumer_key': '0000-0001-8607-8906'},
    )
    with orcid_config:
        with mock.patch('inspirehep.modules.records.receivers.push_access_tokens') as mock_push_access_tokens:
            with mock.patch('inspirehep.modules.orcid.tasks.orcid_push.apply_async') as mock_apply_async:
                mock_push_access_tokens.get_access_tokens.return_value = [('myorcid', 'mytoken')]
                inspire_record.commit()

                expected_task_kwargs = {
                    'orcid': 'myorcid',
                    'oauth_token': 'mytoken',
                    'kwargs_to_pusher': {
                        'record_db_version': inspire_record.model.version_id,
                    },
                    'rec_id': recid,
                }
                mock_apply_async.assert_called_once_with(
                    kwargs=expected_task_kwargs,
                    queue='orcid_push',
                )

    _delete_record('lit', recid)
def record_to_merge(workflow_app):
    """Yield a committed thesis record, then remove its PID and delete it."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [{'full_name': 'Jessica, Jones'}],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [{'title': 'Alias Investigations'}],
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }

    record = InspireRecord.create(record_json, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: fetch the record again by uuid before cleaning it up.
    record = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=record['control_number'],
    )
    lit_pid.unassign()
    lit_pid.delete()
    record.delete()
    record.commit()
def test_that_db_changes_are_mirrored_in_es(app):
    """Committed create, update and delete of a record are each visible through ES."""
    search = LiteratureSearch()
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'foo'}],
        '_collections': ['Literature'],
    }

    # Creation: the record can be read back from ES with its title.
    record = InspireRecord.create(record_json)
    record.commit()
    db.session.commit()
    assert get_title(search.get_source(record.id)) == 'foo'

    # Update: the new title is what ES returns after the commit.
    record['titles'][0]['title'] = 'bar'
    record.commit()
    db.session.commit()
    assert get_title(search.get_source(record.id)) == 'bar'

    # Deletion: the ES lookup must fail afterwards.
    record._delete(force=True)
    db.session.commit()
    with pytest.raises(NotFoundError):
        search.get_source(record.id)
def record_insert_or_replace(json):
    """Update the record matching the JSON's control number, or create it.

    Args:
        json: record content; the control number is read from
            ``control_number`` and falls back to ``recid``.

    Returns:
        The updated or created ``InspireRecord``, or ``None`` when the JSON
        carries no (truthy) control number.
    """
    control_number = json.get('control_number', json.get('recid'))
    if not control_number:
        # Nothing to look up or create without a control number.
        return None

    pid_type = get_pid_type_from_schema(json['$schema'])
    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        # Replace the stored content wholesale with the incoming JSON.
        record.clear()
        record.update(json)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None)
        # Create persistent identifier.
        inspire_recid_minter(str(record.id), json)

    # A record flagged deleted is only deleted here when no recid can be
    # extracted from its ``new_record`` reference.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def store_record(obj, eng):
    """Create a new record from the workflow object, or update its head record.

    Args:
        obj: workflow object; ``data`` holds the record metadata, ``extra_data``
            holds ``is-update`` and ``head_uuid``.
        eng: workflow engine (not used here).
    """
    if obj.extra_data.get('is-update'):
        record = InspireRecord.get_record(obj.extra_data['head_uuid'])
        # Replace the whole stored content with the workflow data.
        record.clear()
        record.update(obj.data, files_src_records=[obj])
    else:
        record = InspireRecord.create(obj.data, id_=None)
        # Create persistent identifier.
        minted = inspire_recid_minter(str(record.id), record)
        created_pid = minted.pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = created_pid
        # Keep the head uuid around so the root can be stored later.
        obj.extra_data['head_uuid'] = str(record.id)

    record.commit()
    obj.save()
    db.session.commit()
def record_from_db(workflow_app):
    """Yield a committed article record (recid 1234), then remove its PID and delete it."""
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'Fancy title for a new record'}],
        'arxiv_eprints': [{'categories': ['hep-th'], 'value': '1407.7587'}],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [{'source': 'arxiv', 'value': 'A basic abstract.'}],
        'report_numbers': [{'value': 'DESY-17-036'}],
    }

    record = InspireRecord.create(record_json, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: fetch the record again by uuid before cleaning it up.
    record = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=record['control_number'],
    )
    lit_pid.unassign()
    lit_pid.delete()
    record.delete()
    record.commit()
def store_record(obj, eng):
    """Create a new record from the workflow object, or update the matched one.

    Args:
        obj: workflow object; ``data`` holds the record metadata, ``extra_data``
            holds ``is-update`` and either ``head_uuid`` or ``record_matches``.
        eng: workflow engine (not used here).
    """
    def _get_updated_record(obj):
        """TODO: use only head_uuid once we have the merger."""
        if 'head_uuid' not in obj.extra_data:
            # No head uuid yet: resolve the record through the first match.
            pid_type = get_pid_type_from_schema(obj.data['$schema'])
            first_match = obj.extra_data['record_matches'][0]
            return get_db_record(pid_type, first_match)
        return InspireRecord.get_record(obj.extra_data['head_uuid'])

    if obj.extra_data.get('is-update'):
        record = _get_updated_record(obj)
        obj.data['control_number'] = record['control_number']
        # Replace the whole stored content with the workflow data.
        record.clear()
        record.update(obj.data, files_src_records=[obj])
    else:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Create persistent identifier.
        minted = inspire_recid_minter(str(record.id), record)
        created_pid = minted.pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = created_pid
        # Keep the head uuid around so the root can be stored later.
        obj.extra_data['head_uuid'] = str(record.id)

    record.commit()
    obj.save()
    db.session.commit()
def store_record(obj, eng):
    """Store the workflow record locally, or delegate to the REST-based path.

    When ``FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT`` is set, the work is
    handed to ``store_record_inspirehep_api``. Otherwise the record is
    created or updated inside a nested DB transaction.

    Args:
        obj: workflow object; ``data`` holds the record metadata, ``extra_data``
            holds ``is-update`` and ``head_uuid``.
        eng: workflow engine; ``workflow_definition.data_type`` is read.
    """
    updating = obj.extra_data.get('is-update')
    authors_workflow = eng.workflow_definition.data_type == 'authors'

    if current_app.config.get("FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT"):
        store_record_inspirehep_api(obj, eng, updating, authors_workflow)
        return

    with db.session.begin_nested():
        if not updating:
            # Skip the files to avoid issues in case the record has already pid
            # TODO: remove the skip files once labs becomes master
            record = InspireRecord.create(obj.data, id_=None, skip_files=True)
            # Now that we have a recid, we can properly download the documents
            record.download_documents_and_figures(src_records=[obj])
            obj.data['control_number'] = record['control_number']
            # Keep the head uuid around so the root can be stored later.
            obj.extra_data['head_uuid'] = str(record.id)
        else:
            merger_or_authors = authors_workflow or current_app.config.get(
                'FEATURE_FLAG_ENABLE_MERGER', False
            )
            if not merger_or_authors:
                obj.log.info(
                    'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
                )
                return

            record = InspireRecord.get_record(obj.extra_data['head_uuid'])
            obj.data['control_number'] = record['control_number']
            # Replace the whole stored content with the workflow data.
            record.clear()
            record.update(obj.data, files_src_records=[obj])

        record.commit()
        obj.save()
def store_record(obj, *args, **kwargs):
    """Create a new record in the main record space from ``obj.data``.

    The record is created, a recid is minted for it, it is committed, and
    ``obj.data`` is replaced by the dump of the stored record before the DB
    session is committed.

    :param obj: workflow object whose ``data`` must contain ``$schema``.
    :raises AssertionError: if ``$schema`` is missing from ``obj.data``.
    """
    payload = obj.data
    obj.log.debug('Storing record: \n%s', pformat(payload))

    assert "$schema" in payload, "No $schema attribute found!"

    # Create record
    # FIXME: Do some preprocessing of obj.data before creating a record so that
    # we're sure that the schema will be validated without touching the full
    # holdingpen stack.
    new_record = InspireRecord.create(payload, id_=None)

    # Create persistent identifier.
    inspire_recid_minter(str(new_record.id), new_record)

    # Commit any changes to record
    new_record.commit()

    # Dump any changes to record
    obj.data = new_record.dumps()

    # Commit to DB before indexing
    db.session.commit()
def store_record(obj, *args, **kwargs):
    """Store ``obj.data`` as a brand new record and mint its recid.

    After the record is committed, ``obj.data`` is overwritten with the
    record dump and the DB session is committed.

    :param obj: workflow object whose ``data`` must contain ``$schema``.
    :raises AssertionError: if ``$schema`` is missing from ``obj.data``.
    """
    record_data = obj.data
    obj.log.debug('Storing record: \n%s', pformat(record_data))

    assert "$schema" in record_data, "No $schema attribute found!"

    # Create record
    # FIXME: Do some preprocessing of obj.data before creating a record so that
    # we're sure that the schema will be validated without touching the full
    # holdingpen stack.
    stored = InspireRecord.create(record_data, id_=None)

    # Create persistent identifier.
    inspire_recid_minter(str(stored.id), stored)

    # Commit any changes to record
    stored.commit()

    # Dump any changes to record
    obj.data = stored.dumps()

    # Commit to DB before indexing
    db.session.commit()
def store_record(obj, eng):
    """Insert a new record or replace an existing one.

    Update path (``extra_data['is-update']`` truthy): load the target
    record, keep its control number, wipe it and refill it from
    ``obj.data``. Insert path: create the record without files, mint its
    recid, download its documents and figures, then remember the control
    number and the head uuid on the workflow object.

    :param obj: workflow object carrying ``data`` and ``extra_data``.
    :param eng: workflow engine (not used by this implementation).
    """
    def _get_updated_record(obj):
        """Look up the record to update.

        TODO: use only head_uuid once we have them merger.
        """
        if 'head_uuid' in obj.extra_data:
            head_uuid = obj.extra_data['head_uuid']
            return InspireRecord.get_record(head_uuid)

        # Without a head uuid, resolve the first match through its pid.
        pid_type = get_pid_type_from_schema(obj.data['$schema'])
        first_match = obj.extra_data['record_matches'][0]
        return get_db_record(pid_type, first_match)

    is_update = obj.extra_data.get('is-update')

    if not is_update:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Create persistent identifier.
        created_pid = inspire_recid_minter(str(record.id), record).pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = created_pid
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)
    else:
        record = _get_updated_record(obj)
        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])

    record.commit()
    obj.save()
    db.session.commit()
def test_record_enhanced_in_es_and_not_enhanced_in_db(app):
    """``facet_author_name`` must exist in the ES copy only, not in the DB."""
    recid = 111
    reference = {
        'record': {
            '$ref': 'http://localhost:5000/api/literature/1498589',
        },
    }
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': recid,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [reference],
    }

    record = InspireRecord.create(record_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    from_db = get_db_record('lit', recid)
    from_es = get_es_record('lit', recid)

    assert 'facet_author_name' not in from_db
    assert 'facet_author_name' in from_es

    _delete_record('lit', recid)
def test_regression_author_count_10_does_not_display_zero_facet(isolated_api_client):
    """The ``ac 10`` facet bucket count must match the ``ac 10`` hit count."""
    ten_authors = [
        {'full_name': 'Pincopallino' + str(index)}
        for index in range(10)
    ]
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Article with 10 authors'}],
        '_collections': ['Literature'],
        'authors': ten_authors,
    }

    rec = InspireRecord.create(data=record_json)
    rec.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    response_facets = isolated_api_client.get('/literature/facets?q=ac%2010')
    response_records = isolated_api_client.get('/literature?q=ac%2010')

    # we don't have isolation on tests and are inconsistent between test
    # environments, so compare the two responses with each other instead of
    # against a fixed number.
    data_facets = json.loads(response_facets.data)
    data_records = json.loads(response_records.data)

    author_count_buckets = data_facets['aggregations']['author_count']['buckets']
    total_hits = data_records['hits']['total']
    assert author_count_buckets[0]['doc_count'] == total_hits
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Indexing citations of a record missing from the DB must raise."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited = InspireRecord.create(data=cited_json, skip_files=True)
    cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    citing_reference = {
        'record': {
            '$ref': 'http://localhost:5000/api/literature/9999',
        },
        'reference': {
            'authors': [{'full_name': 'Smith, J.'}],
        },
    }
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [citing_reference],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*task_args)

    _delete_record('lit', cited['control_number'])
def test_index_after_commit_indexes_raises_if_cited_records_are_not_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Citing a record that is not in the DB must raise when indexing."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    # Point the reference at record 9999, which was never created.
    dangling_reference = {
        'curated_relation': False,
        'record': {
            '$ref': 'http://localhost:5000/api/literature/9999',
        },
        'reference': {
            'authors': [{'full_name': 'Smith, J.'}],
        },
    }
    citing_json.update({'references': [dangling_reference]})

    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    with pytest.raises(MissingCitedRecordError):
        index_modified_citations_from_record(*task_args)

    _delete_record('lit', 8888)
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Adding a reference to a record must bump the cited record's count."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited = InspireRecord.create(data=cited_json, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', cited['control_number'], 1)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 1)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    # The reference does not point at a record yet, so nothing is cited.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    linked_reference = {
        'curated_relation': False,
        'record': {
            '$ref': 'http://localhost:5000/api/literature/9999',
        },
        'reference': {
            'authors': [{'full_name': 'Smith, J.'}],
        },
    }
    citing_json.update({'references': [linked_reference]})

    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert get_citations_from_es(es_rec).total == 1

    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """One citing record referencing two records must bump both counts."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    first_cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited1 = InspireRecord.create(data=first_cited_json, skip_files=True)
    cited1.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', cited1['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    second_cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This also is the record being cited'}],
        'control_number': 9998,
        '_collections': ['Literature'],
    }

    cited2 = InspireRecord.create(data=second_cited_json, skip_files=True)
    cited2.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', cited2['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert get_citations_from_es(es_rec1).total == 0
    assert get_citations_from_es(es_rec2).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {'reference': {'authors': [{'full_name': 'Smith, J.'}]}},
        ],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert get_citations_from_es(es_rec1).total == 0
    assert get_citations_from_es(es_rec2).total == 0

    # Now make the citing record reference both cited records.
    both_references = [
        {'record': {'$ref': 'http://localhost:5000/api/literature/9998'}},
        {'record': {'$ref': 'http://localhost:5000/api/literature/9999'}},
    ]
    citing_json.update({'references': both_references})

    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 1
    assert es_rec2['citation_count'] == 1
    assert get_citations_from_es(es_rec1).total == 1
    assert get_citations_from_es(es_rec2).total == 1

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
def test_index_after_commit_indexes_also_cites_record_when_citer_is_deleted(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Deleting the citing record must bring the citation count back to 0."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited = InspireRecord.create(data=cited_json, skip_files=True)
    cited.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    citing_reference = {
        'record': {
            '$ref': 'http://localhost:5000/api/literature/9999',
        },
        'reference': {
            'authors': [{'full_name': 'Smith, J.'}],
        },
    }
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [citing_reference],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec).total == 1

    # Delete the citer and check that the citation goes away.
    record.delete()
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    task_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited['control_number'])
def test_update_with_only_new(app):
    """``update(..., only_new=True)`` keeps attached files and adds new ones.

    The document that already points at a stored file must keep its content
    and its API url, while the newly added document is fetched (through the
    patched ``fsopen``) and stored under a new key.
    """
    def _read_stored_file(record, key):
        # Close the handle deterministically instead of leaking it.
        with open(record.files[key].obj.file.uri) as stored_file:
            return stored_file.read()

    doc1_expected_file_content = 'doc1 body'
    doc1_expected_key = '1_Fulltext.pdf'
    doc2_expected_file_content = 'doc2 body'
    doc2_expected_key = '1_Fulltext.pdf_1'
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
        }],
    }
    update_to_record = {
        'documents': [
            {
                'key': doc1_expected_key,
                'url': '/api/files/somebucket/somefile',
            },
            {
                'key': 'Fulltext.pdf',
                'url': 'http://www.mdpi.com/2218-1997/3/1/24/pdf',
            },
        ],
    }

    record = InspireRecord.create(record_json)
    assert doc1_expected_key in record.files.keys
    assert len(record.files) == len(record_json['documents'])
    assert len(record['documents']) == len(record_json['documents'])
    file_content = _read_stored_file(record, doc1_expected_key)
    assert file_content == doc1_expected_file_content
    doc1_old_api_url = record['documents'][0]['url']

    record.clear()
    record_json.update(copy.deepcopy(update_to_record))
    with patch(
        'inspirehep.modules.records.api.fsopen',
        mock_open(read_data=doc2_expected_file_content),
    ):
        record.update(record_json, only_new=True)

    assert len(record['documents']) == len(update_to_record['documents'])
    for document in record['documents']:
        assert document['key'] in [doc1_expected_key, doc2_expected_key]
        if document['key'] == doc1_expected_key:
            file_content = _read_stored_file(record, doc1_expected_key)
            assert file_content == doc1_expected_file_content
            assert document['url'] == doc1_old_api_url
        elif document['key'] == doc2_expected_key:
            file_content = _read_stored_file(record, doc2_expected_key)
            assert file_content == doc2_expected_file_content
def test_literature_citations_api_with_superseded_records(app, api_client):
    """A citing record that has a successor must not count as a citation."""
    cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(cited_json)
    record.commit()

    successor_relation = {
        'record': {'$ref': 'https://link-to-successor'},
        'relation': 'successor',
    }
    citing_superseded_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'related_records': [successor_relation],
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    citing_superseded_record = InspireRecord.create(citing_superseded_json)
    citing_superseded_record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'},
    )
    result = json.loads(response.get_data(as_text=True))

    expected_metadata = {
        "citation_count": 0,
        "citations": [],
    }

    expected_metadata['citations'].sort()
    result['metadata']['citations'].sort()

    assert response.status_code == 200
    assert expected_metadata == result['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
def test_literature_citations_api_sorted_by_earliest_date(api_client):
    """Citations must be returned from the most recent to the oldest."""
    schema_url = 'http://localhost:5000/schemas/records/hep.json'

    cited_json = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(cited_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'preprint_date': '2013-10-08',
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'preprint_date': '2015-10-08',
        'control_number': 333,
        'titles': [{'title': 'Luke Cage'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    record_json_ref_3 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'preprint_date': '2015-11-08',
        'control_number': 444,
        'titles': [{'title': 'John Doe'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_3 = InspireRecord.create(record_json_ref_3)
    record_ref_3.commit()

    db.session.commit()
    es.indices.refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'}
    )
    result = json.loads(response.get_data(as_text=True))

    # Newest preprint first.
    expected_citations = [
        {
            "control_number": 444,
            "titles": [{"title": "John Doe"}],
            "earliest_date": "2015-11-08",
        },
        {
            "control_number": 333,
            "titles": [{"title": "Luke Cage"}],
            "earliest_date": "2015-10-08",
        },
        {
            "control_number": 222,
            "titles": [{"title": "Frank Castle"}],
            "earliest_date": "2013-10-08",
        },
    ]
    expected_metadata = {
        "citation_count": 3,
        "citations": expected_citations,
    }

    assert response.status_code == 200
    assert expected_metadata == result['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
    _delete_record('lit', 444)
def test_literature_citations_api_with_not_existing_pid_value(api_client):
    """Asking for the citations of an unknown recid must answer 404."""
    schema_url = 'http://localhost:5000/schemas/records/hep.json'

    cited_json = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(cited_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 333,
        'titles': [{'title': 'Luke Cage'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    # No record was created with control number 444.
    response = api_client.get(
        '/literature/444/citations',
        headers={'Accept': 'application/json'},
    )

    assert response.status_code == 404

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
def test_literature_citations_api_with_full_citing_record(api_client):
    """Citations must expose authors, control number and titles only."""
    schema_url = 'http://localhost:5000/schemas/records/hep.json'

    cited_json = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(cited_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'authors': [{
            "full_name": "Urhan, Ahmet",
        }],
        'publication_info': [{
            "artid": "HAL Id : hal-01735421, https://hal.archives-ouvertes.fr/hal-01735421",
            "page_start": "1",
        }],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': schema_url,
        'document_type': ['article'],
        'control_number': 333,
        'titles': [{'title': 'Luke Cage'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations',
        headers={'Accept': 'application/json'},
    )
    result = json.loads(response.get_data(as_text=True))
    # Sort first: the generated author uuid is read from position 1 below.
    result['metadata']['citations'].sort()

    generated_uuid = result['metadata']['citations'][1]['authors'][0]['uuid']
    citation_with_author = {
        'authors': [{
            "full_name": "Urhan, Ahmet",
            "first_name": "Ahmet",
            "last_name": "Urhan",
            "signature_block": "URANa",
            "uuid": generated_uuid,
        }],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
    }
    citation_without_author = {
        "control_number": 333,
        "titles": [{"title": "Luke Cage"}],
    }
    expected_metadata = {
        "citation_count": 2,
        "citations": [citation_with_author, citation_without_author],
    }

    assert response.status_code == 200

    expected_metadata['citations'].sort()
    assert expected_metadata == result['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
def test_create_with_multiple_source_records(isolated_app):
    """A record can take its files from several source records at once.

    Documents copied from two existing records must all be attached to the
    new record, with unique keys, the same content and their descriptions
    in the original order.
    """
    def _read_stored_file(record, key):
        # Close the handle deterministically instead of leaking it.
        with open(record.files[key].obj.file.uri) as stored_file:
            return stored_file.read()

    expected_file_content = 'dummy body'
    rec1_expected_key = '1_Fulltext.pdf'
    rec2_expected_key = '2_Fulltext.pdf'
    rec3_expected_keys = [
        '3_Fulltext.pdf',
        '3_Fulltext.pdf_1',
    ]
    record1_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record1 document',
        }],
    }
    record2_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 2,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record2 document',
        }],
    }
    record3_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 3,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
    }

    record1 = InspireRecord.create(record1_json)
    rec1_file_content = _read_stored_file(record1, rec1_expected_key)
    assert rec1_file_content == expected_file_content

    record2 = InspireRecord.create(record2_json)
    rec2_file_content = _read_stored_file(record2, rec2_expected_key)
    assert rec2_file_content == expected_file_content

    record3_json['documents'] = copy.deepcopy(record1['documents'])
    record3_json['documents'].extend(copy.deepcopy(record2['documents']))
    record3 = InspireRecord.create(
        record3_json,
        files_src_records=[record1, record2],
    )
    assert len(record3.files) == (
        len(record1_json['documents']) + len(record2_json['documents'])
    )
    assert rec3_expected_keys == record3.files.keys
    for file_key in record3.files.keys:
        rec3_file_content = _read_stored_file(record3, file_key)
        assert rec3_file_content == expected_file_content

    expected_descs = [
        orig_doc['description'] for orig_doc in record3_json['documents']
    ]
    current_descs = [
        doc['description'] for doc in record3['documents']
    ]
    assert current_descs == expected_descs
def create_author(profile):
    """Create a new author profile based on a given signature.

    The method receives a dictionary representing an author. Based on
    the values, it builds the record data, creates the record through
    ``InspireRecord.create``, mints an Inspire recid for it and commits
    everything to the database.

    While the record is being created the ``after_record_insert`` signal
    handler ``append_new_record_to_queue`` is disconnected. It is always
    reconnected afterwards, even if the creation fails.

    :param profile: A signature representing an author's to be created as
                    a profile.

    Example:
        profile = {u'affiliations': [{u'value': u'Yerevan Phys. Inst.'}],
                   u'alternative_name': None,
                   u'curated_relation': False,
                   u'email': None,
                   u'full_name': u'Chatrchyan, Serguei',
                   u'inspire_id': None,
                   u'orcid': None,
                   u'profile': u'',
                   u'recid': None,
                   u'role': None,
                   u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c'}

    :return: A recid, where the new profile can be accessed.

    Example:
        "1234"
    """
    name = profile.get('full_name')

    # Template of an initial record.
    record = {'collections': [{'primary': 'HEPNAMES'}],
              'name': {'value': name},
              '$schema': _get_author_schema()}

    # The author's email address.
    # Unfortunately the method will not correlate a given e-mail address
    # with an affiliation.
    if 'email' in profile:
        email = profile.get('email')

        record['positions'] = []
        record['positions'].append({'email': email})

    # The author can be a member of more than one affiliation.
    if 'affiliations' in profile:
        affiliations = profile.get('affiliations')

        if 'positions' not in record:
            record['positions'] = []

        for affiliation in affiliations:
            institution_name = affiliation.get('value')
            recid = affiliation.get('recid', None)

            if recid:
                record['positions'].append(
                    {'institution': {'name': institution_name,
                                     'recid': recid}})
            else:
                record['positions'].append(
                    {'institution': {'name': institution_name}})

    # FIXME: The method should also collect the useful data
    #        from the publication, like category field, subject,
    #        etc.

    # Disconnect the signal on insert of a new record.
    after_record_insert.disconnect(append_new_record_to_queue)

    try:
        # Create a new author profile.
        record = InspireRecord.create(record, id_=None)

        # Create Inspire recid.
        record_pid = inspire_recid_minter(record.id, record)

        # Extend the new record with Inspire recid and self key.
        record['control_number'] = record_pid.pid_value
        record['self'] = inspire_dojson_utils.get_record_ref(
            record_pid.pid_value, 'authors')

        # Apply the changes.
        record.commit()
        db.session.commit()
    finally:
        # Reconnect the disconnected signal, whatever happened above, so a
        # failed creation does not leave the handler detached for good.
        after_record_insert.connect(append_new_record_to_queue)

    # Report.
    logger.info("Created profile: %s", record_pid.pid_value)

    # Return the recid of new profile to which signatures will point to.
    return record_pid.pid_value
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Referencing two records from one citer must bump both their counts."""
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    first_cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'This is the record being cited'},
        ],
        'control_number': 9999,
        '_collections': ['Literature'],
    }

    cited1 = InspireRecord.create(data=first_cited_json, skip_files=True)
    cited1.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', cited1['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    second_cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'This also is the record being cited'},
        ],
        'control_number': 9998,
        '_collections': ['Literature'],
    }

    cited2 = InspireRecord.create(data=second_cited_json, skip_files=True)
    cited2.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', cited2['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec1).total == 0
    assert LiteratureSearch.citations(es_rec2).total == 0

    unlinked_reference = {
        'reference': {
            'authors': [{'full_name': 'Smith, J.'}],
        },
    }
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [
            {'title': 'Record citing the first one'},
        ],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [unlinked_reference],
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec1).total == 0
    assert LiteratureSearch.citations(es_rec2).total == 0

    # Replace the references with links to both cited records.
    linked_references = [
        {'record': {'$ref': 'http://localhost:5000/api/literature/9998'}},
        {'record': {'$ref': 'http://localhost:5000/api/literature/9999'}},
    ]
    citing_json.update({'references': linked_references})

    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    task_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*task_args)
    # execute mocked task
    index_modified_citations_from_record(*task_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 1
    assert es_rec2['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec1).total == 1
    assert LiteratureSearch.citations(es_rec2).total == 1

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that adding a citation to an existing record is indexed.

    Record 9999 is created, then record 8888 is created with a reference
    that is not linked to any record. Once that reference is updated to
    point to 9999, the cited record must report exactly one citation.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    def db_commit_and_run_indexing_task(rec, expected_version):
        # ``expected_version`` is the third argument the mocked task must
        # have been called with (presumably the record version id).
        db.session.commit()
        es.indices.refresh('records-hep')

        task_args = ('lit', rec['control_number'], expected_version)
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_cited_times(expected_count):
        es_rec = get_es_record('lit', 9999)
        assert es_rec['citation_count'] == expected_count
        assert LiteratureSearch.citations(es_rec).total == expected_count

    cited_json = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }
    # No ``cited.commit()`` here: only the DB session is committed.
    cited = InspireRecord.create(data=cited_json, skip_files=True)
    db_commit_and_run_indexing_task(cited, 1)

    assert_cited_times(0)

    citing_json = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {"reference": {'authors': [{'full_name': 'Smith, J.'}]}}
        ]
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    db_commit_and_run_indexing_task(record, 1)

    # The reference is not linked to record 9999 yet.
    assert_cited_times(0)

    linked_references = {
        'references': [
            {
                "curated_relation": False,
                "record": {
                    "$ref": "http://localhost:5000/api/literature/9999"
                },
                "reference": {
                    'authors': [{'full_name': 'Smith, J.'}],
                }
            }
        ]
    }
    citing_json.update(linked_references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db_commit_and_run_indexing_task(record, 2)

    assert_cited_times(1)

    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_create_with_multiple_source_records(app):
    """Check that a record can take its documents from several records.

    Records 1 and 2 are each created with one document under the same key.
    Record 3 is then created with copies of both documents, passing the
    first two records as ``files_src_records``. It must end up with two
    files under distinct keys, both with the expected content, and with the
    document descriptions in their original order.
    """
    expected_file_content = 'dummy body'
    rec1_expected_key = '1_Fulltext.pdf'
    rec2_expected_key = '2_Fulltext.pdf'
    rec3_expected_keys = [
        '3_Fulltext.pdf',
        '3_Fulltext.pdf_1',
    ]

    def read_attached_file(rec, key):
        # Use a context manager so the file handle is always closed.
        with open(rec.files[key].obj.file.uri) as attached_file:
            return attached_file.read()

    record1_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 1,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record1 document',
        }],
    }
    record2_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 2,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
        'documents': [{
            'key': 'Fulltext.pdf',
            'url': '/some/non/existing/path.pdf',
            'description': 'record2 document',
        }],
    }
    record3_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 3,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'foo'},
        ],
        '_collections': [
            'Literature'
        ],
        # DESY harvest
        # (the documents are added below, copied from records 1 and 2)
    }

    record1 = InspireRecord.create(record1_json)
    assert (
        read_attached_file(record1, rec1_expected_key) ==
        expected_file_content
    )

    record2 = InspireRecord.create(record2_json)
    assert (
        read_attached_file(record2, rec2_expected_key) ==
        expected_file_content
    )

    record3_json['documents'] = copy.deepcopy(record1['documents'])
    record3_json['documents'].extend(copy.deepcopy(record2['documents']))
    record3 = InspireRecord.create(
        record3_json,
        files_src_records=[record1, record2],
    )

    assert len(record3.files) == (
        len(record1_json['documents']) +
        len(record2_json['documents'])
    )
    # ``list`` keeps the comparison valid even if ``keys`` is not a list.
    assert rec3_expected_keys == list(record3.files.keys)
    for file_key in record3.files.keys:
        assert (
            read_attached_file(record3, file_key) == expected_file_content
        )

    expected_descs = [
        orig_doc['description'] for orig_doc in record3_json['documents']
    ]
    current_descs = [
        doc['description'] for doc in record3['documents']
    ]
    assert current_descs == expected_descs
def test_literature_citations_api_with_parameter_page_2(api_client):
    """Check the second page of the citations endpoint with one hit a page.

    Record 111 is cited by records 222 and 333. Requesting ``size=1&page=2``
    must return a single citation; either citing record is accepted, since
    the test does not depend on the order of the results.
    """
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited_record = InspireRecord.create({
        '$schema': hep_schema,
        'document_type': [
            'article',
        ],
        'control_number': 111,
        'titles': [
            {
                'title': 'Jessica Jones',
            },
        ],
        '_collections': ['Literature']
    })
    cited_record.commit()

    def create_citing_record(control_number, title):
        # Create and commit a record whose only reference is the cited one.
        citing_record = InspireRecord.create({
            '$schema': hep_schema,
            'document_type': [
                'article',
            ],
            'control_number': control_number,
            'titles': [
                {
                    'title': title,
                },
            ],
            'references': [{
                'record': {
                    '$ref': cited_record._get_ref()
                }
            }],
            '_collections': ['Literature']
        })
        citing_record.commit()
        return citing_record

    create_citing_record(222, 'Frank Castle')
    create_citing_record(333, 'Luke Cage')

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get(
        '/literature/111/citations?size=1&page=2',
        headers={'Accept': 'application/json'},
    )
    result = json.loads(response.get_data(as_text=True))

    # One acceptable payload per citing record that may land on page 2.
    expected_metadata = [
        {
            "citation_count": 2,
            "citations": [
                {
                    "control_number": recid,
                    "titles": [{"title": title}],
                },
            ],
        }
        for recid, title in ((222, "Frank Castle"), (333, "Luke Cage"))
    ]

    assert response.status_code == 200
    assert result['metadata'] in expected_metadata

    for recid in (111, 222, 333):
        _delete_record('lit', recid)
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that the indexing task fails when the record cannot be found.

    Record 9999 is created and indexed, then record 8888 citing it is
    created. The citing record is deleted before the indexing task runs, to
    simulate a record that is not committed yet, and the task is expected
    to raise ``RecordGetterError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    hep_schema = 'http://localhost:5000/schemas/records/hep.json'

    cited_json = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }
    cited = InspireRecord.create(data=cited_json, skip_files=True)
    cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    cited_task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*cited_task_args)
    # execute mocked task
    index_modified_citations_from_record(*cited_task_args)

    cited_es_rec = get_es_record('lit', 9999)
    assert cited_es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(cited_es_rec).total == 0

    citing_json = {
        '$schema': hep_schema,
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999'
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                }
            }
        ]
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    citing_task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*citing_task_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*citing_task_args)

    _delete_record('lit', cited['control_number'])