def test_process_references_in_records_reindexes_conferences_when_pub_info_changes(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """Processing a conference paper refreshes the linked conference record.

    A conference record and a literature record pointing at it through
    ``publication_info`` are created while ``index_after_commit`` is
    disconnected. Once ``process_references_in_records`` has run for the
    literature record, the conference data returned by
    ``InspireSearch.get_record_data_from_es`` must report exactly one
    contribution.
    """
    # Disconnect this signal so the records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        conference_data = faker.record("con", with_control_number=True)
        conference_record = InspireRecord.create(conference_data)
        conference_control_number = conference_record["control_number"]
        conf_ref = f"http://localhost:8000/api/conferences/{conference_control_number}"

        data = faker.record("lit", with_control_number=True)
        data["publication_info"] = [{"conference_record": {"$ref": conf_ref}}]
        data["document_type"] = ["conference paper"]
        record = InspireRecord.create(data)
        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when setup fails, so later tests keep indexing on commit.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])
    # Block until the task finishes; its return value is not needed here.
    task.get(timeout=5)

    conference_record_es = InspireSearch.get_record_data_from_es(conference_record)
    expected_number_of_contributions = 1
    assert (
        expected_number_of_contributions
        == conference_record_es["number_of_contributions"]
    )
def test_process_references_in_records_reindexes_experiments_when_linked_experiments_change(
    app, celery_app_with_context, celery_session_worker
):
    """Processing a paper refreshes the experiment it is linked to.

    An experiment record and a literature record referencing it through
    ``accelerator_experiments`` are created while ``index_after_commit`` is
    disconnected. After ``process_references_in_records`` has run for the
    literature record, the experiment data returned by
    ``InspireSearch.get_record_data_from_es`` must report one paper.
    """
    # NOTE(review): sibling tests in this file request ``inspire_app``;
    # confirm that the ``app`` fixture is the intended one here.

    # Disconnect this signal so the records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        experiment_data = faker.record("exp", with_control_number=True)
        experiment = InspireRecord.create(experiment_data)
        db.session.commit()

        experiment_control_number = experiment["control_number"]
        exp_ref = f"http://localhost:8000/api/experiments/{experiment_control_number}"

        data = faker.record("lit", with_control_number=True)
        data["accelerator_experiments"] = [
            {"legacy_name": "LIGO", "record": {"$ref": exp_ref}}
        ]
        record = InspireRecord.create(data)
        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when setup fails, so later tests keep indexing on commit.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([record.id])
    task.get(timeout=5)

    experiment_record_es = InspireSearch.get_record_data_from_es(experiment)
    expected_number_of_paper = 1
    assert expected_number_of_paper == experiment_record_es["number_of_papers"]
def test_process_references_in_records_with_different_type_of_records_doesnt_throw_an_exception(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """The task accepts a mix of record types and still updates citations.

    Two citing/cited literature pairs plus one record of each of the types
    ``aut``, ``job``, ``jou``, ``exp``, ``con``, ``dat`` and ``ins`` are
    created while ``index_after_commit`` is disconnected. The task must
    return the submitted ids as strings, in the same order, and both cited
    records must end up with ``citation_count == 1`` in the data returned by
    ``InspireSearch.get_record_data_from_es``.
    """
    # Disconnect this signal so the records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        cited_record_1 = LiteratureRecord.create(faker.record("lit"))
        cited_record_2 = LiteratureRecord.create(faker.record("lit"))

        data_citing_record_1 = faker.record(
            "lit", literature_citations=[cited_record_1["control_number"]]
        )
        citing_record_1 = LiteratureRecord.create(data_citing_record_1)
        data_citing_record_2 = faker.record(
            "lit", literature_citations=[cited_record_2["control_number"]]
        )
        citing_record_2 = LiteratureRecord.create(data_citing_record_2)

        db.session.commit()

        records = [
            create_record_async("aut"),
            create_record_async("job"),
            create_record_async("jou"),
            create_record_async("exp"),
            create_record_async("con"),
            create_record_async("dat"),
            create_record_async("ins"),
        ]
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when setup fails, so later tests keep indexing on commit.
        models_committed.connect(index_after_commit)

    uuids = [record.id for record in records] + [
        citing_record_1.id,
        citing_record_2.id,
    ]

    task = process_references_in_records.delay(uuids)
    results = task.get(timeout=5)

    # The task result is compared against the stringified ids.
    expected_results = [str(record_id) for record_id in uuids]
    assert results == expected_results

    result_cited_record_1 = InspireSearch.get_record_data_from_es(cited_record_1)
    expected_result_cited_record_1_citation_count = 1
    assert (
        expected_result_cited_record_1_citation_count
        == result_cited_record_1["citation_count"]
    )

    result_cited_record_2 = InspireSearch.get_record_data_from_es(cited_record_2)
    expected_result_cited_record_2_citation_count = 1
    assert (
        expected_result_cited_record_2_citation_count
        == result_cited_record_2["citation_count"]
    )
def test_process_references_in_records_process_author_records(
    mock_batch_index, inspire_app, clean_celery_session
):
    """Changing an author schedules its papers for batch indexing.

    An author and two literature records crediting that author are created
    and awaited in the search index. The author's name is then changed while
    ``index_after_commit`` is disconnected, and
    ``process_references_in_records`` is run for the author. The first
    positional argument of the first recorded ``mock_batch_index`` call must
    contain the stringified ids of both literature records.
    """
    author_record = AuthorsRecord.create(faker.record("aut"))

    def create_paper_by_author():
        # A fresh payload is built per call so the two papers never share
        # the same ``data`` dictionary.
        return LiteratureRecord.create(
            faker.record(
                "lit",
                data={
                    "authors": [
                        {
                            "full_name": author_record["name"]["value"],
                            "record": author_record["self"],
                        }
                    ]
                },
            )
        )

    lit_record = create_paper_by_author()
    lit_record_2 = create_paper_by_author()

    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    # Wait until all three records can be fetched from the search index.
    retry_until_pass(assert_records_in_es, retry_interval=5)

    # Disconnect the signal so the author update isn't indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        author_record["name"]["value"] = "Another Name"
        author_record.update(dict(author_record))
        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when the update fails, so later tests keep indexing.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([author_record.id])
    task.get(timeout=5)

    # mock_calls[0][1][0]: first recorded call -> positional args -> first arg.
    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [str(lit_record.id), str(lit_record_2.id)]
    )
def test_process_references_in_records_process_conference_records(
    mock_batch_index, inspire_app, clean_celery_session
):
    """Changing a conference schedules its papers for batch indexing.

    A conference and two conference papers referencing it are created and
    awaited in the search index. The conference title is then changed while
    ``index_after_commit`` is disconnected, and
    ``process_references_in_records`` is run for the conference. The first
    positional argument of the first recorded ``mock_batch_index`` call must
    contain the ids of both literature records.
    """
    conf_record = ConferencesRecord.create(
        faker.record("con", data={"titles": [{"title": "Test conference"}]})
    )

    lit_data = {
        "publication_info": [
            {"conference_record": {"$ref": conf_record["self"]["$ref"]}}
        ],
        "document_type": ["conference paper"],
    }

    lit_record = LiteratureRecord.create(faker.record("lit", data=lit_data))
    lit_record_2 = LiteratureRecord.create(faker.record("lit", data=lit_data))

    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(lit_record_2)
        conf_record_from_es = InspireSearch.get_record_data_from_es(conf_record)
        assert lit_record_from_es and conf_record_from_es and lit_record_from_es_2

    # Wait until all three records can be fetched from the search index.
    retry_until_pass(assert_records_in_es, retry_interval=5)

    # Disconnect the signal so the conference update isn't indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        conf_record["titles"] = [{"title": "Southern California Strings Seminar "}]
        conf_record.update(dict(conf_record))
        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when the update fails, so later tests keep indexing.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([conf_record.id])
    task.get(timeout=5)

    # NOTE(review): the author-record test in this file compares against
    # ``str(id)`` values while this one compares raw ids; confirm that raw
    # ids are what the batch indexer receives on this code path.
    # mock_calls[0][1][0]: first recorded call -> positional args -> first arg.
    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted(
        [lit_record.id, lit_record_2.id]
    )
def test_process_references_in_records(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """Processing citing records updates the citation count of cited ones.

    Two cited literature records and one citing record for each are created
    while ``index_after_commit`` is disconnected. After
    ``process_references_in_records`` has run for the two citing records,
    both cited records must show ``citation_count == 1`` in the data returned
    by ``InspireSearch.get_record_data_from_es``.
    """
    # Disconnect this signal so the records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        cited_record_1 = LiteratureRecord.create(faker.record("lit"))
        cited_record_2 = LiteratureRecord.create(faker.record("lit"))

        data_citing_record_1 = faker.record(
            "lit", literature_citations=[cited_record_1["control_number"]]
        )
        citing_record_1 = LiteratureRecord.create(data_citing_record_1)
        data_citing_record_2 = faker.record(
            "lit", literature_citations=[cited_record_2["control_number"]]
        )
        citing_record_2 = LiteratureRecord.create(data_citing_record_2)

        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when setup fails, so later tests keep indexing on commit.
        models_committed.connect(index_after_commit)

    uuids = [citing_record_1.id, citing_record_2.id]

    task = process_references_in_records.delay(uuids)
    # Block until the task finishes; its return value is not needed here.
    task.get(timeout=5)

    result_cited_record_1 = InspireSearch.get_record_data_from_es(cited_record_1)
    expected_result_cited_record_1_citation_count = 1
    assert (
        expected_result_cited_record_1_citation_count
        == result_cited_record_1["citation_count"]
    )

    result_cited_record_2 = InspireSearch.get_record_data_from_es(cited_record_2)
    expected_result_cited_record_2_citation_count = 1
    assert (
        expected_result_cited_record_2_citation_count
        == result_cited_record_2["citation_count"]
    )
def test_process_references_in_records_reindexes_institutions_when_linked_institutions_change(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """Processing papers refreshes the institution they are linked to.

    One institution and three literature records are created while
    ``index_after_commit`` is disconnected. Each literature record references
    the institution through a different field: an author affiliation,
    ``thesis_info.institutions`` and ``record_affiliations``. After
    ``process_references_in_records`` has run for the three papers, the
    institution data returned by ``InspireSearch.get_record_data_from_es``
    must report three papers.
    """
    # Disconnect this signal so the records don't get indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        institution_data = faker.record("ins", with_control_number=True)
        institution = InspireRecord.create(institution_data)

        institution_control_number = institution["control_number"]
        inst_ref = f"http://localhost:8000/api/institutions/{institution_control_number}"

        # Paper linked through an author affiliation.
        data = faker.record("lit", with_control_number=True)
        data.update(
            {
                "authors": [
                    {
                        "full_name": "John Doe",
                        "affiliations": [
                            {"value": "Institution", "record": {"$ref": inst_ref}}
                        ],
                    }
                ]
            }
        )
        record_authors_aff = InspireRecord.create(data)
        db.session.commit()

        # Paper linked through its thesis information.
        data = faker.record("lit", with_control_number=True)
        data.update(
            {"thesis_info": {"institutions": [{"record": {"$ref": inst_ref}}]}}
        )
        record_thesis_info = InspireRecord.create(data)
        db.session.commit()

        # Paper linked through record-level affiliations.
        data = faker.record("lit", with_control_number=True)
        data.update(
            {
                "record_affiliations": [
                    {"record": {"$ref": inst_ref}, "value": "Institution"}
                ]
            }
        )
        record_affiliations = InspireRecord.create(data)
        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when setup fails, so later tests keep indexing on commit.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay(
        [record_authors_aff.id, record_thesis_info.id, record_affiliations.id]
    )
    task.get(timeout=5)

    institution_record_es = InspireSearch.get_record_data_from_es(institution)
    expected_number_of_paper = 3
    assert expected_number_of_paper == institution_record_es["number_of_papers"]
def test_process_references_in_records_process_self_citations(
    mock_batch_index, inspire_app, clean_celery_session, enable_self_citations
):
    """Adding a shared author schedules the citing paper for batch indexing.

    Two authors are created, together with a paper by the first author and a
    second paper by the second author that cites the first paper. Once the
    records can be fetched from the search index, the second author is
    appended to the first paper while ``index_after_commit`` is disconnected,
    and ``process_references_in_records`` is run for the first paper. The
    first positional argument of the first recorded ``mock_batch_index`` call
    must contain only the id of the citing paper.
    """

    def author_ids(inspire_id, bai):
        # Return a fresh list per call so records never share mutable state.
        return [
            {"value": inspire_id, "schema": "INSPIRE ID"},
            {"value": bai, "schema": "INSPIRE BAI"},
        ]

    author_record = AuthorsRecord.create(
        faker.record(
            "aut",
            data={
                "name": {
                    "value": "'t Hooft, Gerardus",
                    "name_variants": ["'t Hooft, Gerard", "Hooft, Gerard T."],
                    "preferred_name": "Gerardus 't Hooft",
                },
                "ids": author_ids("INSPIRE-00060582", "G.tHooft.1"),
            },
        )
    )

    author_record_2 = AuthorsRecord.create(
        faker.record(
            "aut",
            data={
                "name": {
                    "value": "'t Hooft, Gerardus Marcus",
                    "preferred_name": "Gerardus Marcus 't Hooft",
                },
                "ids": author_ids("INSPIRE-00060583", "G.tHooft.2"),
            },
        )
    )

    lit_record = LiteratureRecord.create(
        faker.record(
            "lit",
            data={
                "authors": [
                    {
                        "ids": author_ids("INSPIRE-00060582", "G.tHooft.1"),
                        "full_name": author_record["name"]["value"],
                        "record": author_record["self"],
                    }
                ]
            },
        )
    )

    lit_record_2 = LiteratureRecord.create(
        faker.record(
            "lit",
            literature_citations=[lit_record["control_number"]],
            data={
                "authors": [
                    {
                        "ids": author_ids("INSPIRE-00060583", "G.tHooft.2"),
                        "full_name": author_record_2["name"]["value"],
                        "record": author_record_2["self"],
                    }
                ]
            },
        )
    )

    db.session.commit()

    def assert_records_in_es():
        lit_record_from_es = InspireSearch.get_record_data_from_es(lit_record)
        lit_record_from_es_2 = InspireSearch.get_record_data_from_es(lit_record_2)
        aut_record_from_es = InspireSearch.get_record_data_from_es(author_record)
        assert lit_record_from_es and aut_record_from_es and lit_record_from_es_2

    # Wait until the papers and the first author can be fetched from the index.
    retry_until_pass(assert_records_in_es, retry_interval=5)

    # Disconnect the signal so the paper update isn't indexed on commit.
    models_committed.disconnect(index_after_commit)
    try:
        # Add the second author to the cited paper.
        lit_record["authors"].append(
            {
                "ids": author_ids("INSPIRE-00060583", "G.tHooft.2"),
                "full_name": author_record_2["name"]["value"],
                "record": author_record_2["self"],
            }
        )
        lit_record.update(dict(lit_record))
        db.session.commit()
    finally:
        # Reconnect the signal before calling process_references_in_records,
        # and also when the update fails, so later tests keep indexing.
        models_committed.connect(index_after_commit)

    task = process_references_in_records.delay([lit_record.id])
    task.get(timeout=5)

    # mock_calls[0][1][0]: first recorded call -> positional args -> first arg.
    assert sorted(mock_batch_index.mock_calls[0][1][0]) == sorted([lit_record_2.id])