def test_disambiguate_authors_on_first_last_name_and_initials(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check that a bare signature ends up with the second curated author's record.

    Two literature records with curated "'t Hooft" signatures are created and
    awaited in Elasticsearch. A third record carries only the full name
    "'t Hooft, Gerard Antonio"; the test waits until its indexed author
    ``record`` equals the ``record`` supplied for the second literature record.
    """
    short_name_data = faker.record("lit", with_control_number=True)
    short_name_data["authors"] = [{
        "full_name": "'t Hooft, Gerard",
        "curated_relation": True,
        "record": {"$ref": "http://localhost:5000/api/authors/999108"},
        "ids": [{"schema": "INSPIRE BAI", "value": "G.Hooft.2"}],
    }]
    short_name_record = LiteratureRecord.create(short_name_data)

    long_name_data = faker.record("lit", with_control_number=True)
    long_name_data["authors"] = [{
        "full_name": "'t Hooft, Gerard Antonio",
        "curated_relation": True,
        "record": {"$ref": "http://localhost:5000/api/authors/999105"},
        "ids": [{"schema": "INSPIRE BAI", "value": "G.Hooft.1"}],
    }]
    long_name_record = LiteratureRecord.create(long_name_data)
    db.session.commit()

    def curated_records_are_indexed():
        indexed_short = InspireSearch.get_record_data_from_es(
            short_name_record)
        indexed_long = InspireSearch.get_record_data_from_es(
            long_name_record)
        assert indexed_short and indexed_long

    retry_until_pass(curated_records_are_indexed, retry_interval=3)

    bare_data = faker.record("lit", with_control_number=True)
    bare_data["authors"] = [{"full_name": "'t Hooft, Gerard Antonio"}]
    bare_record = LiteratureRecord.create(bare_data)
    db.session.commit()

    def bare_signature_is_linked():
        indexed_bare = InspireSearch.get_record_data_from_es(bare_record)
        expected_author_record = long_name_data["authors"][0]["record"]
        assert expected_author_record == indexed_bare["authors"][0]["record"]

    retry_until_pass(bare_signature_is_linked, retry_interval=2)
 def assert_disambiguation_on_update():
     # NOTE(review): this helper is indented by a single space, is never
     # called in the visible code, and reads `literature_record_uuid`, which
     # is not defined in the visible enclosing scope. It looks like a stray
     # copy of a nested helper from another test -- confirm before relying
     # on it.
     # Close the session before re-reading the record; presumably this forces
     # a fresh load instead of reusing cached session state -- TODO confirm.
     db.session.close()
     literature_record = LiteratureRecord.get_record(literature_record_uuid)
     literature_record_from_es = InspireSearch.get_record_data_from_es(
         literature_record)
     # Both the database copy and the Elasticsearch copy must expose a
     # non-empty "$ref" on the first author's "record".
     assert literature_record["authors"][0]["record"]["$ref"]
     assert literature_record_from_es["authors"][0]["record"]["$ref"]
def test_disambiguation_handle_deleted_records(inspire_app,
                                               clean_celery_session,
                                               enable_disambiguation):
    """Check that updating a record flagged ``deleted`` commits without raising.

    A literature record is created with ``"deleted": True`` and one author
    carrying an INSPIRE BAI. The author then receives an affiliation, the
    record is updated, and the session is committed; the test passes when
    that final commit does not raise.
    """
    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({
        "authors": [{
            "full_name":
            "Kowalczyk, Elisabeth",
            "ids": [{
                "schema": "INSPIRE BAI",
                "value": "E.Kowalczyk.1"
            }],
        }],
        "deleted":
        True,
    })

    literature_record = LiteratureRecord.create(data=literature_data)
    db.session.commit()

    literature_record["authors"][0]["affiliations"] = [{"value": "test"}]
    literature_record.update(dict(literature_record))

    # Commit directly instead of wrapping it in `except Exception: assert
    # False`: an exception still fails the test, but the original error and
    # traceback are reported instead of a bare AssertionError.
    db.session.commit()
def test_disambiguate_authors_create_new_author(inspire_app,
                                                clean_celery_session,
                                                enable_disambiguation):
    """Check that an author record named after the signature gets linked.

    A literature record with a single "Michal Kowal" signature is created and
    awaited in Elasticsearch. The test then waits until the first hit of an
    empty authors search is named "Michal Kowal" and its control number
    equals the ``recid`` of the indexed signature.
    """
    lit_data = faker.record("lit", with_control_number=True)
    lit_data["authors"] = [{
        "full_name": "Michal Kowal",
        "affiliations": [{"value": "Warsaw U."}],
    }]
    lit_record = LiteratureRecord.create(data=lit_data)
    db.session.commit()

    def lit_record_is_indexed():
        indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
        assert indexed_lit

    retry_until_pass(lit_record_is_indexed, retry_interval=3)

    def signature_points_to_new_author():
        indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
        author_hits = AuthorsSearch().query_from_iq("").execute()
        assert author_hits.hits[0].name["value"] == "Michal Kowal"
        signature_recid = indexed_lit["authors"][0]["recid"]
        assert signature_recid == author_hits.hits[0].control_number

    retry_until_pass(signature_points_to_new_author)
def test_disambiguate_authors_create_two_author_with_same_name(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check that two identically named signatures yield two author hits.

    One literature record with two "Michal Kowal" signatures is created and
    awaited in Elasticsearch; the test then waits until an empty authors
    search returns exactly two hits.
    """
    lit_data = faker.record("lit", with_control_number=True)
    lit_data["authors"] = [
        {"full_name": "Michal Kowal"},
        {"full_name": "Michal Kowal"},
    ]
    lit_record = LiteratureRecord.create(data=lit_data)
    db.session.commit()

    def lit_record_is_indexed():
        indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
        assert indexed_lit

    retry_until_pass(lit_record_is_indexed, retry_interval=3)

    def two_authors_are_indexed():
        author_hits = AuthorsSearch().query_from_iq("").execute().hits
        assert len(author_hits) == 2

    retry_until_pass(two_authors_are_indexed)
Пример #6
0
    def get_resolved_references_by_control_number(self, data):
        """Map control numbers to the dumps of the linked reference records.

        ``data`` is normalised with ``force_list`` and handed to
        ``LiteratureRecord.get_es_linked_references``; every record it yields
        is stored under its ``control_number`` as ``record.dumps()``.
        """
        references = force_list(data)
        # Function-scope import; presumably avoids a circular import -- confirm.
        from inspirehep.records.api.literature import LiteratureRecord

        dumps_by_control_number = {}
        for linked in LiteratureRecord.get_es_linked_references(references):
            control_number = linked["control_number"]
            dumps_by_control_number[control_number] = linked.dumps()
        return dumps_by_control_number
Пример #7
0
def link_signature_to_author(signature_data, author_control_number):
    """Add the ``record/$ref`` of the given author to the given signature.

    Args:
        signature_data (dict): Mapping with the keys ``publication_id`` (pid
            value of the literature record) and ``signature_uuid`` (uuid of
            the author entry to link).
        author_control_number (int): The control number of the author to
            which we want to link.

    Returns:
        dict or None: The signature from the publication with the linked
        author, or ``None`` when nothing was changed: no author entry has the
        requested uuid, the signature already has a ``record`` with a truthy
        ``curated_relation``, or it already points to the requested author.
    """
    record = LiteratureRecord.get_record_by_pid_value(
        signature_data["publication_id"])
    # Default to an empty list so a record without "authors" is reported as
    # "signature not found" instead of raising TypeError on iteration.
    signature = next(
        (author for author in record.get("authors", [])
         if author.get("uuid") == signature_data["signature_uuid"]),
        None,
    )
    if not signature:
        return None

    if signature.get("curated_relation"):
        if "record" in signature:
            # A curated link is never overwritten.
            return None
        # A curated flag without a linked record is reset before linking.
        signature["curated_relation"] = False

    new_author_record = get_record_ref(author_control_number, "authors")
    if new_author_record == signature.get("record"):
        # no changes, avoid creating a new useless version of the record
        return None

    signature["record"] = new_author_record
    record.update(dict(record))
    return signature
Пример #8
0
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Check the indexed facet name and author ref after ``disambiguate_signatures``.

    A literature record with one "Doe, John" signature is clustered with no
    authors. Exactly one ``aut`` persistent identifier must exist afterwards,
    and the ``records-hep`` search result must expose the matching
    ``facet_author_name`` and author ``$ref``.

    NOTE(review): another function with this exact name appears in this
    file; if both live in one module the later definition replaces this one.
    """
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    lit_data = faker.record("lit")
    lit_data["authors"] = [{"full_name": "Doe, John", "uuid": signature_uuid}]
    lit_record = LiteratureRecord.create(lit_data)
    db.session.commit()

    cluster = {
        "signatures": [{
            "publication_id": lit_record["control_number"],
            "signature_uuid": signature_uuid,
        }],
        "authors": [],
    }
    disambiguate_signatures([cluster])

    author_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(author_pids) == 1
    pid_value = author_pids[0].pid_value
    created_author = AuthorsRecord.get_record_by_pid_value(pid_value)
    author_control_number = created_author.pop("control_number")

    facet_names = [f"{author_control_number}_John Doe"]
    author_ref = f"http://localhost:5000/api/authors/{pid_value}"

    refresh_index = {
        "step": current_search.flush_and_refresh,
        "args": ["records-hep"],
    }
    search_single_hit = {
        "step": es_search,
        "args": ["records-hep"],
        "expected_result": {
            "expected_key": "hits.total.value",
            "expected_result": 1,
        },
    }
    facet_name_matches = {
        "expected_key": "hits.hits[0]._source.facet_author_name",
        "expected_result": facet_names,
    }
    author_ref_matches = {
        "expected_key": "hits.hits[0]._source.authors[0].record.$ref",
        "expected_result": author_ref,
    }
    retry_until_matched([
        refresh_index,
        search_single_hit,
        facet_name_matches,
        author_ref_matches,
    ])
def test_disambiguation_doesnt_assign_bai_when_already_in_author(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check that the indexed signature carries the author record's BAI.

    An author "Brian Gross" with BAI ``J.M.Maldacena.1`` is created and
    awaited in Elasticsearch. A literature record whose signature has the
    same name and email but BAI ``A.Test.1`` is then created; the test waits
    until the indexed signature's ids contain ``J.M.Maldacena.1``.
    """
    author_data = faker.record("aut", with_control_number=True)
    author_data.update(
        name={"value": "Brian Gross"},
        ids=[{"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"}],
        email_addresses=[{"current": True, "value": "*****@*****.**"}],
    )
    author_record = InspireRecord.create(author_data)
    db.session.commit()

    def author_is_indexed():
        indexed_author = InspireSearch.get_record_data_from_es(author_record)
        assert indexed_author

    retry_until_pass(author_is_indexed)

    lit_data = faker.record("lit", with_control_number=True)
    lit_data["authors"] = [{
        "full_name": "Brian Gross",
        "ids": [{"schema": "INSPIRE BAI", "value": "A.Test.1"}],
        "emails": ["*****@*****.**"],
    }]
    lit_record = LiteratureRecord.create(lit_data)
    db.session.commit()

    def signature_has_author_bai():
        indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
        expected_id = {"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"}
        assert expected_id in indexed_lit["authors"][0]["ids"]

    retry_until_pass(signature_has_author_bai, retry_interval=2)
def test_disambiguation_on_record_update_unambiguous_match(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check that the signature's BAI is unchanged after a record update.

    A literature record with one signature and BAI ``E.Kowalczyk.1`` is
    created. Once the indexed signature exposes an INSPIRE BAI, an email is
    added to the author and the record is updated; the test waits until the
    indexed BAI equals the BAI read from the record before the update.
    """
    bai_schema = "INSPIRE BAI"
    lit_data = faker.record("lit", with_control_number=True)
    lit_data["authors"] = [{
        "full_name": "Kowalczyk, Elisabeth",
        "ids": [{"schema": bai_schema, "value": "E.Kowalczyk.1"}],
    }]
    created_record = LiteratureRecord.create(data=lit_data)
    db.session.commit()

    def indexed_signature_has_bai():
        indexed_lit = InspireSearch.get_record_data_from_es(created_record)
        assert get_values_for_schema(indexed_lit["authors"][0]["ids"],
                                     bai_schema)

    retry_until_pass(indexed_signature_has_bai, retry_interval=2)

    bai_before_update = get_values_for_schema(
        created_record["authors"][0]["ids"], bai_schema)[0]

    # Expire session state, then reload the record by id before editing it.
    db.session.expire_all()
    reloaded_record = InspireRecord.get_record(created_record.id)
    reloaded_record["authors"][0]["emails"] = ["test.test@com"]
    reloaded_record.update(dict(reloaded_record))
    db.session.commit()

    def bai_is_unchanged_after_update():
        indexed_lit = InspireSearch.get_record_data_from_es(created_record)
        indexed_bai = get_values_for_schema(
            indexed_lit["authors"][0]["ids"], bai_schema)[0]
        assert indexed_bai == bai_before_update

    retry_until_pass(bai_is_unchanged_after_update, retry_interval=2)
def test_signature_linked_by_disambiguation_has_correct_facet_author_name(
        inspire_app, clean_celery_session):
    """Check the indexed facet name and author ref after ``disambiguate_signatures``.

    A literature record with one "Doe, John" signature is clustered with no
    authors. Exactly one ``aut`` persistent identifier must exist afterwards,
    and the indexed literature record must expose the matching
    ``facet_author_name`` and author ``$ref``.

    NOTE(review): another function with this exact name appears in this
    file; if both live in one module only the later definition is kept.
    """
    signature_uuid = "94fc2b0a-dc17-42c2-bae3-ca0024079e51"
    lit_data = faker.record("lit")
    lit_data["authors"] = [{"full_name": "Doe, John", "uuid": signature_uuid}]
    lit_record = LiteratureRecord.create(lit_data)
    db.session.commit()

    cluster = {
        "signatures": [{
            "publication_id": lit_record["control_number"],
            "signature_uuid": signature_uuid,
        }],
        "authors": [],
    }
    disambiguate_signatures([cluster])

    author_pids = PersistentIdentifier.query.filter_by(pid_type="aut").all()
    assert len(author_pids) == 1

    pid_value = author_pids[0].pid_value
    created_author = AuthorsRecord.get_record_by_pid_value(pid_value)
    author_control_number = created_author.pop("control_number")

    facet_names = [f"{author_control_number}_John Doe"]
    author_ref = f"http://localhost:5000/api/authors/{pid_value}"

    def indexed_record_has_facet_and_ref():
        current_search.flush_and_refresh("records-hep")
        indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
        assert facet_names == indexed_lit["facet_author_name"]
        assert author_ref == indexed_lit["authors"][0]["record"]["$ref"]

    retry_until_pass(indexed_record_has_facet_and_ref, retry_interval=2)
def test_disambiguation_on_record_update_ambiguous_match(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check a signature whose name matches two curated signatures.

    Two literature records with curated "Kowal, Michal" signatures (different
    BAIs and author refs) are created and awaited in Elasticsearch. A third
    record with only the name is created; its indexed ids must contain an
    INSPIRE BAI and differ from both curated signatures' ids. After an
    affiliation is added and the record updated, the indexed ``ids`` and
    ``record`` must equal those of the updated record object.
    """
    bai_schema = "INSPIRE BAI"

    first_curated_data = faker.record("lit", with_control_number=True)
    first_curated_data["authors"] = [{
        "full_name": "Kowal, Michal",
        "ids": [{"schema": bai_schema, "value": "J.M.Maldacena.1"}],
        "affiliations": [{"value": "Warsaw U."}],
        "record": {"$ref": "http://localhost:5000/api/authors/999101"},
        "curated_relation": True,
    }]
    first_curated = LiteratureRecord.create(data=first_curated_data)

    second_curated_data = faker.record("lit", with_control_number=True)
    second_curated_data["authors"] = [{
        "full_name": "Kowal, Michal",
        "ids": [{"schema": bai_schema, "value": "J.M.Maldacena.2"}],
        "record": {"$ref": "http://localhost:5000/api/authors/999102"},
        "curated_relation": True,
    }]
    second_curated = LiteratureRecord.create(data=second_curated_data)

    db.session.commit()

    def curated_records_are_indexed():
        indexed_first = InspireSearch.get_record_data_from_es(first_curated)
        indexed_second = InspireSearch.get_record_data_from_es(second_curated)
        assert indexed_first
        assert indexed_second

    retry_until_pass(curated_records_are_indexed)

    bare_data = faker.record("lit", with_control_number=True)
    bare_data["authors"] = [{"full_name": "Kowal, Michal"}]
    bare_record = LiteratureRecord.create(data=bare_data)
    db.session.commit()

    def bare_signature_gets_its_own_bai():
        indexed_bare = InspireSearch.get_record_data_from_es(bare_record)

        assert get_values_for_schema(indexed_bare["authors"][0]["ids"],
                                     bai_schema)
        assert (indexed_bare["authors"][0]["ids"] !=
                first_curated["authors"][0]["ids"])
        assert (indexed_bare["authors"][0]["ids"] !=
                second_curated["authors"][0]["ids"])

    retry_until_pass(bare_signature_gets_its_own_bai, retry_interval=2)

    # Expire session state, then reload the record by id before editing it.
    db.session.expire_all()
    reloaded_record = InspireRecord.get_record(bare_record.id)
    reloaded_record["authors"][0]["affiliations"] = [{"value": "CERN"}]
    reloaded_record.update(dict(reloaded_record))
    db.session.commit()

    def index_matches_updated_record():
        indexed_bare = InspireSearch.get_record_data_from_es(bare_record)
        indexed_signature = indexed_bare["authors"][0]
        assert (indexed_signature["ids"] ==
                reloaded_record["authors"][0]["ids"])

        assert (indexed_signature["record"] ==
                reloaded_record["authors"][0]["record"])

    retry_until_pass(index_matches_updated_record, retry_interval=2)
Пример #13
0
    def build_seminar(self, data) -> dict:
        """Translate ``data`` into a seminar record using ``SeminarBuilder``.

        Args:
            data (dict): Source values. Keys read here: ``name``,
                ``field_of_interest``, ``additional_info``, ``series_name``,
                ``series_number``, ``timezone``, ``dates`` (start at index 0,
                end at index 1), ``address``, ``abstract``, ``captioned``,
                ``contacts``, ``speakers``, ``material_urls``, ``join_urls``,
                ``websites``, ``keywords`` and ``literature_records``.

        Returns:
            dict: ``builder.record`` with ``$schema`` set to the external URL
            of ``records/seminars.json``.

        Raises:
            InvalidDataError: If looking up an entry of
                ``literature_records`` raises ``PIDDoesNotExistError``.
        """
        builder = SeminarBuilder()
        builder.set_title(title=data.get("name"))
        builder.add_inspire_categories(data.get("field_of_interest", []))
        builder.add_public_note(value=data.get("additional_info", ""))
        builder.add_series(
            name=data.get("series_name"), number=data.get("series_number")
        )

        timezone = data.get("timezone")
        builder.set_timezone(timezone)

        # Both dates go through local_form_datetime_to_iso_utc together with
        # the seminar's timezone before being stored on the builder.
        start_datetime = get_value(data, "dates[0]")
        start_datetime_utc = local_form_datetime_to_iso_utc(start_datetime, timezone)
        builder.set_start_datetime(start_datetime_utc)

        end_datetime = get_value(data, "dates[1]")
        end_datetime_utc = local_form_datetime_to_iso_utc(end_datetime, timezone)
        builder.set_end_datetime(end_datetime_utc)

        address = data.get("address")
        if address:
            builder.set_address(
                cities=[address.get("city")],
                state=address.get("state"),
                place_name=address.get("venue"),
                country_code=country_name_to_code(address.get("country")),
            )

        abstract = data.get("abstract")
        if abstract:
            builder.set_abstract(value=abstract)

        captioned = data.get("captioned")
        if captioned:
            builder.set_captioned(captioned)

        for contact in data.get("contacts", []):
            builder.add_contact(**contact)

        for speaker in data.get("speakers", []):
            # Collect only the affiliation parts that are present; pass None
            # when the speaker has neither a value nor a record.
            affiliation = {}
            affiliation_value = speaker.get("affiliation")
            if affiliation_value:
                affiliation["value"] = affiliation_value

            affiliation_record = speaker.get("affiliation_record")
            if affiliation_record:
                affiliation["record"] = affiliation_record

            builder.add_speaker(
                name=speaker.get("name"),
                record=speaker.get("record"),
                affiliations=[affiliation] if affiliation else None,
            )

        for url in data.get("material_urls", []):
            builder.add_material_url(**url)

        for url in data.get("join_urls", []):
            builder.add_join_url(**url)

        for website in data.get("websites", []):
            builder.add_url(website)

        for keyword in data.get("keywords", []):
            builder.add_keyword(value=keyword)

        for literature_record_pid in data.get("literature_records", []):
            # The lookup is done only to validate that the pid exists.
            try:
                LiteratureRecord.get_record_by_pid_value(literature_record_pid)
            except PIDDoesNotExistError as error:
                # Chain explicitly so the traceback marks the failed lookup
                # as the direct cause of the validation error.
                raise InvalidDataError(
                    f"{literature_record_pid} is not a valid literature record."
                ) from error
            literature_ref = {
                "$ref": f"{get_inspirehep_url()}/api/literature/{literature_record_pid}"
            }
            builder.add_literature_record(record=literature_ref)

        builder.record["$schema"] = url_for(
            "invenio_jsonschemas.get_schema",
            schema_path="records/seminars.json",
            _external=True,
        )

        return builder.record
def test_disambiguation_races_assign(override_config, inspire_app,
                                     clean_celery_session,
                                     enable_disambiguation):
    """Check that the signature's BAI changes after an author-assign request.

    With both BAI feature flags overridden to ``True``, an author with BAI
    ``M.F.A.Hearn.1`` and a literature record with a curated signature
    pointing at that author are created. A cataloger then posts to
    ``/api/assign/author``; the test waits until the first INSPIRE BAI of the
    indexed signature is no longer ``M.F.A.Hearn.1``.
    """
    cataloger = create_user(role="cataloger")
    bai_flags = {
        "FEATURE_FLAG_ENABLE_BAI_PROVIDER": True,
        "FEATURE_FLAG_ENABLE_BAI_CREATION": True,
    }
    with override_config(**bai_flags):
        author_data = faker.record("aut")
        author_data.update(
            name={"value": "Michael F. A'Hearn"},
            ids=[{"schema": "INSPIRE BAI", "value": "M.F.A.Hearn.1"}],
        )
        author_record = AuthorsRecord.create(author_data)

        lit_data = faker.record("lit")
        lit_data["authors"] = [{
            "ids": [{"value": "M.F.A.Hearn.1", "schema": "INSPIRE BAI"}],
            "uuid": "ce061c1e-866a-422d-9982-652183bae814",
            "full_name": "A'Hearn, M.F.",
            "signature_block": "HARNm",
            "curated_relation": True,
            "record": author_record["self"],
        }]
        lit_record = LiteratureRecord.create(lit_data)
        db.session.commit()

    with inspire_app.test_client() as client:
        login_user_via_session(client, email=cataloger.email)
        assign_payload = {
            "literature_recids": [lit_record["control_number"]],
            "from_author_recid": author_record["control_number"],
        }
        client.post(
            "/api/assign/author",
            data=orjson.dumps(assign_payload),
            content_type="application/json",
        )

        def signature_bai_has_changed():
            indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
            indexed_bai = get_values_for_schema(
                indexed_lit["authors"][0]["ids"], "INSPIRE BAI")[0]
            assert indexed_bai != "M.F.A.Hearn.1"

        retry_until_pass(signature_bai_has_changed, retry_interval=2)
def test_disambiguate_many_authors_runs_after_record_creation(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check that two signatures are each linked to their own author record.

    Authors "Brian Gross" and "Donald Matthews" are created and awaited in
    Elasticsearch. A literature record whose two signatures repeat their
    names, ids and emails is then created; the test waits until each indexed
    signature's ``$ref`` contains the control number of the matching author.
    """
    gross_data = faker.record("aut", with_control_number=True)
    gross_data.update(
        name={"value": "Brian Gross"},
        ids=[
            {"schema": "INSPIRE ID", "value": "INSPIRE-00304313"},
            {"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"},
        ],
        email_addresses=[{"current": True, "value": "*****@*****.**"}],
    )
    matthews_data = faker.record("aut", with_control_number=True)
    matthews_data.update(
        name={"value": "Donald Matthews"},
        ids=[{"schema": "INSPIRE BAI", "value": "H.Khalfoun.1"}],
        email_addresses=[
            {"current": True, "value": "*****@*****.**"},
            {"current": True, "value": "*****@*****.**"},
        ],
    )

    gross_record = InspireRecord.create(gross_data)
    matthews_record = InspireRecord.create(matthews_data)
    db.session.commit()

    def authors_are_indexed():
        indexed_gross = InspireSearch.get_record_data_from_es(gross_record)
        indexed_matthews = InspireSearch.get_record_data_from_es(
            matthews_record)
        assert indexed_gross and indexed_matthews

    retry_until_pass(authors_are_indexed)

    gross_signature = {
        "full_name": "Brian Gross",
        "ids": [
            {"schema": "INSPIRE ID", "value": "INSPIRE-00304313"},
            {"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"},
        ],
        "emails": ["*****@*****.**"],
    }
    matthews_signature = {
        "full_name": "Donald Matthews",
        "ids": [{"schema": "INSPIRE BAI", "value": "H.Khalfoun.1"}],
        "emails": ["*****@*****.**", "*****@*****.**"],
    }
    lit_data = faker.record("lit", with_control_number=True)
    lit_data["authors"] = [gross_signature, matthews_signature]
    lit_record = LiteratureRecord.create(lit_data)
    db.session.commit()

    def signatures_are_linked():
        indexed_lit = InspireSearch.get_record_data_from_es(lit_record)
        indexed_signatures = indexed_lit.get("authors")
        gross_control_number = str(gross_data["control_number"])
        assert gross_control_number in indexed_signatures[0]["record"]["$ref"]
        matthews_control_number = str(matthews_data["control_number"])
        assert (matthews_control_number
                in indexed_signatures[1]["record"]["$ref"])

    retry_until_pass(signatures_are_linked, retry_interval=2)
def test_disambiguation_runs_after_lit_record_update(inspire_app,
                                                     clean_celery_session,
                                                     enable_disambiguation):
    """Check that a signature added by an update also gets an author ref.

    Three authors (control numbers 1-3) are created and awaited in
    Elasticsearch. A literature record (control number 4) with one signature
    is created, and the test waits until that signature has a ``$ref`` both
    in the database and in Elasticsearch. A second signature is then
    appended and the record updated; both signatures must end up with a
    ``$ref`` in both stores.
    """
    first_author_data = faker.record("aut")
    first_author_data.update(
        control_number=1,
        name={"value": "Brian Gross"},
        ids=[{"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"}],
        email_addresses=[{"current": True, "value": "*****@*****.**"}],
    )
    first_author = InspireRecord.create(first_author_data)

    second_author_data = faker.record("aut")
    second_author_data.update(
        control_number=2,
        name={"value": "Test Author"},
        email_addresses=[{"current": True, "value": "*****@*****.**"}],
    )
    second_author = InspireRecord.create(second_author_data)

    third_author_data = faker.record("aut")
    third_author_data.update(
        control_number=3,
        name={"value": "Another Author"},
        email_addresses=[
            {"current": True, "value": "*****@*****.**"},
            {"current": True, "hidden": True, "value": "*****@*****.**"},
        ],
    )
    third_author = InspireRecord.create(third_author_data)
    db.session.commit()

    def authors_are_indexed():
        indexed_first = InspireSearch.get_record_data_from_es(first_author)
        indexed_second = InspireSearch.get_record_data_from_es(second_author)
        indexed_third = InspireSearch.get_record_data_from_es(third_author)
        assert indexed_first
        assert indexed_second
        assert indexed_third

    retry_until_pass(authors_are_indexed, retry_interval=5)

    lit_data = faker.record("lit")
    lit_data.update(
        control_number=4,
        authors=[{
            "full_name": "Brian Gross",
            "ids": [{"schema": "INSPIRE BAI", "value": "J.M.Maldacena.1"}],
            "emails": ["*****@*****.**"],
            "uuid": "798d9afe-d3c2-479e-b384-f0aee2573076",
        }],
    )
    created_lit = LiteratureRecord.create(lit_data)
    lit_uuid = created_lit.id
    db.session.commit()

    def first_signature_is_linked():
        # Close the session before reloading the record by uuid.
        db.session.close()
        stored_lit = LiteratureRecord.get_record(lit_uuid)
        indexed_lit = InspireSearch.get_record_data_from_es(stored_lit)
        assert stored_lit["authors"][0]["record"]["$ref"]
        assert indexed_lit["authors"][0]["record"]["$ref"]

    retry_until_pass(first_signature_is_linked, retry_interval=5)

    lit_to_update = LiteratureRecord.get_record(lit_uuid)
    lit_to_update["authors"].append({
        "full_name": "Test Author",
        "emails": ["*****@*****.**"],
    })
    lit_to_update.update(dict(lit_to_update))
    db.session.commit()

    def both_signatures_are_linked():
        # Close the session before reloading the record by uuid.
        db.session.close()
        stored_lit = LiteratureRecord.get_record(lit_uuid)
        indexed_lit = InspireSearch.get_record_data_from_es(stored_lit)
        assert stored_lit["authors"][0]["record"]["$ref"]
        assert stored_lit["authors"][1]["record"]["$ref"]
        assert indexed_lit["authors"][0]["record"]["$ref"]
        assert indexed_lit["authors"][1]["record"]["$ref"]

    retry_until_pass(both_signatures_are_linked, retry_interval=5)
def test_disambiguate_authors_doesnt_match_when_author_is_ambiguous(
        inspire_app, clean_celery_session, enable_disambiguation):
    """Check that an ambiguous signature is linked to neither candidate author.

    Two authors named "Brian Gross" (control numbers 90676330 and 90676331)
    are created and awaited in Elasticsearch. A literature record with a
    "Brian Gross" signature is then created; the indexed signature's author
    ``$ref`` must not point to either of the two authors.
    """
    author_1 = faker.record("aut", with_control_number=True)
    author_1.update({
        "name": {
            "value": "Brian Gross"
        },
        "ids": [
            {
                "schema": "INSPIRE ID",
                "value": "INSPIRE-00304313"
            },
            {
                "schema": "INSPIRE BAI",
                "value": "J.M.Maldacena.2"
            },
        ],
        "email_addresses": [{
            "current": True,
            "value": "*****@*****.**"
        }],
        "control_number":
        90_676_330,
    })
    author_2 = faker.record("aut", with_control_number=True)
    author_2.update({
        "name": {
            "value": "Brian Gross"
        },
        "ids": [
            {
                "schema": "INSPIRE ID",
                "value": "INSPIRE-00300003"
            },
            {
                "schema": "INSPIRE BAI",
                "value": "J.M.Maldacena.1"
            },
        ],
        "email_addresses": [{
            "current": True,
            "value": "*****@*****.**"
        }],
        "control_number":
        90_676_331,
    })

    author_record_1 = InspireRecord.create(author_1)
    author_record_2 = InspireRecord.create(author_2)
    db.session.commit()

    def assert_authors_records_exist_in_es():
        author_record_from_es = InspireSearch.get_record_data_from_es(
            author_record_1)
        author_2_from_es = InspireSearch.get_record_data_from_es(
            author_record_2)
        assert author_record_from_es
        assert author_2_from_es

    retry_until_pass(assert_authors_records_exist_in_es, retry_interval=2)

    authors = [{"full_name": "Brian Gross", "emails": ["*****@*****.**"]}]

    literature_data = faker.record("lit", with_control_number=True)
    literature_data.update({"authors": authors})
    literature_record = LiteratureRecord.create(literature_data)
    db.session.commit()

    ambiguous_author_refs = (
        "http://localhost:5000/api/authors/90676330",
        "http://localhost:5000/api/authors/90676331",
    )

    def assert_disambiguation_task():
        literature_record_from_es = InspireSearch.get_record_data_from_es(
            literature_record)
        # Compare the "$ref" string itself. The "record" value is a
        # {"$ref": ...} dict (or missing), so comparing it to a URL string
        # is always unequal and the check could never fail.
        # NOTE(review): a signature with no "record" at all still passes, as
        # before; add an explicit presence check if creation of a new author
        # is meant to be verified here.
        author_ref = literature_record_from_es["authors"][0].get(
            "record", {}).get("$ref")
        assert author_ref not in ambiguous_author_refs

    retry_until_pass(assert_disambiguation_task, retry_interval=5)