Exemplo n.º 1
0
def test_get_signatures_only_curated(
    scan_mock,
    es_record_with_2_curated_authors,
    es_record_with_curated_author_and_no_recid,
    es_record_with_non_curated_author,
):
    """Check ``get_signatures(only_curated=True)`` against two signatures.

    The first call to ``scan_mock`` returns the three record fixtures.  The
    expected result holds only the two signatures attached to publication
    374836, both flagged ``is_curated_author_id=True``.  Order is ignored:
    both sides are sorted by ``signature_uuid`` before comparison.
    """

    def curated_publication():
        # Built anew on every call so each expected signature owns its own
        # (equal) Publication object.
        return Publication(
            abstract="2 curated authors with recid",
            authors=["Seiberg, N.", "Jimmy"],
            collaborations=[],
            keywords=["effective action", "approximation: semiclassical"],
            publication_id=374836,
            title="Title",
            topics=["Theory-HEP"],
        )

    records = [
        es_record_with_2_curated_authors,
        es_record_with_curated_author_and_no_recid,
        es_record_with_non_curated_author,
    ]
    scan_mock.side_effect = [records]
    by_uuid = itemgetter("signature_uuid")

    found = get_signatures(only_curated=True)

    wanted = [
        Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=989440,
            author_name="Seiberg, N.",
            publication=curated_publication(),
            signature_block="SABARGn",
            signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e52",
            is_curated_author_id=True,
        ),
        Signature(
            author_affiliation="UAIC",
            author_id=989440,
            author_name="Jimmy",
            publication=curated_publication(),
            signature_block="JANa",
            signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e55",
            is_curated_author_id=True,
        ),
    ]

    assert sorted(found, key=by_uuid) == sorted(wanted, key=by_uuid)
def test_train_and_save_distance_model(
    scan_mock,
    sample_mock,
    choices_mock,
    fit_mock,
    score_mock,
    tmpdir,
    ethnicity_path,
    es_record_with_many_curated_authors,
):
    """Train a distance model on mocked inputs and check a file is written.

    Setup performed here:
      * the first ``scan_mock`` call returns a list holding the
        many-curated-authors record;
      * ``sample_mock`` returns the same uuid -> Signature mapping twice;
      * ``choices_mock`` replays the scripted picks below two times over;
      * ``score_mock`` returns 0.85.

    The only assertion is that ``distance.pkl`` under ``tmpdir`` is
    non-empty after ``train_and_save_distance_model`` runs.  ``fit_mock`` is
    requested but never referenced in the body (presumably it patches model
    fitting as a side effect -- confirm against the fixture definition).
    """
    scripted_picks = [
        # same cluster, different name
        ("JOhn", "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
        "94fc2b0a-dc17-42c2-bae3-ca0024079e53",
        # same cluster, same name
        ("JOhn", "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
        "94fc2b0a-dc17-42c2-bae3-ca0024079e55",
        # different cluster, different name
        ("JOhn", "94fc2b0a-dc17-42c2-bae3-ca0024079e56"),
        "94fc2b0a-dc17-42c2-bae3-ca0024079e57",
        # different cluster, same name
        ("JOhn", "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
        "94fc2b0a-dc17-42c2-bae3-ca0024079e54",
    ]
    # One row per signature:
    # (author_affiliation, author_id, author_name, signature_block, uuid)
    signature_rows = [
        ("Rutgers U., Piscataway", 1, "Doe, John", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
        ("Rutgers U., Piscataway", 1, "Doe, John", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e53"),
        ("Rutgers U., Piscataway", 1, "Doe, J", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
        ("Rutgers U., Piscataway", 2, "Doe, John", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e55"),
        ("Rutgers U., Piscataway", 2, "Doe, John", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e56"),
        ("", 3, "Doe, John", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e57"),
        ("", 6, "Doe, John", "JOhn",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e58"),
        ("Rutgers U., Piscataway", 7, "Jamie", "Jana",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e59"),
    ]

    scan_mock.side_effect = [[es_record_with_many_curated_authors]]

    # Every signature gets its own freshly built (but equal) Publication.
    signatures = [
        Signature(
            author_affiliation=affiliation,
            author_id=author_id,
            author_name=author_name,
            publication=Publication(
                abstract="Many curated authors",
                authors=[
                    "Doe, John",
                    "Doe, J",
                    "Doe, John",
                    "Doe, John",
                    "Doe, John",
                    "Doe, John",
                    "Jamie",
                    "Jamie",
                ],
                collaborations=[],
                keywords=["keyword"],
                publication_id=1,
                title="Title",
                topics=["category"],
            ),
            signature_block=block,
            signature_uuid=uuid,
            is_curated_author_id=True,
        )
        for affiliation, author_id, author_name, block, uuid in signature_rows
    ]

    choices_mock.side_effect = scripted_picks * 2
    by_uuid = {sig.signature_uuid: sig for sig in signatures}
    # The very same mapping object is handed out in both slots.
    sample_mock.return_value = [by_uuid, by_uuid]
    score_mock.return_value = 0.85

    model_file = tmpdir.join("distance.pkl")
    train_and_save_distance_model(ethnicity_path, model_file, 4)

    assert os.path.getsize(model_file) > 0
def test_distance_estimator_load_data(
    scan_mock,
    es_record_with_many_curated_authors,
):
    """Load four labelled uuid pairs into a DistanceEstimator and compare X, y.

    Signatures come from ``get_signatures()`` with ``scan_mock`` returning the
    many-curated-authors record on its first call.  The pairs are passed to
    ``load_data`` together with the literal ``4``.  The expected ``y`` is
    ``[0, 0, 1, 1]``: the two ``same_cluster=True`` pairs are expected as 0
    and the two ``same_cluster=False`` pairs as 1.
    """

    def make_signature(affiliation, author_id, author_name, block, uuid):
        # Fresh Publication per signature; all share the same field values.
        return Signature(
            author_affiliation=affiliation,
            author_id=author_id,
            author_name=author_name,
            publication=Publication(
                abstract="Many curated authors",
                authors=[
                    "Doe, John",
                    "Doe, J",
                    "Doe, John",
                    "Doe, John",
                    "Doe, John",
                    "Doe, John",
                    "Jamie",
                    "Jamie",
                ],
                collaborations=[],
                keywords=["keyword"],
                publication_id=1,
                title="Title",
                topics=["category"],
            ),
            signature_block=block,
            signature_uuid=uuid,
            is_curated_author_id=True,
        )

    scan_mock.side_effect = [[es_record_with_many_curated_authors]]
    signatures = get_signatures()

    # (same_cluster, first uuid, second uuid)
    labelled_pairs = [
        (True,
         "94fc2b0a-dc17-42c2-bae3-ca0024079e52",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e53"),
        (True,
         "94fc2b0a-dc17-42c2-bae3-ca0024079e54",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e55"),
        (False,
         "94fc2b0a-dc17-42c2-bae3-ca0024079e56",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e57"),
        (False,
         "94fc2b0a-dc17-42c2-bae3-ca0024079e52",
         "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
    ]
    pairs = [
        {"same_cluster": same, "signature_uuids": [left, right]}
        for same, left, right in labelled_pairs
    ]

    estimator = DistanceEstimator(None)
    estimator.load_data(signatures, pairs, 4)

    # Each entry: two argument tuples for make_signature, one per pair member.
    expected_rows = [
        (
            ("Rutgers U., Piscataway", 1, "Doe, John", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
            ("Rutgers U., Piscataway", 1, "Doe, J", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e53"),
        ),
        (
            ("Rutgers U., Piscataway", 2, "Doe, John", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
            ("Rutgers U., Piscataway", 2, "Doe, John", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e55"),
        ),
        (
            ("", 6, "Doe, John", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e56"),
            ("Rutgers U., Piscataway", 7, "Jamie", "Jana",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e57"),
        ),
        (
            ("Rutgers U., Piscataway", 1, "Doe, John", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
            ("Rutgers U., Piscataway", 2, "Doe, John", "JOhn",
             "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
        ),
    ]
    expected_X = array(
        [
            [make_signature(*first), make_signature(*second)]
            for first, second in expected_rows
        ],
        dtype=object,
    )
    expected_y = array([0, 0, 1, 1])

    assert (estimator.X == expected_X).all()
    assert (estimator.y == expected_y).all()
def test_clusterer_load_data(
    scan_mock,
    distance_estimator_mock,
    es_record_with_curated_author,
    es_record_with_non_curated_author,
):
    """Load signatures and input clusters into a Clusterer and compare X, y.

    ``scan_mock`` returns the two record fixtures on its first call; the
    resulting signatures and ``get_input_clusters(signatures)`` are passed to
    ``Clusterer.load_data``.  ``X`` is expected to hold one single-signature
    row per signature and ``y`` is expected to be ``[0, -1]``.
    """
    records = [es_record_with_curated_author, es_record_with_non_curated_author]
    scan_mock.side_effect = [records]

    signatures = get_signatures()
    clusterer = Clusterer(distance_estimator_mock)
    clusterer.load_data(signatures, get_input_clusters(signatures))

    # Row expected first: the signature flagged is_curated_author_id=True.
    first_row = [
        Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=989441,
            author_name="Doe, John",
            publication=Publication(
                abstract="2 curated authors with recid",
                authors=["Doe, John"],
                collaborations=["ATLAS"],
                keywords=["effective action", "approximation: semiclassical"],
                publication_id=374836,
                title="Title",
                topics=["Theory-HEP"],
            ),
            signature_block="JOhn",
            signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e52",
            is_curated_author_id=True,
        )
    ]
    # Row expected second: the signature flagged is_curated_author_id=False.
    second_row = [
        Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=989443,
            author_name="Seiberg, Nana.",
            publication=Publication(
                abstract="Author curated no recid",
                authors=["Seiberg, Nana."],
                collaborations=[],
                keywords=["thesis", "string model"],
                publication_id=421404,
                title="Black holes in string theory",
                topics=["Theory-HEP"],
            ),
            signature_block="SABARGn",
            signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e51",
            is_curated_author_id=False,
        )
    ]
    expected_X = array([first_row, second_row], dtype=object)
    expected_y = array([0, -1])

    assert (clusterer.X == expected_X).all()
    assert (clusterer.y == expected_y).all()
Exemplo n.º 5
0
def test_get_signatures_for_all(
    scan_mock,
    es_record_with_2_curated_authors,
    es_record_with_curated_author_and_no_recid,
    es_record_with_non_curated_author,
):
    """Check ``get_signatures()`` (no arguments) against four signatures.

    The first call to ``scan_mock`` returns the three record fixtures.  The
    expected result holds four signatures across three publications; two are
    flagged ``is_curated_author_id=True`` and two ``False``.  Order is
    ignored: both sides are sorted by ``signature_uuid`` before comparison.
    """

    def two_author_publication():
        # Called once per signature so the two expected signatures on this
        # publication do not share one Publication object.
        return Publication(
            abstract="2 curated authors with recid",
            authors=["Seiberg, N.", "Jimmy"],
            collaborations=[],
            keywords=["effective action", "approximation: semiclassical"],
            publication_id=374836,
            title="Title",
            topics=["Theory-HEP"],
        )

    records = [
        es_record_with_2_curated_authors,
        es_record_with_curated_author_and_no_recid,
        es_record_with_non_curated_author,
    ]
    scan_mock.side_effect = [records]
    by_uuid = itemgetter("signature_uuid")

    found = get_signatures()

    seiberg = Signature(
        author_affiliation="Rutgers U., Piscataway",
        author_id=989440,
        author_name="Seiberg, N.",
        publication=two_author_publication(),
        signature_block="SABARGn",
        signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e52",
        is_curated_author_id=True,
    )
    jimmy = Signature(
        author_affiliation="UAIC",
        author_id=989440,
        author_name="Jimmy",
        publication=two_author_publication(),
        signature_block="JANa",
        signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e55",
        is_curated_author_id=True,
    )
    weinberg = Signature(
        author_affiliation="Texas U.",
        author_id=None,
        author_name="Weinberg, Steven",
        publication=Publication(
            abstract="Author not curated",
            authors=["Weinberg, Steven"],
            collaborations=[],
            keywords=["book"],
            publication_id=406190,
            title="The Quantum theory of fields. Vol. 1: Foundations",
            topics=["Theory-HEP", "General Physics"],
        ),
        signature_block="WANBARGs",
        signature_uuid="5e550ded-e955-4a22-b906-8af5aaa9f1e2",
        is_curated_author_id=False,
    )
    nana = Signature(
        author_affiliation="Rutgers U., Piscataway",
        author_id=989443,
        author_name="Seiberg, Nana.",
        publication=Publication(
            abstract="Author curated no recid",
            authors=["Seiberg, Nana."],
            collaborations=[],
            keywords=["thesis", "string model"],
            publication_id=421404,
            title="Black holes in string theory",
            topics=["Theory-HEP"],
        ),
        signature_block="SABARGn",
        signature_uuid="94fc2b0a-dc17-42c2-bae3-ca0024079e51",
        is_curated_author_id=False,
    )
    wanted = [seiberg, jimmy, weinberg, nana]

    assert sorted(found, key=by_uuid) == sorted(wanted, key=by_uuid)
Exemplo n.º 6
0
def test_process_clustering_output_signatures_without_author_id():
    """Two id-less signatures with one label give one cluster with no authors.

    A ``MagicMock`` stands in for the clusterer: ``clusterer.labels_`` is
    ``[1, 1]`` and ``X`` holds two single-signature rows whose signatures
    differ only in ``signature_uuid`` (both have ``author_id=None`` and
    ``is_curated_author_id=False``).  The expected output is a single entry
    listing both signatures as ``(1, uuid)`` tuples -- the ``1`` presumably
    being the publication id; confirm against ``process_clustering_output``
    -- with an empty ``authors`` list.  Compared via ``DeepDiff`` with
    ``ignore_order=True``.
    """
    uuids = (
        "94fc2b0a-dc17-42c2-bae3-ca0024079e52",
        "94fc2b0a-dc17-42c2-bae3-ca0024079e54",
    )
    # One row per uuid, each wrapping a single freshly built Signature.
    rows = [
        [
            Signature(
                author_affiliation="Rutgers U., Piscataway",
                author_id=None,
                author_name="Doe, John",
                publication=Publication(
                    abstract="Many curated authors",
                    authors=[
                        "Doe, John",
                        "Doe, J",
                        "Doe, John",
                        "Doe, John",
                        "Doe, John",
                        "Doe, John",
                        "Jamie",
                        "Jamie",
                    ],
                    collaborations=[],
                    keywords=["keyword"],
                    publication_id=1,
                    title="Title",
                    topics=["category"],
                ),
                signature_block="JOhn",
                signature_uuid=uuid,
                is_curated_author_id=False,
            )
        ]
        for uuid in uuids
    ]

    clusterer_mock = MagicMock()
    clusterer_mock.clusterer.labels_ = numpy.array([1, 1])
    clusterer_mock.X = numpy.array(rows, dtype=object)

    single_cluster = {
        "signatures": [
            (1, "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
            (1, "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
        ],
        "authors": [],
    }
    expected_output = [single_cluster]

    output = process_clustering_output(clusterer_mock)
    difference = DeepDiff(output, expected_output, ignore_order=True)
    assert not difference
Exemplo n.º 7
0
def test_process_clustering_output_signatures_multiple_curated_author_ids():
    """Five signatures under labels ``[0, 0, 1, 1, 1]`` give two clusters.

    A ``MagicMock`` stands in for the clusterer; ``X`` holds five
    single-signature rows on publications 11 to 15.  Three signatures carry an
    author id (1, 3, 5) with ``is_curated_author_id=True``; the other two have
    ``author_id=None`` and ``is_curated_author_id=False``.  The expected
    output lists every signature as a ``publication_id`` / ``signature_uuid``
    dict and, per cluster, one ``has_claims=True`` author entry for each
    author id present in it.  Compared via ``DeepDiff`` with
    ``ignore_order=True``.
    """

    def signature_entry(publication_id, signature_uuid):
        return {
            "publication_id": publication_id,
            "signature_uuid": signature_uuid,
        }

    def claimed_author(author_id):
        return {"author_id": author_id, "has_claims": True}

    # (author_id, publication_id, signature_uuid, is_curated_author_id)
    signature_rows = [
        (1, 11, "94fc2b0a-dc17-42c2-bae3-ca0024079e52", True),
        (None, 12, "94fc2b0a-dc17-42c2-bae3-ca0024079e53", False),
        (3, 13, "94fc2b0a-dc17-42c2-bae3-ca0024079e54", True),
        (None, 14, "94fc2b0a-dc17-42c2-bae3-ca0024079e55", False),
        (5, 15, "94fc2b0a-dc17-42c2-bae3-ca0024079e56", True),
    ]
    matrix = [
        [
            Signature(
                author_affiliation="Rutgers U., Piscataway",
                author_id=author_id,
                author_name="Doe, John",
                publication=Publication(
                    abstract="Many curated authors",
                    authors=[
                        "Doe, John",
                        "Doe, J",
                        "Doe, John",
                        "Doe, John",
                        "Doe, John",
                        "Doe, John",
                        "Jamie",
                        "Jamie",
                    ],
                    collaborations=[],
                    keywords=["keyword"],
                    publication_id=publication_id,
                    title="Title",
                    topics=["category"],
                ),
                signature_block="JOhn",
                signature_uuid=uuid,
                is_curated_author_id=curated,
            )
        ]
        for author_id, publication_id, uuid, curated in signature_rows
    ]

    clusterer_mock = MagicMock()
    clusterer_mock.clusterer.labels_ = numpy.array([0, 0, 1, 1, 1])
    clusterer_mock.X = numpy.array(matrix, dtype=object)

    first_cluster = {
        "signatures": [
            signature_entry(11, "94fc2b0a-dc17-42c2-bae3-ca0024079e52"),
            signature_entry(12, "94fc2b0a-dc17-42c2-bae3-ca0024079e53"),
        ],
        "authors": [claimed_author(1)],
    }
    second_cluster = {
        "signatures": [
            signature_entry(13, "94fc2b0a-dc17-42c2-bae3-ca0024079e54"),
            signature_entry(14, "94fc2b0a-dc17-42c2-bae3-ca0024079e55"),
            signature_entry(15, "94fc2b0a-dc17-42c2-bae3-ca0024079e56"),
        ],
        "authors": [claimed_author(3), claimed_author(5)],
    }
    expected_output = [first_cluster, second_cluster]

    output = process_clustering_output(clusterer_mock)
    difference = DeepDiff(output, expected_output, ignore_order=True)
    assert not difference