def test_process_clustering_output_signatures_without_author_id():
    """Cluster of signatures that carry no author id produces no authors.

    Both mocked signatures share the cluster label ``1``, have
    ``author_id=None`` and ``is_curated_author_id=False``, so the expected
    result is a single cluster holding both signatures and an empty
    ``authors`` list.
    """

    def build_signature(signature_uuid):
        # Build a fresh Publication (and authors list) for every signature so
        # the two rows never share mutable state.
        publication = Publication(
            abstract="Many curated authors",
            authors=[
                "Doe, John",
                "Doe, J",
                "Doe, John",
                "Doe, John",
                "Doe, John",
                "Doe, John",
                "Jamie",
                "Jamie",
            ],
            collaborations=[],
            keywords=["keyword"],
            publication_id=1,
            title="Title",
            topics=["category"],
        )
        return Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=None,
            author_name="Doe, John",
            publication=publication,
            signature_block="JOhn",
            signature_uuid=signature_uuid,
            is_curated_author_id=False,
        )

    clusterer_mock = MagicMock()
    clusterer_mock.clusterer.labels_ = numpy.array([1, 1])
    # One single-element row per signature, matching the layout of ``X`` used
    # by the other clustering-output tests in this module.
    clusterer_mock.X = numpy.array(
        [
            [build_signature("94fc2b0a-dc17-42c2-bae3-ca0024079e52")],
            [build_signature("94fc2b0a-dc17-42c2-bae3-ca0024079e54")],
        ],
        dtype=object,
    )

    # Signatures are expected as publication_id/signature_uuid mappings, the
    # same shape asserted by
    # test_process_clustering_output_signatures_multiple_curated_author_ids.
    # The previous (publication_id, uuid) tuples could never compare equal to
    # that shape under DeepDiff.
    expected_output = [
        {
            "signatures": [
                {
                    "publication_id": 1,
                    "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e52",
                },
                {
                    "publication_id": 1,
                    "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e54",
                },
            ],
            "authors": [],
        }
    ]

    output = process_clustering_output(clusterer_mock)

    assert not DeepDiff(output, expected_output, ignore_order=True)
def test_process_clustering_output_signatures_multiple_curated_author_ids():
    """Two clusters, each mixing curated author ids with id-less signatures.

    Labels ``[0, 0, 1, 1, 1]`` split the five mocked signatures into a first
    cluster (curated author 1 plus one signature without an id) and a second
    cluster (curated authors 3 and 5 plus one signature without an id). Every
    curated author id is expected back with ``has_claims`` set to ``True``.
    """
    # (author_id, publication_id, signature_uuid, is_curated_author_id)
    signature_specs = [
        (1, 11, "94fc2b0a-dc17-42c2-bae3-ca0024079e52", True),
        (None, 12, "94fc2b0a-dc17-42c2-bae3-ca0024079e53", False),
        (3, 13, "94fc2b0a-dc17-42c2-bae3-ca0024079e54", True),
        (None, 14, "94fc2b0a-dc17-42c2-bae3-ca0024079e55", False),
        (5, 15, "94fc2b0a-dc17-42c2-bae3-ca0024079e56", True),
    ]

    rows = []
    for author_id, publication_id, signature_uuid, curated in signature_specs:
        # Each signature gets its own Publication instance and authors list.
        publication = Publication(
            abstract="Many curated authors",
            authors=[
                "Doe, John",
                "Doe, J",
                "Doe, John",
                "Doe, John",
                "Doe, John",
                "Doe, John",
                "Jamie",
                "Jamie",
            ],
            collaborations=[],
            keywords=["keyword"],
            publication_id=publication_id,
            title="Title",
            topics=["category"],
        )
        signature = Signature(
            author_affiliation="Rutgers U., Piscataway",
            author_id=author_id,
            author_name="Doe, John",
            publication=publication,
            signature_block="JOhn",
            signature_uuid=signature_uuid,
            is_curated_author_id=curated,
        )
        # X holds one single-element row per signature.
        rows.append([signature])

    clusterer_mock = MagicMock()
    clusterer_mock.clusterer.labels_ = numpy.array([0, 0, 1, 1, 1])
    clusterer_mock.X = numpy.array(rows, dtype=object)

    first_cluster = {
        "signatures": [
            {
                "publication_id": 11,
                "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e52",
            },
            {
                "publication_id": 12,
                "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e53",
            },
        ],
        "authors": [{"author_id": 1, "has_claims": True}],
    }
    second_cluster = {
        "signatures": [
            {
                "publication_id": 13,
                "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e54",
            },
            {
                "publication_id": 14,
                "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e55",
            },
            {
                "publication_id": 15,
                "signature_uuid": "94fc2b0a-dc17-42c2-bae3-ca0024079e56",
            },
        ],
        "authors": [
            {"author_id": 3, "has_claims": True},
            {"author_id": 5, "has_claims": True},
        ],
    }
    expected_output = [first_cluster, second_cluster]

    output = process_clustering_output(clusterer_mock)

    # An empty DeepDiff result is falsy, i.e. no differences were found.
    difference = DeepDiff(output, expected_output, ignore_order=True)
    assert not difference