def test_create_authors_with_invalid_affiliation():
    """Check that an affiliation line lacking a usable id makes parsing fail.

    The affiliation block mixes a bare ``ETH`` line with a numbered line, and
    ``create_authors`` is expected to raise ``ValueError`` with a message
    about the affiliation type.
    """
    author_block = (
        "A. Einstein1, N. Bohr2\n"
        "\n"
        "ETH\n"
        "2 Københavns Universitet"
    )

    with pytest.raises(ValueError) as raised:
        create_authors(author_block)

    message = str(raised.value)
    assert "Cannot identify type of affiliations" in message
def test_create_authors_author_blocks_by_affiliation():
    """Check that authors grouped under their affiliation are rejected.

    The input alternates author lines and affiliation lines in blank-line
    separated groups; ``create_authors`` is expected to raise ``ValueError``.
    """
    grouped_block = (
        "K. Sachs, F. Schwennsen\n"
        "DESY, D 22607 Hamburg, Germany\n"
        "\n"
        "A. Holtkamp, M. Moskovic\n"
        "CERN, CH 1211 Geneva 23, Switzerland\n"
        "\n"
        "T. Schwander\n"
        "SLAC, Stanford, USA\n"
    )

    with pytest.raises(ValueError) as raised:
        create_authors(grouped_block)

    message = str(raised.value)
    # The spelling below mirrors the message this test was written against.
    assert "Authors grouped by affiliation? - Comming soon" in message
def test_create_authors_with_missing_affid():
    """Check the case where only some authors appear to carry an aff-id.

    A trailing digit on one name (a missing id elsewhere, or a number that is
    really part of a name) can make ``create_authors`` expect an id on every
    author. Both authors must come back without affiliations, and the
    unresolved id must be reported as a warning.
    """
    parsed = create_authors("A. Einstein, N. Bohr2")

    wanted = [
        {"fullname": name, "affiliations": [], "ids": [], "emails": []}
        for name in ("A. Einstein", "N. Bohr")
    ]
    unresolved = "Unresolved aff-ID or stray footnote symbol. Problematic author and aff-id: N. Bohr 2"

    assert wanted == parsed["authors"]
    assert unresolved in parsed["warnings"]
def test_create_authors_when_aff_line_ends_in_number():
    """Check that affiliation text ending in a number is kept intact.

    The first affiliation ends in ``10031``; each author must still be paired
    with the complete text of the affiliation whose id follows the name.
    """
    raw_text = (
        "T.M. Liss\n"
        "1\n"
        "L. Littenberg\n"
        "2\n"
        "\n"
        "1.\n"
        "Division of Science, City College of New York, 160 Convent Avenue, New York, NY 10031\n"
        "2.\n"
        "Physics Department, Brookhaven National Laboratory, Upton, NY 11973, USA"
    )
    city_college = u"Division of Science, City College of New York, 160 Convent Avenue, New York, NY 10031"
    brookhaven = u"Physics Department, Brookhaven National Laboratory, Upton, NY 11973, USA"
    wanted = [
        (u"T.M. Liss", [city_college]),
        (u"L. Littenberg", [brookhaven]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_without_affiliations():
    """Check a plain comma-separated author list with no affiliation block.

    Every author must be returned with an empty affiliation list, and the
    missing affiliations must be reported as a warning.
    """
    author_line = (
        "K. Berkelman, D. Cords, R. Felst, E. Gadermann, G. Grindhammer, "
        "H. Hultschig, P. Joos, W. Koch, U. Kötz, H. Krehbiel, D. Kreinick, "
        "J. Ludwig, K.-H. Mess, K.C. Moffeit, A. Petersen, G. Poelz, "
        "J. Ringel, K. Sauerberg, P. Schmüser, G. Vogel, B.H. Wiik, G. Wolf"
    )
    names = (
        u"K. Berkelman",
        u"D. Cords",
        u"R. Felst",
        u"E. Gadermann",
        u"G. Grindhammer",
        u"H. Hultschig",
        u"P. Joos",
        u"W. Koch",
        u"U. Kötz",
        u"H. Krehbiel",
        u"D. Kreinick",
        u"J. Ludwig",
        u"K.-H. Mess",
        u"K.C. Moffeit",
        u"A. Petersen",
        u"G. Poelz",
        u"J. Ringel",
        u"K. Sauerberg",
        u"P. Schm\xfcser",
        u"G. Vogel",
        u"B.H. Wiik",
        u"G. Wolf",
    )
    # Each author gets its own fresh, empty affiliation list.
    wanted = [(name, []) for name in names]

    parsed = create_authors(author_line)

    assert wanted == parsed["authors"]
    assert "Found no affiliations (empty line needed)" in parsed["warnings"]
def test_create_authors_comma_wrong_position():
    """Check input where a comma sits before the affiliation id.

    Author names are followed by a comma and the ids arrive on their own
    lines; the leading ``and`` must also be dropped from the second name.
    """
    raw_text = (
        "Y. Bao,\n"
        "1\n"
        "and A. Lambrecht,\n"
        "1,\n"
        "2\n"
        "\n"
        "1\n"
        "Department of Physics, University of Florida, Gainesville, Florida 32611\n"
        "2\n"
        "Laboratoire Kastler-Brossel, CNRS, ENS, Universit ́e Pierre et Marie Curie case 74, Campus Jussieu, F-75252 Paris Cedex 05, France\n"
    )
    florida = u"Department of Physics, University of Florida, Gainesville, Florida 32611"
    # The combining acute accent after the space is part of the input text.
    kastler_brossel = u"Laboratoire Kastler-Brossel, CNRS, ENS, Universit \u0301e Pierre et Marie Curie case 74, Campus Jussieu, F-75252 Paris Cedex 05, France"
    wanted = [
        (u"Y. Bao", [florida]),
        (u"A. Lambrecht", [florida, kastler_brossel]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_affiliation_with_numbers_and_letters():
    """Check affiliations whose text mixes digits and letters.

    Postal codes such as ``SW7 2AZ`` and ``20742-4111`` inside the
    affiliation text must be preserved in the returned affiliations.
    """
    raw_text = (
        "O. Buchmueller\n"
        "1\n"
        "K. Agashe\n"
        "2\n"
        "\n"
        "1.\n"
        "High Energy Physics Group, Blackett Laboratory, Imperial College, Prince Consort Road, London SW7 2AZ, UK\n"
        "2.\n"
        "University of Maryland, Department of Physics, College Park, MD 20742-4111, USA\n"
    )
    imperial = u"High Energy Physics Group, Blackett Laboratory, Imperial College, Prince Consort Road, London SW7 2AZ, UK"
    maryland = u"University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    wanted = [
        (u"O. Buchmueller", [imperial]),
        (u"K. Agashe", [maryland]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_with_many_affiliations():
    """Check one author carrying four comma-separated affiliation ids."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    raw_text = (
        "F. Durães1,2,3,4\n"
        "\n"
        "1 CERN\n"
        "2 Fermilab\n"
        "3 Lund University\n"
        "4 Instituto de Física, Universidade de São Paulo"
    )
    all_affiliations = [
        "CERN",
        "Fermilab",
        "Lund University",
        "Instituto de F\xedsica, Universidade de S\xe3o Paulo",
    ]
    wanted = [
        {
            "fullname": "F. Dur\xe3es",
            "affiliations": all_affiliations,
            "ids": [],
            "emails": [],
        },
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_no_commas_between_authors():
    """Check authors separated only by their affiliation ids and spaces."""
    raw_text = (
        "C. Patrignani1 K. Agashe2 G. Aielli1,2\n"
        "\n"
        "1 Universita di Bologna and INFN, Dip. Scienze per la Qualita della Vita, I-47921, Rimini, Italy\n"
        "2 University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    )
    bologna = u"Universita di Bologna and INFN, Dip. Scienze per la Qualita della Vita, I-47921, Rimini, Italy"
    maryland = u"University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    wanted = [
        (u"C. Patrignani", [bologna]),
        (u"K. Agashe", [maryland]),
        (u"G. Aielli", [bologna, maryland]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_with_no_text():
    """Check that passing ``None`` instead of text yields an empty dict."""
    parsed = create_authors(None)

    assert {} == parsed
def test_create_authors_space_between_affids():
    """Check that ``1, 20`` (ids split by comma and space) both resolve."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    raw_text = (
        "Y.X. Ali1, 20, E I Andronov20\n"
        "\n"
        "1 CERN\n"
        "20 DESY"
    )
    wanted = [
        (u"Y.X. Ali", [u"CERN", u"DESY"]),
        (u"E I Andronov", [u"DESY"]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_handles_spaces_at_the_end_of_an_author_or_affiliation():
    """Check that trailing spaces on author and affiliation lines are dropped."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    raw_text = (
        "J. Smith1 \n"
        "\n"
        "1 University of somewhere "
    )
    wanted = [(u"J. Smith", [u"University of somewhere"])]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_with_no_firstnames():
    """Check surname-only authors: they are kept, with a warning raised."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    parsed = create_authors("Einstein, Bohr")

    wanted = [(surname, []) for surname in (u"Einstein", u"Bohr")]

    assert wanted == parsed["authors"]
    assert "Author without firstname: Einstein" in parsed["warnings"]
def test_create_authors_unused_affiliation():
    """Check that an affiliation id no author refers to triggers a warning."""
    raw_text = (
        "K. Sachs 1, F. Schwennsen 1\n"
        "\n"
        "1 DESY\n"
        "2 CERN\n"
    )
    wanted = [
        (u"K. Sachs", ["DESY"]),
        (u"F. Schwennsen", ["DESY"]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
    # Affiliation 2 (CERN) is declared but never referenced by an author.
    assert "Unused affiliation-IDs: ['2']" in parsed["warnings"]
def test_create_authors_ignores_space_between_authors_and_affiliations():
    """Check a basic two-author input with a blank line before affiliations."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    raw_text = (
        "F. Lastname1, F.M. Otherlastname1,2\n"
        "\n"
        "1 CERN\n"
        "2 Otheraffiliation"
    )
    wanted = [
        (u"F. Lastname", [u"CERN"]),
        (u"F.M. Otherlastname", [u"CERN", u"Otheraffiliation"]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_note_symbols():
    """Check symbols such as † and ∗ used as affiliation ids.

    The symbols resolve to their affiliations, the numeric ids that have no
    matching affiliation line contribute nothing, and a caution is reported.
    """
    raw_text = (
        "Y.X. Aduszkiewicž†1, 20, E I Andronov20∗\n"
        "\n"
        "† CERN\n"
        "∗ DESY"
    )
    wanted = [
        (u"Y.X. Aduszkiewic\u017e", [u"CERN"]),
        (u"E I Andronov", [u"DESY"]),
    ]
    caution = "CAUTION! Using symbols (# and stuff) as aff-IDs."

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
    assert caution in parsed["warnings"]
def test_create_authors_note_footnotes():
    """Check footnote symbols (†, ∗) that match no affiliation line.

    Numeric ids still resolve to their affiliations; the stray symbol is
    reported in the warnings.
    """
    raw_text = (
        "Y.X. Ali†1, 20, E I Andronov20∗\n"
        "\n"
        "1 CERN\n"
        "20 DESY"
    )
    wanted = [
        (u"Y.X. Ali", [u"CERN", u"DESY"]),
        (u"E I Andronov", [u"DESY"]),
    ]
    stray_symbol = u"Unresolved aff-ID or stray footnote symbol. Problematic author and aff-id: Y.X. Ali †"

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
    assert stray_symbol in parsed["warnings"]
def test_create_authors_with_affid_but_missing_affiliation():
    """Check an author whose affiliation id has no matching affiliation line.

    Id ``1`` is never declared, so that author gets no affiliations and the
    unresolved id is reported as a warning.
    """
    raw_text = (
        "A. Einstein1, N. Bohr2\n"
        "\n"
        "2 Københavns Universitet"
    )

    parsed = create_authors(raw_text)

    wanted = [
        (u"A. Einstein", []),
        (u"N. Bohr", [u"K\xf8benhavns Universitet"]),
    ]
    unresolved = "Unresolved aff-ID or stray footnote symbol. Problematic author and aff-id: A. Einstein 1"

    assert wanted == parsed["authors"]
    assert unresolved in parsed["warnings"]
def test_create_authors_with_one_author_missing_affiliation():
    """Check an author list where one author carries no affiliation id.

    That author is returned with no affiliations and named in a warning; the
    other author resolves both ids.
    """
    raw_text = (
        "A. Einstein, N. Bohr1,2\n"
        "\n"
        "1 ETH\n"
        "2 Københavns Universitet"
    )

    parsed = create_authors(raw_text)

    wanted = [
        (u"A. Einstein", []),
        (u"N. Bohr", [u"ETH", u"K\xf8benhavns Universitet"]),
    ]
    no_affid = "Author without affiliation-id. Problematic author: A. Einstein"

    assert wanted == parsed["authors"]
    assert no_affid in parsed["warnings"]
def test_create_authors_no_commas_between_affids():
    """Check an author whose two ids sit on separate lines without a comma."""
    raw_text = (
        "C. Patrignani\n"
        "1,\n"
        "K. Agashe\n"
        "2,\n"
        "G. Aielli\n"
        "1\n"
        "2\n"
        "\n"
        "1.\n"
        "Universita di Bologna and INFN, Dip. Scienze per la Qualita della Vita, I-47921, Rimini, Italy\n"
        "2.\n"
        "University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    )
    bologna = "Universita di Bologna and INFN, Dip. Scienze per la Qualita della Vita, I-47921, Rimini, Italy"
    maryland = "University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    wanted = [
        {"fullname": name, "affiliations": affs, "ids": [], "emails": []}
        for name, affs in (
            ("C. Patrignani", [bologna]),
            ("K. Agashe", [maryland]),
            ("G. Aielli", [bologna, maryland]),
        )
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_handles_spaces_at_the_end_of_an_author_or_affiliation():
    """Check that trailing spaces on author and affiliation lines are dropped.

    The author line and the affiliation line both end in a space; the parsed
    full name and affiliation must come back without it.
    """
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    text = (
        "J. Smith1 \n"
        "\n"
        "1 University of somewhere "
    )
    # The tuple-shaped expectation that used to be assigned here was
    # overwritten before use; only the dict-shaped one is ever compared.
    expected = [
        {
            "fullname": "J. Smith",
            "affiliations": ["University of somewhere"],
            "ids": [],
            "emails": [],
        },
    ]

    result = create_authors(text)

    assert expected == result["authors"]
def test_create_authors_with_letters():
    """Check lowercase letters (``a``, ``bb``) used as affiliation ids.

    The name particle ``di`` is ambiguous between a name part and an id, so
    that author ends up without affiliations and two warnings are expected.
    """
    raw_text = (
        "J. Mills a L. di Caprio\n"
        "B. Smith bb\n"
        "\n"
        "a CERN\n"
        "bb Fermilab\n"
    )
    wanted = [
        (u"J. Mills", ["CERN"]),
        (u"L. di Caprio", []),
        (u"B. Smith", ["Fermilab"]),
    ]
    wanted_warnings = [
        'Is this part of a name or missing aff-id? "di" in a L. di Caprio',
        "Author without affiliation-id. Problematic author: L. di Caprio",
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
    for message in wanted_warnings:
        assert message in parsed["warnings"]
def test_create_authors_bad_author_lines():
    """Check author lines that start with a stray comma.

    Names and ids arrive on alternating lines, and every author line after
    the first begins with ``, ``; all four authors must still resolve.
    """
    raw_text = (
        "A. Aduszkiewicz\n"
        "1\n"
        ", Y.X. Ali\n"
        "1,20\n"
        ", E I Andronov\n"
        "20\n"
        ", Einstein\n"
        "13,15\n"
        "\n"
        "1 CERN\n"
        "20 DESY\n"
        "13 ETH ZÜRICH\n"
        "15 PRINCETON\n"
    )
    wanted = [
        {"fullname": name, "affiliations": affs, "ids": [], "emails": []}
        for name, affs in (
            ("A. Aduszkiewicz", ["CERN"]),
            ("Y.X. Ali", ["CERN", "DESY"]),
            ("E I Andronov", ["DESY"]),
            ("Einstein", ["ETH Z\xdcRICH", "PRINCETON"]),
        )
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_with_affiliations():
    """Check a well-formed author list with numbered affiliations.

    Four authors (the last one joined with ``and``) reference four numbered
    affiliations; the separator line holds only a space. Each author must
    resolve to the right affiliations and no ``warnings`` key may be present
    in the result.
    """
    text = (
        "F. Durães1, A.V. Giannini2, V.P. Gonçalves3,4 and F.S. Navarra2\n"
        " \n"
        "1 CERN\n"
        "2 Fermilab\n"
        "3 Lund University\n"
        "4 Instituto de Física, Universidade de São Paulo"
    )
    expected = [
        {
            "fullname": "F. Dur\xe3es",
            "affiliations": ["CERN"],
            "ids": [],
            "emails": [],
        },
        {
            "fullname": "A.V. Giannini",
            "affiliations": ["Fermilab"],
            "ids": [],
            "emails": [],
        },
        {
            "fullname": "V.P. Gon\xe7alves",
            "affiliations": [
                "Lund University",
                "Instituto de F\xedsica, Universidade de S\xe3o Paulo",
            ],
            "ids": [],
            "emails": [],
        },
        {
            "fullname": "F.S. Navarra",
            "affiliations": ["Fermilab"],
            "ids": [],
            "emails": [],
        },
    ]

    result = create_authors(text)

    assert expected == result["authors"]
    # Membership on the mapping itself checks its keys; no .keys() needed.
    assert "warnings" not in result
def test_create_authors_with_many_affiliations():
    """Check one author carrying four comma-separated affiliation ids."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    raw_text = (
        "F. Durães1,2,3,4\n"
        "\n"
        "1 CERN\n"
        "2 Fermilab\n"
        "3 Lund University\n"
        "4 Instituto de Física, Universidade de São Paulo"
    )
    all_affiliations = [
        u"CERN",
        u"Fermilab",
        u"Lund University",
        u"Instituto de F\xedsica, Universidade de S\xe3o Paulo",
    ]
    wanted = [(u"F. Dur\xe3es", all_affiliations)]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_ignores_space_between_authors_and_affiliations():
    """Check a basic two-author input with a blank line before affiliations."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    raw_text = (
        "F. Lastname1, F.M. Otherlastname1,2\n"
        "\n"
        "1 CERN\n"
        "2 Otheraffiliation"
    )
    wanted = [
        {"fullname": name, "affiliations": affs, "ids": [], "emails": []}
        for name, affs in (
            ("F. Lastname", ["CERN"]),
            ("F.M. Otherlastname", ["CERN", "Otheraffiliation"]),
        )
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
def test_create_authors_no_empty_line():
    """Check input that lacks the blank line between authors and affiliations.

    Without the separator, the affiliation lines are expected to be split
    into author entries, none of which has affiliations, and a warning about
    the missing affiliations is expected.
    """
    raw_text = (
        "K. Sachs1, M. Moskovic2\n"
        "1 DESY, D 22607 Hamburg, Germany\n"
        "2 CERN, CH 1211 Geneva 23, Switzerland\n"
    )
    pseudo_authors = (
        u"K. Sachs",
        u"M. Moskovic",
        u"DESY",
        u"D",
        u"Hamburg",
        u"Germany",
        u"CERN",
        u"CH",
        u"Geneva",
        u"Switzerland",
    )
    # Each entry gets its own fresh, empty affiliation list.
    wanted = [(name, []) for name in pseudo_authors]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]
    assert "Found no affiliations (empty line needed)" in parsed["warnings"]
def test_create_authors_space_between_affids():
    """Check that ``1, 20`` (ids split by comma and space) both resolve.

    The first author lists two ids with a space after the comma; both must
    map to their affiliations, and the second author must keep id ``20``.
    """
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    text = (
        "Y.X. Ali1, 20, E I Andronov20\n"
        "\n"
        "1 CERN\n"
        "20 DESY"
    )
    # The tuple-shaped expectation that used to be assigned here was
    # overwritten before use; only the dict-shaped one is ever compared.
    expected = [
        {
            "fullname": "Y.X. Ali",
            "affiliations": ["CERN", "DESY"],
            "ids": [],
            "emails": [],
        },
        {
            "fullname": "E I Andronov",
            "affiliations": ["DESY"],
            "ids": [],
            "emails": [],
        },
    ]

    result = create_authors(text)

    assert expected == result["authors"]
def test_create_authors_with_no_firstnames():
    """Check surname-only authors: they are kept, with a warning raised."""
    # NOTE(review): this function name is defined more than once in this
    # file; Python keeps only the last definition, so confirm which variant
    # is meant to run.
    parsed = create_authors("Einstein, Bohr")

    wanted = [
        {"fullname": surname, "affiliations": [], "ids": [], "emails": []}
        for surname in ("Einstein", "Bohr")
    ]

    assert wanted == parsed["authors"]
    assert "Author without firstname: Einstein" in parsed["warnings"]
def test_create_authors_affids_with_dots():
    """Check affiliation ids written with a trailing dot (``1.``, ``2.``).

    The ids sit on their own lines ahead of the affiliation text; authors
    reference them without the dot.
    """
    raw_text = (
        "C. Patrignani\n"
        "1,\n"
        "K. Agashe\n"
        "2,\n"
        "G. Aielli\n"
        "1,\n"
        "2\n"
        "\n"
        "1.\n"
        "Universita di Bologna and INFN, Dip. Scienze per la Qualita della Vita, I-47921, Rimini, Italy\n"
        "2.\n"
        "University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    )
    bologna = u"Universita di Bologna and INFN, Dip. Scienze per la Qualita della Vita, I-47921, Rimini, Italy"
    maryland = u"University of Maryland, Department of Physics, College Park, MD 20742-4111, USA"
    wanted = [
        (u"C. Patrignani", [bologna]),
        (u"K. Agashe", [maryland]),
        (u"G. Aielli", [bologna, maryland]),
    ]

    parsed = create_authors(raw_text)

    assert wanted == parsed["authors"]