def test_nationality_to_alpha2_code_multiple(read_nationalities):
    text = 'Turkey and United States of America'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR|US')

    text = 'Turkish American'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR|US')

    text = 'Turk and American'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR|US')

    text = 'Turkish Americans'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR|US')
def test_nationality_to_alpha2_code_single(read_nationalities):
    text = 'Turkey'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR')

    text = 'Turkish'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR')

    text = 'Turk'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR')

    text = 'Turks'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'TR')
def test_nationality_to_alpha2_code_northern_ireland(read_nationalities):
    text = 'Duke of Northern Ireland'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB')

    text = 'Ireland'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'IE')

    text = 'Irish'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'IE')

    text = 'Northern Irish'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB')

    text = 'Northern Ireland and The Republic of Ireland'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB|IE')

    text = 'Northern Ireland and the Irish'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB|IE')

    # The following tests should really return
    # both 'GB' and 'IE'. However, I have forced
    # the code to just return 'GB' for performance
    # reasons. I can easily resolve the issue by
    # using named entity recognition, but it is very
    # slow. Anyhow, it is very rare to find a physicist
    # who has both of these nationalities.
    text = 'Northern Ireland and Ireland'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB')

    text = 'Northern Irish and Irish'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB')
def test_nationality_to_alpha2_code_pipes_to_periods_for_tokenization(
        read_nationalities):
    text = 'Counties of Northern Ireland|County Down|'
    'The Troubles in County Down|Ulster'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (alpha2 == 'GB')
def test_nationality_to_alpha2_code_text_is_float(read_nationalities):
    text = np.nan
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (np.isnan(alpha2))
def test_nationality_to_alpha2_code_not_found(read_nationalities):
    text = '3M'
    alpha2 = nationality_to_alpha2_code(text, read_nationalities)
    assert (np.isnan(alpha2))