def test_generate_when_middle_name_absent():
    test_source = get_no_middle_source()
    pg = PseudocolumnGenerator(test_source)
    pg.generate()
    assert "full_name" in test_source.get_data().columns
    assert list(
        test_source.get_data()["full_name"].values) == full_names_no_middle
def test_get_canonical_columns():
    test_source = get_source()
    pg = PseudocolumnGenerator(test_source)
    pg._get_canonical_columns()
    assert len(pg.canonical_columns) == 3
    assert ColumnLabelCatalog.FIRST_NAME in pg.canonical_columns
    assert ColumnLabelCatalog.MIDDLE_NAME in pg.canonical_columns
    assert ColumnLabelCatalog.LAST_NAME in pg.canonical_columns
def test_can_generate_first_last_name_when_true():
    test_source = get_no_middle_source()
    pg = PseudocolumnGenerator(test_source)
    pg._get_canonical_columns()
    assert pg._can_generate_first_last_name()
def test_can_generate_full_name_when_false():
    test_source = get_no_middle_source()
    pg = PseudocolumnGenerator(test_source)
    pg._get_canonical_columns()
    assert not pg._can_generate_full_name()
def test_init_when_unstructured():
    unstructured_source = "this is an unstructured piece of text"
    with pytest.raises(Exception):
        PseudocolumnGenerator(unstructured_source)
def test_init_when_structured_and_custom_delimiter():
    test_source = get_source()
    pg = PseudocolumnGenerator(test_source, concatenation_delimiter=",")
    assert pg.data_source == test_source
    assert pg.delimiter == ","
def test_init_when_structured_and_default():
    test_source = get_source()
    pg = PseudocolumnGenerator(test_source)
    assert pg.data_source == test_source
    assert pg.delimiter == " "
def test_dedupe_spaces():
    test_source = get_source()
    pg = PseudocolumnGenerator(test_source)
    assert pg._dedupe_spaces("This  is a    test") == "This is a test"