Exemplo n.º 1
0
def test_onehotencode_no_drop_first():
    """Test one-hot encoding of "Born" with ``drop_first=False``.

    With no category dropped, the ``Born_UK``, ``Born_USA`` and
    ``Born_Greece`` dummy columns are all expected in the output, and the
    source column "Born" must be removed. The same expectations are then
    checked when the already-fitted stage is applied to a single-row frame.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born", drop_first=False)
    res_df = onehotencode(df, verbose=True)
    assert "Born" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    assert "Born_Greece" in res_df.columns
    assert res_df["Born_Greece"][1] == 0
    assert res_df["Born_Greece"][2] == 0
    assert res_df["Born_Greece"][3] == 1

    # check when fitted
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
    # Membership must be checked on the fitted output (res_df2), not on the
    # result of the first, fitting call.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Born_Greece" in res_df2.columns
    assert res_df2["Born_Greece"][1] == 1
Exemplo n.º 2
0
def test_onehotencode_large():
    """Test a fitted one-hot encoder on a larger frame than it was fit on.

    The stage is fit on the small single-category-column frame with default
    arguments; the test expects ``Born_Greece`` to be absent and ``Born_UK``
    and ``Born_USA`` to be present. The fitted stage is then applied to a
    seven-row frame and must produce the same set of dummy columns.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born")
    res_df = onehotencode(df, verbose=True)
    assert "Born" not in res_df.columns
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0

    # check when fitted
    df2 = _one_categ_df_large()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert len(res_df2) == 7
    assert "Born" not in res_df2.columns
    assert "Born_Greece" not in res_df2.columns
    # Check column presence on the fitted output before indexing into it, so
    # a missing column fails as an assertion rather than a KeyError.
    assert "Born_UK" in res_df2.columns
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_UK"][3] == 0
    assert res_df2["Born_USA"][3] == 0
    assert res_df2["Born_UK"][2] == 1
    assert res_df2["Born_USA"][1] == 1
Exemplo n.º 3
0
def test_onehotencode_col_subset():
    """Test ``OneHotEncode`` with ``col_subset=True`` and an extra column.

    The stage is asked to encode "Born" and "Cat". The test expects "Born"
    to be encoded while "Name" is left untouched (no ``Name_*`` dummies).
    NOTE(review): presumably "Cat" is absent from the fixture frame and
    ``col_subset=True`` makes the stage encode only the requested columns
    that exist -- confirm against the OneHotEncode documentation.
    """
    df = _two_categ_df()
    onehotencode = OneHotEncode(columns=["Born", "Cat"], col_subset=True)
    res_df = onehotencode(df)
    assert "Born" not in res_df.columns
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    assert "Name" in res_df.columns
    assert "Name_Bob" not in res_df.columns
    assert "Name_Jack" not in res_df.columns
    assert "Name_Yan" not in res_df.columns

    # check when fitted
    df2 = _two_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_Greece" not in res_df2.columns
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
    # The remaining checks must target the fitted output (res_df2); asserting
    # on res_df here would only repeat the checks of the fitting call above.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Name" in res_df2.columns
    assert "Name_Bob" not in res_df2.columns
    assert "Name_Jack" not in res_df2.columns
    assert "Name_Yan" not in res_df2.columns
Exemplo n.º 4
0
def test_onehotencode_with_dummy_na():
    """Test one-hot encoding of "Born" with ``dummy_na=True``.

    The stage is fit on a frame from the ``_one_categ_df_with_nan`` fixture.
    The test expects ``Born_UK`` and ``Born_USA`` dummy columns, and no
    "Born" or ``Born_nan`` column, both on the fitting call and when the
    fitted stage is applied to a single-row frame.
    """
    df = _one_categ_df_with_nan()
    onehotencode = OneHotEncode("Born", dummy_na=True)
    res_df = onehotencode(df)
    assert "Born" not in res_df.columns
    assert "Born_nan" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0

    # check when fitted
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_nan" not in res_df2.columns
    # Membership must be checked on the fitted output (res_df2), not on the
    # result of the first, fitting call.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
Exemplo n.º 5
0
def test_onehotencode_one_with_drop_first_colname(verbose):
    """Test one-hot encoding where ``drop_first`` names the category to drop.

    With ``drop_first="UK"`` the test expects ``Born_UK`` to be the omitted
    dummy column, while ``Born_Greece`` and ``Born_USA`` are present. The
    same is then checked for the fitted stage on a single-row frame.

    Args:
        verbose: Forwarded to the fitting call of the stage. It is supplied
            from outside this function (presumably a pytest parametrization
            or fixture that is not visible here -- confirm).
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born", drop_first="UK")
    res_df = onehotencode(df, verbose=verbose)
    assert "Born" not in res_df.columns
    assert "Born_UK" not in res_df.columns
    assert "Born_Greece" in res_df.columns
    assert res_df["Born_Greece"][1] == 0
    assert res_df["Born_Greece"][2] == 0
    assert res_df["Born_Greece"][3] == 1
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0

    # check when fitted
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_UK" not in res_df2.columns
    assert "Born_Greece" in res_df2.columns
    assert res_df2["Born_Greece"][1] == 1
    # Membership must be checked on the fitted output (res_df2), not on the
    # result of the first, fitting call.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
Exemplo n.º 6
0
def test_onehotencode_two():
    """Test default ``OneHotEncode()`` on a frame with two encoded columns.

    No columns are named when constructing the stage; the test expects both
    "Born" and "Name" to be replaced by dummy columns, with ``Born_Greece``
    and ``Name_Bob`` being the omitted ones. The same column layout is then
    checked for the fitted stage on a single-row frame.
    """
    df = _two_categ_df()
    onehotencode = OneHotEncode()
    res_df = onehotencode(df)
    assert "Born" not in res_df.columns
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    assert "Name" not in res_df.columns
    assert "Name_Bob" not in res_df.columns
    assert "Name_Jack" in res_df.columns
    assert res_df["Name_Jack"][1] == 0
    assert res_df["Name_Jack"][2] == 1
    assert res_df["Name_Jack"][3] == 0
    assert "Name_Yan" in res_df.columns
    assert res_df["Name_Yan"][1] == 0
    assert res_df["Name_Yan"][2] == 0
    assert res_df["Name_Yan"][3] == 1

    # check when fitted
    df2 = _two_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
    print(res_df2)
    assert "Born" not in res_df2.columns
    assert "Born_Greece" not in res_df2.columns
    assert "Born_UK" in res_df2.columns
    assert res_df2["Born_UK"][1] == 0
    # The remaining checks must target the fitted output (res_df2); asserting
    # on res_df here would only repeat the checks of the fitting call above.
    assert "Born_USA" in res_df2.columns
    assert res_df2["Born_USA"][1] == 0
    assert "Name" not in res_df2.columns
    assert "Name_Bob" not in res_df2.columns
    assert "Name_Jack" in res_df2.columns
    assert res_df2["Name_Jack"][1] == 0
    assert "Name_Yan" in res_df2.columns
    assert res_df2["Name_Yan"][1] == 0