Пример #1
0
def test_without_target():
    """Check the first converted line when no label is supplied.

    Two positional features read columns "a" and "b"; the expected output
    line is ``"| 2 3"``.
    """
    frame = pd.DataFrame({"a": [2], "b": [3]})
    features = [Feature(Col("a")), Feature(Col("b"))]
    converter = DFtoVW(df=frame, namespaces=Namespace(features))
    converted = converter.process_df()
    assert converted[0] == "| 2 3"
Пример #2
0
def test_multiple_lines_conversion():
    """Check that a two-row frame yields two lines, in row order.

    Column "y" supplies the label and column "x" the single feature value.
    """
    frame = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
    expected_lines = ["1 | 1", "-1 | 2"]

    target = SimpleLabel(Col("y"))
    namespace = Namespace(Feature(value=Col("x")))
    converter = DFtoVW(label=target, namespaces=namespace, df=frame)

    assert converter.process_df() == expected_lines
Пример #3
0
def test_feature_variable_column_name():
    """Check a feature whose name is read from a column.

    Column "a" holds the feature name ("col_x") and column "x" its value,
    so the expected first line is ``"1 | col_x:2"``.
    """
    frame = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})

    target = SimpleLabel(Col("y"))
    named_feature = Feature(name=Col("a"), value=Col("x"))
    converter = DFtoVW(
        label=target,
        namespaces=Namespace(named_feature),
        df=frame,
    )

    converted = converter.process_df()
    assert converted[0] == "1 | col_x:2"
Пример #4
0
def test_feature_constant_column_with_empty_name():
    """Check a constant-valued feature whose name is the empty string.

    The feature uses the literal value 2 rather than a column (column "x"
    of the frame is not referenced), and the tag comes from column "idx".
    The expected first line is ``"1 id_1| :2"``.
    """
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})

    target = SimpleLabel(Col("y"))
    row_tag = Col("idx")
    constant_feature = Feature(name="", value=2)
    converter = DFtoVW(
        label=target,
        tag=row_tag,
        namespaces=Namespace([constant_feature]),
        df=frame,
    )

    converted = converter.process_df()
    assert converted[0] == "1 id_1| :2"
Пример #5
0
def test_feature_column_renaming_and_tag():
    """Check a feature given an explicit name, combined with a tag.

    Column "x" supplies the value, the feature is named "col_x", and the
    tag comes from column "idx". The expected first line is
    ``"1 id_1| col_x:2"``.
    """
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})

    target = SimpleLabel(Col("y"))
    row_tag = Col("idx")
    renamed_feature = Feature(name="col_x", value=Col("x"))
    converter = DFtoVW(
        label=target,
        tag=row_tag,
        namespaces=Namespace([renamed_feature]),
        df=frame,
    )

    converted = converter.process_df()
    assert converted[0] == "1 id_1| col_x:2"
Пример #6
0
def test_multiple_namespaces():
    """Check the first converted line when two named namespaces are passed.

    The first namespace has only a name; the second has a name and
    ``value=2``. The expected first line is
    ``"1 |FirstNameSpace 2 |DoubleIt:2 3"``.
    """
    frame = pd.DataFrame({"y": [1], "a": [2], "b": [3]})

    target = SimpleLabel(Col("y"))
    first_ns = Namespace(name="FirstNameSpace", features=Feature(Col("a")))
    second_ns = Namespace(name="DoubleIt", value=2, features=Feature(Col("b")))
    converter = DFtoVW(df=frame, label=target, namespaces=[first_ns, second_ns])

    converted = converter.process_df()
    assert converted[0] == "1 |FirstNameSpace 2 |DoubleIt:2 3"