Пример #1
0
def test_multilabel_list_of_len_1():
    """MultiLabel(["y"]) and MultiLabel("y") must yield the same first line."""
    frame = pd.DataFrame({"y": [1], "x": [2]})

    # Same frame and feature; only the way the label column is spelled differs.
    from_list = DFtoVW(df=frame, label=MultiLabel(["y"]), features=Feature("x"))
    from_name = DFtoVW(df=frame, label=MultiLabel("y"), features=Feature("x"))

    line_from_list = from_list.convert_df()[0]
    line_from_name = from_name.convert_df()[0]
    assert line_from_list == line_from_name
Пример #2
0
def test_multiclasslabel():
    """A multiclass label with a weight column renders as "<label> <weight>"."""
    frame = pd.DataFrame({"a": [1], "b": [0.5], "c": ["x"]})
    weighted_label = MulticlassLabel(name="a", weight="b")
    converter = DFtoVW(df=frame, label=weighted_label, features=Feature("c"))

    assert converter.convert_df()[0] == "1 0.5 | x"
Пример #3
0
def test_multilabel():
    """Two label columns are expected to be joined with a comma."""
    frame = pd.DataFrame({"y1": [1], "y2": [2], "x": [3]})
    both_labels = MultiLabel(["y1", "y2"])
    converter = DFtoVW(df=frame, label=both_labels, features=Feature("x"))

    assert converter.convert_df()[0] == "1,2 | 3"
Пример #4
0
def test_multiple_lines():
    """Each row of a two-row frame becomes one output line, in row order."""
    frame = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
    label = SimpleLabel("y")
    feature = Feature(value="x")
    converter = DFtoVW(label=label, features=feature, df=frame)

    # Compare the whole list, not just the first element.
    expected_lines = ["1 | 1", "-1 | 2"]
    assert converter.convert_df() == expected_lines
Пример #5
0
def test_variable_feature_name():
    """With name_from_df=True the feature name is read from column "a"."""
    frame = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})
    label = SimpleLabel("y")
    # The name is taken per row from column "a"; the value comes from column "x".
    named_feature = Feature(name="a", value="x", name_from_df=True)
    converter = DFtoVW(label=label, features=named_feature, df=frame)

    assert converter.convert_df()[0] == "1 | col_x:2"
Пример #6
0
def test_constant_feature_value_with_empty_name():
    """A literal value with an empty feature name is expected to render as ":2"."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    label = SimpleLabel("y")
    # value_from_df=False: the 2 is passed as a literal, not as a column name.
    constant_feature = Feature(name="", value=2, value_from_df=False)
    converter = DFtoVW(
        label=label,
        tag="idx",
        features=constant_feature,
        df=frame,
    )

    assert converter.convert_df()[0] == "1 id_1| :2"
Пример #7
0
def test_feature_column_renaming_and_tag():
    """Column "x" is emitted under the name "col_x", with the tag from "idx"."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    label = SimpleLabel("y")
    renamed_feature = Feature(name="col_x", value="x")
    converter = DFtoVW(
        label=label,
        tag="idx",
        features=renamed_feature,
        df=frame,
    )

    assert converter.convert_df()[0] == "1 id_1| col_x:2"
Пример #8
0
def test_multiple_named_namespaces():
    """Two named namespaces, the second given value=2, each get their own section."""
    frame = pd.DataFrame({"y": [1], "a": [2], "b": [3]})
    label = SimpleLabel("y")
    plain_namespace = Namespace(name="FirstNameSpace", features=Feature("a"))
    # The expected line shows this namespace's value as the ":2" suffix.
    valued_namespace = Namespace(name="DoubleIt", value=2, features=Feature("b"))
    converter = DFtoVW(
        df=frame,
        label=label,
        namespaces=[plain_namespace, valued_namespace],
    )

    assert converter.convert_df()[0] == "1 |FirstNameSpace 2 |DoubleIt:2 3"
Пример #9
0
def test_without_target_multiple_features():
    """Without a label, the line is expected to start directly at the "|"."""
    frame = pd.DataFrame({"a": [2], "b": [3]})
    feature_list = [Feature("a"), Feature("b")]
    converter = DFtoVW(df=frame, features=feature_list)

    assert converter.convert_df()[0] == "| 2 3"