Пример #1
0
def test_without_target():
    """Converting a frame without a label must give "| 2 3" as the first line."""
    frame = pd.DataFrame({"a": [2], "b": [3]})
    # One Col-backed feature per dataframe column, in column order.
    column_features = [Feature(Col(column)) for column in ("a", "b")]
    converter = DFtoVW(df=frame, namespaces=Namespace(column_features))
    lines = converter.process_df()
    assert lines[0] == "| 2 3"
Пример #2
0
def test_multilabel_list_of_len_1():
    """MultiLabel built from ["y"] and from "y" must convert to the same line."""
    frame = pd.DataFrame({"y": [1], "x": [2]})
    from_list = DFtoVW(df=frame, label=MultiLabel(["y"]), features=Feature("x"))
    from_string = DFtoVW(df=frame, label=MultiLabel("y"), features=Feature("x"))
    list_line = from_list.convert_df()[0]
    string_line = from_string.convert_df()[0]
    assert list_line == string_line
Пример #3
0
def test_multiple_namespaces():
    """Two named namespaces, the second carrying value=2, appear in order."""
    frame = pd.DataFrame({"y": [1], "a": [2], "b": [3]})
    target = SimpleLabel(Col("y"))
    plain_ns = Namespace(name="FirstNameSpace", features=Feature(Col("a")))
    scaled_ns = Namespace(name="DoubleIt", value=2, features=Feature(Col("b")))
    converter = DFtoVW(df=frame, label=target, namespaces=[plain_ns, scaled_ns])
    assert converter.process_df()[0] == "1 |FirstNameSpace 2 |DoubleIt:2 3"
Пример #4
0
def test_absent_col_error():
    """Building a converter on a column missing from the frame raises ValueError.

    The frame only has column "a". ``Col("c")`` refers to a column that is not
    there, and the expected message names exactly that column.
    """
    # The fixture is created outside the ``raises`` block so that only the
    # converter construction can satisfy the expected exception.
    df = pd.DataFrame({"a": [1]})
    with pytest.raises(ValueError) as value_error:
        # The result is intentionally discarded: construction itself must fail.
        DFtoVW(
            df=df,
            label=SimpleLabel(Col("a")),
            namespaces=Namespace(
                # "d" is passed as a bare string rather than Col, and the
                # expected message below does not list it.
                [Feature(Col("a")), Feature(Col("c")), Feature("d")]
            ),
        )
    expected = "In argument 'features', column(s) 'c' not found in dataframe"
    assert expected == str(value_error.value)
Пример #5
0
def test_multiclasslabel():
    """A MulticlassLabel with a weight column renders as "1 0.5 | x"."""
    frame = pd.DataFrame({"a": [1], "b": [0.5], "c": ["x"]})
    weighted_label = MulticlassLabel(name="a", weight="b")
    converter = DFtoVW(df=frame, label=weighted_label, features=Feature("c"))
    lines = converter.convert_df()
    assert lines[0] == "1 0.5 | x"
Пример #6
0
def test_multilabel():
    """A MultiLabel over two columns renders both values joined by a comma."""
    frame = pd.DataFrame({"y1": [1], "y2": [2], "x": [3]})
    both_labels = MultiLabel(["y1", "y2"])
    converter = DFtoVW(df=frame, label=both_labels, features=Feature("x"))
    lines = converter.convert_df()
    assert lines[0] == "1,2 | 3"
Пример #7
0
def test_multiple_lines():
    """A two-row frame converts to two lines, one per row, in row order."""
    frame = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
    target = SimpleLabel("y")
    x_feature = Feature(value="x")
    converter = DFtoVW(label=target, features=x_feature, df=frame)
    assert converter.convert_df() == ["1 | 1", "-1 | 2"]
Пример #8
0
def test_feature_variable_column_name():
    """A feature whose name and value both come from columns gives "col_x:2"."""
    frame = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})
    target = SimpleLabel(Col("y"))
    # Name is read from column "a", value from column "x".
    named_feature = Feature(name=Col("a"), value=Col("x"))
    converter = DFtoVW(label=target, namespaces=Namespace(named_feature), df=frame)
    lines = converter.process_df()
    assert lines[0] == "1 | col_x:2"
Пример #9
0
def test_multiple_lines_conversion():
    """A two-row frame yields two converted lines, one per row, in row order."""
    frame = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
    target = SimpleLabel(Col("y"))
    x_namespace = Namespace(Feature(value=Col("x")))
    converter = DFtoVW(label=target, namespaces=x_namespace, df=frame)
    assert converter.process_df() == ["1 | 1", "-1 | 2"]
Пример #10
0
def test_variable_feature_name():
    """With name_from_df=True the feature name is read from column "a"."""
    frame = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})
    target = SimpleLabel("y")
    named_feature = Feature(name="a", value="x", name_from_df=True)
    converter = DFtoVW(label=target, features=named_feature, df=frame)
    lines = converter.convert_df()
    assert lines[0] == "1 | col_x:2"
Пример #11
0
def test_multilabel_non_positive_name_error():
    """A MultiLabel column containing 0 must be rejected with ValueError."""
    expected_message = (
        "In argument 'name' of 'MultiLabel', column 'y' must be >= 1."
    )
    frame = pd.DataFrame({"y": [0], "b": [1]})
    with pytest.raises(ValueError) as exc_info:
        DFtoVW(df=frame, label=MultiLabel(name="y"), features=Feature("b"))
    assert str(exc_info.value) == expected_message
Пример #12
0
def test_multiclasslabel_non_positive_name_error():
    """A MulticlassLabel column containing 0 must be rejected with ValueError."""
    expected_message = (
        "In argument 'name' of 'MulticlassLabel', column 'a' must be >= 1."
    )
    frame = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
    with pytest.raises(ValueError) as exc_info:
        DFtoVW(
            df=frame,
            label=MulticlassLabel(name="a", weight="b"),
            features=Feature("c"),
        )
    assert str(exc_info.value) == expected_message
Пример #13
0
def test_feature_constant_column_with_empty_name():
    """A constant feature with an empty name renders as ":2" after the tag."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    target = SimpleLabel(Col("y"))
    row_tag = Col("idx")
    # value=2 is a literal here, not wrapped in Col.
    constant_ns = Namespace([Feature(name="", value=2)])
    converter = DFtoVW(label=target, tag=row_tag, namespaces=constant_ns, df=frame)
    lines = converter.process_df()
    assert lines[0] == "1 id_1| :2"
Пример #14
0
def test_feature_column_renaming_and_tag():
    """Column "x" is emitted under the name "col_x", preceded by the row tag."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    target = SimpleLabel(Col("y"))
    row_tag = Col("idx")
    renamed_ns = Namespace([Feature(name="col_x", value=Col("x"))])
    converter = DFtoVW(label=target, tag=row_tag, namespaces=renamed_ns, df=frame)
    lines = converter.process_df()
    assert lines[0] == "1 id_1| col_x:2"
Пример #15
0
def test_absent_col_error():
    """Features naming columns missing from the frame must raise ValueError.

    The frame only has column "a"; the expected message lists the two other
    requested columns, 'c' and 'd'.
    """
    # The fixture is created outside the ``raises`` block so that only the
    # converter construction can satisfy the expected exception.
    df = pd.DataFrame({"a": [1]})
    with pytest.raises(ValueError) as value_error:
        DFtoVW(
            df=df,
            label=SimpleLabel("a"),
            features=[Feature(col) for col in ["a", "c", "d"]],
        )
    expected = "In 'Feature': column(s) 'c', 'd' not found in dataframe."
    assert expected == str(value_error.value)
Пример #16
0
def test_multiclasslabel_negative_weight_error():
    """A weight column containing -0.5 must be rejected with ValueError."""
    expected_message = (
        "In argument 'weight' of 'MulticlassLabel', column 'w' must be >= 0."
    )
    frame = pd.DataFrame({"y": [1], "w": [-0.5], "x": [2]})
    with pytest.raises(ValueError) as exc_info:
        DFtoVW(
            df=frame,
            label=MulticlassLabel(name="y", weight="w"),
            features=Feature("x"),
        )
    assert str(exc_info.value) == expected_message
Пример #17
0
def test_constant_feature_value_with_empty_name():
    """With value_from_df=False, value=2 is emitted literally as ":2"."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    target = SimpleLabel("y")
    constant_feature = Feature(name="", value=2, value_from_df=False)
    converter = DFtoVW(label=target, tag="idx", features=constant_feature, df=frame)
    lines = converter.convert_df()
    assert lines[0] == "1 id_1| :2"
Пример #18
0
def test_feature_column_renaming_and_tag():
    """Column "x" is emitted under the name "col_x", preceded by the row tag."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    target = SimpleLabel("y")
    renamed_feature = Feature(name="col_x", value="x")
    converter = DFtoVW(label=target, tag="idx", features=renamed_feature, df=frame)
    lines = converter.convert_df()
    assert lines[0] == "1 id_1| col_x:2"
Пример #19
0
def test_multiclasslabel_constant_label_type_error():
    """A string weight combined with weight_from_df=False must raise TypeError."""
    expected_message = (
        "In 'MulticlassLabel', when weight_from_df=False, argument 'weight' "
        "should be either of the following type(s): 'int', 'float'."
    )
    frame = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
    with pytest.raises(TypeError) as exc_info:
        # The label is built inside the block: its construction may be what raises.
        DFtoVW(
            df=frame,
            label=MulticlassLabel(name="a", weight="b", weight_from_df=False),
            features=Feature("c"),
        )
    assert str(exc_info.value) == expected_message
Пример #20
0
def test_multiclasslabel_non_positive_constant_label_error():
    """A constant label of -1 (name_from_df=False) must raise ValueError."""
    expected_message = "In 'MulticlassLabel', argument 'name' must be >= 1."
    frame = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
    with pytest.raises(ValueError) as exc_info:
        # The label is built inside the block: its construction may be what raises.
        DFtoVW(
            df=frame,
            label=MulticlassLabel(name=-1, weight="b", name_from_df=False),
            features=Feature("c"),
        )
    assert str(exc_info.value) == expected_message
Пример #21
0
def test_non_numerical_simplelabel_error():
    """A SimpleLabel column holding a string must be rejected with TypeError."""
    expected_message = (
        "In argument 'name' of 'SimpleLabel', column 'y' "
        "should be either of the following type(s): 'int', 'float'."
    )
    frame = pd.DataFrame({"y": ["a"], "x": ["featX"]})
    with pytest.raises(TypeError) as exc_info:
        DFtoVW(df=frame, label=SimpleLabel(name="y"), features=Feature("x"))
    assert str(exc_info.value) == expected_message
Пример #22
0
def test_without_target_multiple_features():
    """Converting two features without a label must give "| 2 3"."""
    frame = pd.DataFrame({"a": [2], "b": [3]})
    column_features = [Feature(column) for column in ["a", "b"]]
    converter = DFtoVW(df=frame, features=column_features)
    lines = converter.convert_df()
    assert lines[0] == "| 2 3"