def test_multilabel_list_of_len_1():
    """Check a one-element label list converts like the bare column name."""
    frame = pd.DataFrame({"y": [1], "x": [2]})

    # Same frame and feature; only the way the label column is passed differs.
    from_list = DFtoVW(df=frame, label=MultiLabel(["y"]), features=Feature("x"))
    from_name = DFtoVW(df=frame, label=MultiLabel("y"), features=Feature("x"))

    list_line = from_list.convert_df()[0]
    name_line = from_name.convert_df()[0]
    assert list_line == name_line
def test_multiclasslabel():
    """Check the first converted line for a multiclass label with a weight."""
    frame = pd.DataFrame({"a": [1], "b": [0.5], "c": ["x"]})
    label = MulticlassLabel(name="a", weight="b")
    converter = DFtoVW(df=frame, label=label, features=Feature("c"))

    assert converter.convert_df()[0] == "1 0.5 | x"
def test_multilabel():
    """Check the first converted line when two label columns are given."""
    frame = pd.DataFrame({"y1": [1], "y2": [2], "x": [3]})
    label = MultiLabel(["y1", "y2"])
    converter = DFtoVW(df=frame, label=label, features=Feature("x"))

    expected = "1,2 | 3"
    assert converter.convert_df()[0] == expected
def test_multiple_lines():
    """Check that a two-row frame converts to two lines, in row order."""
    frame = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
    label = SimpleLabel("y")
    feature = Feature(value="x")
    converter = DFtoVW(label=label, features=feature, df=frame)

    expected = ["1 | 1", "-1 | 2"]
    assert converter.convert_df() == expected
def test_variable_feature_name():
    """Check a feature whose name is read from a dataframe column."""
    frame = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})
    label = SimpleLabel("y")
    # name_from_df=True: "a" is given as the column that holds the feature name.
    feature = Feature(name="a", value="x", name_from_df=True)
    converter = DFtoVW(label=label, features=feature, df=frame)

    assert converter.convert_df()[0] == "1 | col_x:2"
def test_constant_feature_value_with_empty_name():
    """Check a literal feature value paired with an empty feature name."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    label = SimpleLabel("y")
    # value_from_df=False: the value 2 is passed as a literal, not a column.
    feature = Feature(name="", value=2, value_from_df=False)
    converter = DFtoVW(label=label, tag="idx", features=feature, df=frame)

    assert converter.convert_df()[0] == "1 id_1| :2"
def test_feature_column_renaming_and_tag():
    """Check a renamed feature together with a tag column."""
    frame = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
    label = SimpleLabel("y")
    feature = Feature(name="col_x", value="x")
    converter = DFtoVW(label=label, tag="idx", features=feature, df=frame)

    assert converter.convert_df()[0] == "1 id_1| col_x:2"
def test_multiple_named_namespaces():
    """Check the first converted line with two named namespaces."""
    frame = pd.DataFrame({"y": [1], "a": [2], "b": [3]})
    label = SimpleLabel("y")

    # The second namespace also carries a namespace-level value of 2.
    first_ns = Namespace(name="FirstNameSpace", features=Feature("a"))
    second_ns = Namespace(name="DoubleIt", value=2, features=Feature("b"))
    converter = DFtoVW(df=frame, label=label, namespaces=[first_ns, second_ns])

    assert converter.convert_df()[0] == "1 |FirstNameSpace 2 |DoubleIt:2 3"
def test_without_target_multiple_features():
    """Check conversion with no label and two feature columns."""
    frame = pd.DataFrame({"a": [2], "b": [3]})
    feature_list = [Feature("a"), Feature("b")]
    converter = DFtoVW(df=frame, features=feature_list)

    assert converter.convert_df()[0] == "| 2 3"