Пример #1
0
def test_verbosity():
    """Fit IREP and RIPPER with ``verbosity=5`` and check the seed-42 reference rulesets."""
    irep_v5 = IREP(random_state=42, verbosity=5)
    rip_v5 = RIPPER(random_state=42, verbosity=5)

    # Each classifier is fit and checked in turn against its reference ruleset.
    for clf, expected in ((irep_v5, IREP_RULESET_42), (rip_v5, RIP_RULESET_42)):
        clf.fit(DF, class_feat=CLASS_FEAT, pos_class=POS_CLASS)
        assert clf.ruleset_ == expected
Пример #2
0
def test_fit_Xy_np():
    """Fit on the combined ``XY_NP`` fixture (no separate ``y``) and compare
    against the reference rulesets converted with ``feat_to_num_rs``."""
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    for clf, reference in ((irep, IREP_RULESET_42), (rip, RIP_RULESET_42)):
        clf.fit(XY_NP, y=None, class_feat=NP_CLASS_FEAT, pos_class=POS_CLASS)
        assert clf.ruleset_ == feat_to_num_rs(reference)
Пример #3
0
def test_fit_Xy_df():
    """Fit on ``DF`` (class column included) and check the seed-42 reference rulesets."""
    irep, rip = IREP(random_state=42), RIPPER(random_state=42)
    fit_args = dict(class_feat=CLASS_FEAT, pos_class=POS_CLASS)

    irep.fit(DF, **fit_args)
    assert irep.ruleset_ == IREP_RULESET_42

    rip.fit(DF, **fit_args)
    assert rip.ruleset_ == RIP_RULESET_42
Пример #4
0
def test_fit_X_y_np():
    """Fit with features and labels passed separately and check the reference rulesets.

    NOTE(review): despite the ``_np`` suffix, this test uses the ``X_DF`` /
    ``Y_DF`` fixtures -- confirm whether array fixtures were intended.
    """
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    for clf, expected in ((irep, IREP_RULESET_42), (rip, RIP_RULESET_42)):
        clf.fit(X_DF, y=Y_DF, pos_class=POS_CLASS)
        assert clf.ruleset_ == expected
Пример #5
0
def test_df_isnt_modified():
    """Fitting must not mutate the DataFrame handed to ``fit``.

    For each classifier a pristine snapshot of ``credit.csv`` is loaded, a
    copy of it is passed to ``fit``, and the copy is then compared with the
    snapshot. The copy itself must be the object that is fit: fitting on
    some other frame would make the comparison vacuous.
    """
    for model_cls in (IREP, RIPPER):
        pristine = pd.read_csv("credit.csv")
        df = pristine.copy()
        model = model_cls(random_state=42)
        model.fit(df, class_feat=CREDIT_CLASS_FEAT, pos_class=CREDIT_POS_CLASS)
        assert df.equals(pristine)
Пример #6
0
def test_fit_discrete_dataset():
    """Fit on an integer-valued version of the credit data and check that each
    learned ruleset is neither universal nor null.

    Every float column of ``CREDIT_DF`` is mapped through ``int(x % 10)``; the
    class column is copied over unchanged.
    """
    irep = IREP(random_state=0, n_discretize_bins=11)
    rip = RIPPER(random_state=0, n_discretize_bins=11)

    float_cols = CREDIT_DF.select_dtypes(float)
    discrete_df = float_cols.applymap(lambda value: int(value % 10))
    discrete_df[CREDIT_CLASS_FEAT] = CREDIT_DF[CREDIT_CLASS_FEAT]

    for clf in (irep, rip):
        clf.fit(discrete_df, class_feat=CREDIT_CLASS_FEAT, pos_class=CREDIT_POS_CLASS)
        assert not (clf.ruleset_.isuniversal() or clf.ruleset_.isnull())
Пример #7
0
def test_random_state():
    """Repeated fits with the same ``random_state`` must yield equal rulesets.

    Runs three fits per classifier on each dataset and checks every resulting
    ruleset equals the first one.
    """
    datasets = (
        (DF, CLASS_FEAT, POS_CLASS),  # Party dataset
        (CREDIT_DF, CREDIT_CLASS_FEAT, CREDIT_POS_CLASS),  # Credit dataset
    )

    for data, class_feat, pos_class in datasets:
        irep_rulesets, rip_rulesets = [], []
        for _ in range(3):
            irep = IREP(random_state=72)
            rip = RIPPER(random_state=72)
            irep.fit(data, class_feat=class_feat, pos_class=pos_class)
            rip.fit(data, class_feat=class_feat, pos_class=pos_class)
            irep_rulesets.append(irep.ruleset_)
            rip_rulesets.append(rip.ruleset_)
        assert all(rs == irep_rulesets[0] for rs in irep_rulesets)
        assert all(rs == rip_rulesets[0] for rs in rip_rulesets)
Пример #8
0
def test_fit_numeric_dataset():
    """Fit IREP on the credit dataset and check the seed-42 reference ruleset.

    Only IREP is exercised here: no RIPPER reference ruleset for the credit
    data is referenced by this test, so no RIPPER instance is created.
    """
    irep = IREP(random_state=42)

    irep.fit(
        CREDIT_DF, class_feat=CREDIT_CLASS_FEAT, pos_class=CREDIT_POS_CLASS,
    )
    assert irep.ruleset_ == CREDIT_IREP_RULESET_42
Пример #9
0
def test_df_isnt_modified():
    """The DataFrame passed to ``fit`` must be unaffected by model fitting.

    For each classifier a pristine snapshot of ``credit.csv`` is loaded and a
    copy of it is fit. The copy must be the frame that is fit; comparing
    frames the model never saw would always pass.
    """
    for model_cls in (IREP, RIPPER):
        pristine = pd.read_csv("credit.csv")
        df = pristine.copy()
        model = model_cls(random_state=42)
        model.fit(df,
                  class_feat=CREDIT_CLASS_FEAT,
                  pos_class=CREDIT_POS_CLASS)
        # df shouldn't be affected by side-effects during model fitting
        assert df.equals(pristine)
Пример #10
0
def test_infer_pos_class():
    """Fit without ``pos_class`` on a 0/1-encoded class column and check the
    seed-42 reference rulesets are still produced."""
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    def encode(label):
        # "democrat" becomes 1; every other value becomes 0.
        if label == "democrat":
            return 1
        return 0

    infer_df = DF.copy()
    infer_df[CLASS_FEAT] = infer_df[CLASS_FEAT].map(encode)

    for clf, expected in ((irep, IREP_RULESET_42), (rip, RIP_RULESET_42)):
        clf.fit(infer_df, class_feat=CLASS_FEAT)
        assert clf.ruleset_ == expected
Пример #11
0
def test_save_load_txt():
    """Round-trip a modified RIPPER model through ``to_txt`` / ``from_txt``.

    A copy of ``credit_rip`` is edited (two rules removed, one added), saved
    to a temporary text file, and loaded into a fresh RIPPER. The loaded
    model's ruleset, bin transformer settings, class feature and positive
    class are then checked.
    """
    txt_filename = "temp_test_sme.py_test_save_load.txt"
    expected_ruleset_str = "[[A9=t ^ A10=t ^ A4=u ^ A1=b ^ A11=7-16] V \
        [A9=t ^ A10=t] V \
        [A9=t ^ A7=h ^ A6=q] V \
        [A8=-2.5--1.2]]"

    rip = credit_rip.copy()
    rip.remove_rule_at(1)
    rip.remove_rule_at(1)
    rip.add_rule("[A8=-2.5--1.2]")
    # Make sure set up works:
    assert rip.ruleset_ == ruleset_fromstr(expected_ruleset_str)

    try:
        # Save
        rip.to_txt(txt_filename)
        new_rip = RIPPER(random_state=42)
        # Load
        new_rip.from_txt(
            txt_filename, class_feat=credit_class_feat, pos_class=credit_pos_class
        )
    finally:
        # Always clean up the temp file, even when saving or loading fails;
        # the existence check avoids masking a failure in to_txt itself.
        if os.path.exists(txt_filename):
            os.remove(txt_filename)

    assert new_rip.ruleset_ == ruleset_fromstr(expected_ruleset_str)
    assert new_rip.bin_transformer_.bins_ == {
        "A11": [("7", "16")],
        "A8": [("-2.5", "-1.2")],
    }
    assert new_rip.bin_transformer_.n_discretize_bins == 10
    assert new_rip.bin_transformer_.names_precision == 1
    assert new_rip.bin_transformer_.verbosity == 0
    assert new_rip.class_feat == credit_class_feat
    assert new_rip.pos_class == credit_pos_class
Пример #12
0
def test_fit_XY_rename_columns():
    """Fit on array fixtures with explicit ``feature_names`` and check that the
    seed-42 reference rulesets (which use the DataFrame's names) are produced.

    Two input layouts are covered: the combined ``XY_NP`` fixture with
    ``y=None``, and separate ``X_NP`` / ``Y_NP`` fixtures.
    """
    # (X, y, callable producing feature_names); names are computed per fit call.
    scenarios = (
        (XY_NP, None, lambda: DF.columns),  # With xy
        (X_NP, Y_NP, lambda: DF.drop(CLASS_FEAT, axis=1).columns),  # With x_y
    )

    for features, labels, get_names in scenarios:
        irep = IREP(random_state=42)
        rip = RIPPER(random_state=42)
        for clf, expected in ((irep, IREP_RULESET_42), (rip, RIP_RULESET_42)):
            clf.fit(
                features,
                y=labels,
                class_feat=CLASS_FEAT,
                pos_class=POS_CLASS,
                feature_names=get_names(),
            )
            assert clf.ruleset_ == expected
Пример #13
0
def test_fit_boolean_dataset():
    """Fit on an integer-coded version of the Party data and check that each
    learned ruleset is neither universal nor null.

    Every feature column is recoded ("y" -> 0, "n" -> 1, anything else -> 2);
    the "Party" class column is left as-is. Both IREP and RIPPER are fit and
    checked, so the RIPPER instance is actually exercised.
    """
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    def tobool(x):
        if x == "y":
            return 0
        elif x == "n":
            return 1
        else:
            return 2

    bool_df = DF.copy()
    for col in bool_df.drop("Party", axis=1).columns:
        bool_df[col] = bool_df[col].map(tobool)

    irep.fit(bool_df, class_feat="Party", pos_class="democrat")
    assert not (irep.ruleset_.isuniversal()) and not (irep.ruleset_.isnull())
    rip.fit(bool_df, class_feat="Party", pos_class="democrat")
    assert not (rip.ruleset_.isuniversal()) and not (rip.ruleset_.isnull())
Пример #14
0
def test_deprecated_bin_transformer():
    """Smoke test: ``predict`` runs when ``bin_transformer_`` is replaced by a
    plain dict of feature -> list of (low, high) tuples.

    No assertion is made on the predictions; the test only checks that
    prediction completes for both IREP and RIPPER.
    """
    legacy_bins = {
        "A11": [(0, 1), (1, 2), (2, 4), (4, 8), (8, 17), (17, 67)],
        "A15": [
            (0, 1), (1, 9), (10, 105), (108, 351),
            (351, 1004), (1058, 4607), (4700, 100000),
        ],
        "A3": [
            (0.0, 0.415), (0.415, 0.79), (0.79, 1.375), (1.375, 2.04),
            (2.04, 3.04), (3.04, 4.71), (4.75, 7.04), (7.08, 10.665),
            (10.75, 14.585), (14.79, 28.0),
        ],
        "A8": [
            (0.0, 0.04), (0.04, 0.165), (0.165, 0.335), (0.335, 0.71),
            (0.75, 1.25), (1.25, 1.835), (1.835, 2.79), (3.0, 5.04),
            (5.085, 13.0), (13.5, 28.5),
        ],
    }
    df = pd.read_csv("credit.csv")

    for model_cls in (IREP, RIPPER):
        clf = model_cls()
        clf.fit(df, class_feat="Class", pos_class="+")
        # Swap in the dict-style transformer after fitting, then predict.
        clf.bin_transformer_ = legacy_bins
        clf.predict(df)
Пример #15
0
def interpret_model(
    X,
    model,
    interpreter=None,
    model_predict_function=None,
    score_function=score_accuracy,
):
    """Interpret a more complex model by fitting a rule learner to its predictions.

    Parameters
    ----------
    X : DataFrame, numpy array, or other iterable
        Dataset upon which to interpret model's predictions.
    model :
        Trained classifier, e.g. sklearn, keras, pytorch, etc.
    interpreter : IREP or RIPPER object, default=None
        wittgenstein classifier to perform interpretation. When None, a new
        ``RIPPER()`` is created for this call, so separate calls never share
        (and refit) a single default instance.
    model_predict_function : function, default=None
        Forwarded unchanged to ``model_predict``.
        NOTE(review): presumably a custom callable for obtaining predictions
        from ``model`` -- confirm against ``model_predict``.
    score_function : function, default=score_accuracy
        Scoring function, forwarded to ``score_resolution``, used to evaluate
        how dutifully interpreter interpreted the model.

    Returns
    -------
    tuple :
        ruleset of the interpreter fit to the model's predictions,
        scoring of how dutifully interpreter interpreted the model

    Notes
    -----
    The interpreter is fit in place and its ``base_model`` attribute is set
    to ``model``.
    """
    if interpreter is None:
        interpreter = RIPPER()

    model_preds = utils.try_np_tonum(
        model_predict(X, model, model_predict_function=model_predict_function))
    interpreter.fit(X, model_preds)
    resolution = score_resolution(X,
                                  interpreter=interpreter,
                                  model_preds=model_preds,
                                  score_function=score_function)
    interpreter.base_model = model
    return interpreter.ruleset_, resolution
Пример #16
0
def test_use_initial_model():
    """Fit with ``initial_model`` given as a string, an IREP, and a RIPPER.

    For every form of the initial model, IREP and RIPPER (seed 1) must learn
    the same expected rulesets, and the model objects used as initial models
    must keep their original ruleset afterwards.
    """
    initial_model = "[[A9=t ^ A10=t]]"
    expected_irep = ruleset_fromstr(
        """[[A9=t ^ A10=t] V
        [A9=t ^ A7=h] V
        [A9=t ^ A4=u ^ A7=v]]
        """
    )
    expected_rip = ruleset_fromstr(
        """[[A9=t ^ A10=t] V
        [A9=t ^ A7=h] V
        [A9=t ^ A4=u ^ A14=0 ^ A15=0-0] V
        [A9=t ^ A6=w]]
        """
    )

    def check_both(starting_point):
        # Fit a fresh IREP, then a fresh RIPPER, from the given starting point.
        for model_cls, expected in ((IREP, expected_irep), (RIPPER, expected_rip)):
            clf = model_cls(random_state=1)
            clf.fit(
                credit_df,
                class_feat='Class',
                pos_class='+',
                initial_model=starting_point,
            )
            assert clf.ruleset_ == expected

    # From str
    check_both(initial_model)

    # From IREP
    initial_irep_model = IREP()
    initial_irep_model.init_ruleset(initial_model)
    check_both(initial_irep_model)

    # From RIP
    initial_rip_model = RIPPER()
    initial_rip_model.init_ruleset(initial_model)
    check_both(initial_rip_model)

    # No side-effects
    for seeded in (initial_irep_model, initial_rip_model):
        assert seeded.ruleset_ == ruleset_fromstr(initial_model)
Пример #17
0
from wittgenstein.base import Ruleset, ruleset_fromstr, rule_fromstr

# Mushroom fixture: a reference ruleset string plus an IREP fit with seed 42.
DF = pd.read_csv("mushroom.csv")
original_ruleset_str = (
    "[[Odor=f] V [Gill-size=n] V [Spore-print-color=r] V [Odor=m]]"
)
original_ruleset = ruleset_fromstr(original_ruleset_str)
original_rules = original_ruleset.rules
original_irep = IREP(random_state=42)
original_irep.fit(DF, class_feat="Poisonous/Edible", pos_class="p")
# Ensure setup works: the fitted model must reproduce the reference ruleset.
assert original_ruleset == original_irep.ruleset_


# Credit fixture: a RIPPER fit with seed 42 and the ruleset it is expected to learn.
credit_class_feat = "Class"
credit_pos_class = "+"
credit_df = pd.read_csv("credit.csv")
credit_rip = RIPPER(random_state=42, verbosity=0)
credit_rip.fit(credit_df, class_feat=credit_class_feat, pos_class=credit_pos_class)
credit_original_ruleset = ruleset_fromstr(
    "[[A9=t ^ A10=t ^ A4=u ^ A1=b ^ A11=7-16] V "
    "    [A9=t ^ A10=t ^ A4=u ^ A11=3-7] V "
    "    [A9=t ^ A10=t ^ A14=0] V "
    "    [A9=t ^ A10=t] V "
    "    [A9=t ^ A7=h ^ A6=q]]"
)
assert credit_rip.ruleset_ == credit_original_ruleset


def test_initruleset():
    """``init_ruleset()`` with no arguments must leave the model with an empty ``Ruleset``."""
    irep = IREP(random_state=42)
    irep.init_ruleset()
    # The comparison must be asserted; a bare `==` expression is evaluated
    # and discarded, so the test would pass regardless of the result.
    assert irep.ruleset_ == Ruleset()
Пример #18
0
def test_same_inputs_give_same_results():
    """The same data supplied through different ``fit`` call styles must give
    equal rulesets for a fixed ``random_state``.

    For seeds 0-2, IREP and RIPPER are each fit five ways (combined frame,
    split frame with and without ``class_feat``, combined array with feature
    names, split arrays with feature names); all rulesets per classifier must
    equal the first.
    """
    # Each entry fits a classifier using one input style. Arguments are
    # evaluated at call time, i.e. once per individual fit.
    fit_styles = (
        lambda clf: clf.fit(DF, class_feat=CLASS_FEAT, pos_class=POS_CLASS),
        lambda clf: clf.fit(
            X_DF, y=Y_DF, class_feat=CLASS_FEAT, pos_class=POS_CLASS
        ),
        lambda clf: clf.fit(X_DF, y=Y_DF, pos_class=POS_CLASS),
        lambda clf: clf.fit(
            XY_NP,
            y=None,
            class_feat=CLASS_FEAT,
            pos_class=POS_CLASS,
            feature_names=DF.columns,
        ),
        lambda clf: clf.fit(
            X_NP,
            y=Y_NP,
            class_feat=CLASS_FEAT,
            pos_class=POS_CLASS,
            feature_names=DF.drop(CLASS_FEAT, axis=1).columns,
        ),
    )

    for random_state in range(3):
        irep_res, rip_res = [], []

        for fit_with in fit_styles:
            irep = IREP(random_state=random_state)
            rip = RIPPER(random_state=random_state)
            fit_with(irep)
            irep_res.append(irep.ruleset_)
            fit_with(rip)
            rip_res.append(rip.ruleset_)

        assert all(res == irep_res[0] for res in irep_res)
        assert all(res == rip_res[0] for res in rip_res)