Exemplos de IREP em Python, exemplos de wittgenstein.irep.IREP em Python

Exemplo n.º 1

0

Exibir arquivo

def test_fit_numeric_dataset():
    """IREP fitted on CREDIT_DF with seed 42 reproduces the pinned ruleset."""
    # Only IREP is exercised in this test, so no RIPPER instance is built.
    irep = IREP(random_state=42)

    irep.fit(
        CREDIT_DF, class_feat=CREDIT_CLASS_FEAT, pos_class=CREDIT_POS_CLASS,
    )
    assert irep.ruleset_ == CREDIT_IREP_RULESET_42

Exemplo n.º 2

0

Exibir arquivo

def test_verbosity():
    """Fitting with verbosity=5 still yields the pinned seed-42 rulesets."""
    fit_kwargs = {"class_feat": CLASS_FEAT, "pos_class": POS_CLASS}
    verbose_irep = IREP(random_state=42, verbosity=5)
    verbose_rip = RIPPER(random_state=42, verbosity=5)

    # IREP first, then RIPPER, each compared with its own expected ruleset.
    verbose_irep.fit(DF, **fit_kwargs)
    assert verbose_irep.ruleset_ == IREP_RULESET_42

    verbose_rip.fit(DF, **fit_kwargs)
    assert verbose_rip.ruleset_ == RIP_RULESET_42

Exemplo n.º 3

0

Exibir arquivo

def test_fit_X_y_np():
    """Fit both learners from separate X and y inputs; expect the pinned rulesets.

    NOTE(review): despite the ``_np`` suffix, the inputs passed are ``X_DF``
    and ``Y_DF`` -- confirm whether NumPy inputs were intended here.
    """
    split_inputs = {"y": Y_DF, "pos_class": POS_CLASS}
    irep_model = IREP(random_state=42)
    rip_model = RIPPER(random_state=42)

    irep_model.fit(X_DF, **split_inputs)
    assert irep_model.ruleset_ == IREP_RULESET_42

    rip_model.fit(X_DF, **split_inputs)
    assert rip_model.ruleset_ == RIP_RULESET_42

Exemplo n.º 4

0

Exibir arquivo

def test_fit_Xy_df():
    """Fit both learners on the combined frame DF; expect the pinned rulesets."""
    # Each learner is paired with the ruleset it must reproduce at seed 42.
    pinned = [
        (IREP(random_state=42), IREP_RULESET_42),
        (RIPPER(random_state=42), RIP_RULESET_42),
    ]
    for learner, expected_ruleset in pinned:
        learner.fit(DF, class_feat=CLASS_FEAT, pos_class=POS_CLASS)
        assert learner.ruleset_ == expected_ruleset

Exemplo n.º 5

0

Exibir arquivo

def test_fit_Xy_np():
    """Fit both learners on the combined array XY_NP.

    The expected rulesets are the pinned seed-42 ones passed through
    ``feat_to_num_rs`` (presumably mapping feature names to numeric column
    identifiers -- confirm against the helper's definition).
    """
    irep_model = IREP(random_state=42)
    rip_model = RIPPER(random_state=42)
    array_fit_args = dict(y=None, class_feat=NP_CLASS_FEAT, pos_class=POS_CLASS)

    irep_model.fit(XY_NP, **array_fit_args)
    assert irep_model.ruleset_ == feat_to_num_rs(IREP_RULESET_42)

    rip_model.fit(XY_NP, **array_fit_args)
    assert rip_model.ruleset_ == feat_to_num_rs(RIP_RULESET_42)

Exemplo n.º 6

0

Exibir arquivo

def test_df_isnt_modified():
    """Fitting must leave the DataFrame passed to ``fit`` unchanged.

    For each learner, a pristine frame is loaded from "credit.csv", a copy
    of it is handed to ``fit``, and the copy is then compared with the
    pristine frame.
    """
    old_df = pd.read_csv("credit.csv")
    df = old_df.copy()
    irep = IREP(random_state=42)
    # Fit on the very frame that is compared afterwards; fitting on a
    # different object would make the equality check below vacuous.
    irep.fit(df, class_feat=CREDIT_CLASS_FEAT, pos_class=CREDIT_POS_CLASS)
    assert df.equals(old_df)

    old_df = pd.read_csv("credit.csv")
    df = old_df.copy()
    rip = RIPPER(random_state=42)
    rip.fit(df, class_feat=CREDIT_CLASS_FEAT, pos_class=CREDIT_POS_CLASS)
    assert df.equals(old_df)

Exemplo n.º 7

0

Exibir arquivo

def test_fit_discrete_dataset():
    """Both learners find a non-trivial ruleset on small-integer features.

    The float columns of CREDIT_DF are reduced to ``int(x % 10)`` and the
    class column is re-attached before fitting with ``n_discretize_bins=11``.
    """
    irep = IREP(random_state=0, n_discretize_bins=11)
    rip = RIPPER(random_state=0, n_discretize_bins=11)

    def to_small_int(value):
        return int(value % 10)

    float_columns = CREDIT_DF.select_dtypes(float)
    discrete_df = float_columns.applymap(to_small_int)
    discrete_df[CREDIT_CLASS_FEAT] = CREDIT_DF[CREDIT_CLASS_FEAT]

    for learner in (irep, rip):
        learner.fit(
            discrete_df,
            class_feat=CREDIT_CLASS_FEAT,
            pos_class=CREDIT_POS_CLASS,
        )
        # The ruleset must be neither universal nor null.
        assert not learner.ruleset_.isuniversal() and not learner.ruleset_.isnull()

Exemplo n.º 8

0

Exibir arquivo

def test_random_state():
    """Repeated fits with the same random_state yield identical rulesets.

    The check runs three fits per learner on each of two datasets.
    """
    datasets = (
        (DF, CLASS_FEAT, POS_CLASS),  # Party dataset
        (CREDIT_DF, CREDIT_CLASS_FEAT, CREDIT_POS_CLASS),  # Credit dataset
    )

    for data, class_feat, pos_class in datasets:
        irep_rulesets, rip_rulesets = [], []
        for _ in range(3):
            # Fresh learners every round so nothing carries over between fits.
            irep = IREP(random_state=72)
            rip = RIPPER(random_state=72)
            irep.fit(data, class_feat=class_feat, pos_class=pos_class)
            rip.fit(data, class_feat=class_feat, pos_class=pos_class)
            irep_rulesets.append(irep.ruleset_)
            rip_rulesets.append(rip.ruleset_)

        # Every run must agree with the first one.
        assert all(rs == irep_rulesets[0] for rs in irep_rulesets)
        assert all(rs == rip_rulesets[0] for rs in rip_rulesets)

Exemplo n.º 9

0

Exibir arquivo

def test_fit_boolean_dataset():
    """Both learners find a non-trivial ruleset on integer-coded features.

    Every column of DF except "Party" is recoded as "y" -> 0, "n" -> 1,
    anything else -> 2 before fitting.
    """
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    def tobool(x):
        if x == "y":
            return 0
        elif x == "n":
            return 1
        else:
            return 2

    bool_df = DF.copy()
    for col in bool_df.drop("Party", axis=1).columns:
        bool_df[col] = bool_df[col].map(tobool)

    irep.fit(bool_df, class_feat="Party", pos_class="democrat")
    assert not (irep.ruleset_.isuniversal()) and not (irep.ruleset_.isnull())
    # Exercise the RIPPER instance as well; it was previously constructed but
    # never fitted or checked.
    rip.fit(bool_df, class_feat="Party", pos_class="democrat")
    assert not (rip.ruleset_.isuniversal()) and not (rip.ruleset_.isnull())

Exemplo n.º 10

0

Exibir arquivo

def test_df_isnt_modified():
    """Fitting must not produce side effects on the DataFrame given to ``fit``.

    For each learner, a pristine frame is loaded from "credit.csv", a copy
    of it is handed to ``fit``, and the copy is then compared with the
    pristine frame.
    """
    # df shouldn't be affected by side-effects during model fitting
    old_df = pd.read_csv("credit.csv")
    df = old_df.copy()
    irep = IREP(random_state=42)
    # Fit on ``df`` itself: the assertion below is only meaningful when the
    # compared frame is the one the model actually received.
    irep.fit(df,
             class_feat=CREDIT_CLASS_FEAT,
             pos_class=CREDIT_POS_CLASS)
    assert df.equals(old_df)

    old_df = pd.read_csv("credit.csv")
    df = old_df.copy()
    rip = RIPPER(random_state=42)
    rip.fit(df,
            class_feat=CREDIT_CLASS_FEAT,
            pos_class=CREDIT_POS_CLASS)
    assert df.equals(old_df)

Exemplo n.º 11

0

Exibir arquivo

def test_infer_pos_class():
    """Fit without ``pos_class`` on a 0/1 target; expect the pinned rulesets.

    The class column of a copy of DF is recoded to 1 for "democrat" and 0
    otherwise, and ``fit`` is called with only ``class_feat``.
    """
    irep = IREP(random_state=42)
    rip = RIPPER(random_state=42)

    def democrat_flag(label):
        if label == "democrat":
            return 1
        return 0

    infer_df = DF.copy()
    infer_df[CLASS_FEAT] = infer_df[CLASS_FEAT].map(democrat_flag)

    irep.fit(infer_df, class_feat=CLASS_FEAT)
    assert irep.ruleset_ == IREP_RULESET_42

    rip.fit(infer_df, class_feat=CLASS_FEAT)
    assert rip.ruleset_ == RIP_RULESET_42

Exemplo n.º 12

0

Exibir arquivo

def test_fit_XY_rename_columns():
    """Array inputs plus ``feature_names`` reproduce the pinned rulesets.

    Two input layouts are exercised: the combined array XY_NP with y=None,
    and the separate X_NP / Y_NP pair.
    """
    layouts = (
        # With xy
        (XY_NP, None, DF.columns),
        # With x_y
        (X_NP, Y_NP, DF.drop(CLASS_FEAT, axis=1).columns),
    )

    for features, target, names in layouts:
        # Fresh learners for each layout.
        irep = IREP(random_state=42)
        rip = RIPPER(random_state=42)
        shared_kwargs = dict(
            y=target,
            class_feat=CLASS_FEAT,
            pos_class=POS_CLASS,
            feature_names=names,
        )

        irep.fit(features, **shared_kwargs)
        assert irep.ruleset_ == IREP_RULESET_42

        rip.fit(features, **shared_kwargs)
        assert rip.ruleset_ == RIP_RULESET_42

Exemplo n.º 13

0

Exibir arquivo

Arquivo: test_preprocess.py Projeto: vishalbelsare/wittgenstein

def test_deprecated_bin_transformer():
    """Smoke test: ``predict`` runs after ``bin_transformer_`` is overwritten.

    The replacement value is a dict mapping a feature name to a list of
    (lower, upper) tuples. No assertion is made on the predictions; the
    test passes as long as nothing raises.
    """
    legacy_bins = {
        "A11": [(0, 1), (1, 2), (2, 4), (4, 8), (8, 17), (17, 67)],
        "A15": [
            (0, 1),
            (1, 9),
            (10, 105),
            (108, 351),
            (351, 1004),
            (1058, 4607),
            (4700, 100000),
        ],
        "A3": [
            (0.0, 0.415),
            (0.415, 0.79),
            (0.79, 1.375),
            (1.375, 2.04),
            (2.04, 3.04),
            (3.04, 4.71),
            (4.75, 7.04),
            (7.08, 10.665),
            (10.75, 14.585),
            (14.79, 28.0),
        ],
        "A8": [
            (0.0, 0.04),
            (0.04, 0.165),
            (0.165, 0.335),
            (0.335, 0.71),
            (0.75, 1.25),
            (1.25, 1.835),
            (1.835, 2.79),
            (3.0, 5.04),
            (5.085, 13.0),
            (13.5, 28.5),
        ],
    }
    credit = pd.read_csv("credit.csv")

    # Same routine for both learners: fit, swap in the dict, then predict.
    for learner_cls in (IREP, RIPPER):
        learner = learner_cls()
        learner.fit(credit, class_feat="Class", pos_class="+")
        learner.bin_transformer_ = legacy_bins
        learner.predict(credit)

Exemplo n.º 14

0

Exibir arquivo

def test_same_inputs_give_same_results():
    """Equivalent ways of passing the same data yield the same ruleset.

    For each of three seeds, both learners are fitted through five input
    layouts, and every resulting ruleset must equal the first one.
    """
    for random_state in range(3):
        # (first positional argument, keyword arguments) for each fit call.
        fit_calls = (
            (DF, dict(class_feat=CLASS_FEAT, pos_class=POS_CLASS)),
            (X_DF, dict(y=Y_DF, class_feat=CLASS_FEAT, pos_class=POS_CLASS)),
            (X_DF, dict(y=Y_DF, pos_class=POS_CLASS)),
            (
                XY_NP,
                dict(
                    y=None,
                    class_feat=CLASS_FEAT,
                    pos_class=POS_CLASS,
                    feature_names=DF.columns,
                ),
            ),
            (
                X_NP,
                dict(
                    y=Y_NP,
                    class_feat=CLASS_FEAT,
                    pos_class=POS_CLASS,
                    feature_names=DF.drop(CLASS_FEAT, axis=1).columns,
                ),
            ),
        )

        irep_res, rip_res = [], []
        for data, kwargs in fit_calls:
            # New learners per layout so earlier fits cannot leak state.
            irep = IREP(random_state=random_state)
            rip = RIPPER(random_state=random_state)
            irep.fit(data, **kwargs)
            irep_res.append(irep.ruleset_)
            rip.fit(data, **kwargs)
            rip_res.append(rip.ruleset_)

        assert all([res == irep_res[0] for res in irep_res])
        assert all([res == rip_res[0] for res in rip_res])

Exemplo n.º 15

0

Exibir arquivo

def test_use_initial_model():
    """``fit(initial_model=...)`` accepts a string, an IREP, or a RIPPER.

    Each form of the initial model must lead to the same expected rulesets,
    and the model objects passed in must still hold the initial ruleset
    afterwards.
    """
    initial_model = "[[A9=t ^ A10=t]]"
    expected_irep = ruleset_fromstr(
        """[[A9=t ^ A10=t] V
        [A9=t ^ A7=h] V
        [A9=t ^ A4=u ^ A7=v]]
        """
    )
    expected_rip = ruleset_fromstr(
        """[[A9=t ^ A10=t] V
        [A9=t ^ A7=h] V
        [A9=t ^ A4=u ^ A14=0 ^ A15=0-0] V
        [A9=t ^ A6=w]]
        """
    )

    def check_both_learners(starting_point):
        # Fit fresh seed-1 learners from the given initial model and compare.
        irep = IREP(random_state=1)
        irep.fit(
            credit_df,
            class_feat='Class',
            pos_class='+',
            initial_model=starting_point,
        )
        assert irep.ruleset_ == expected_irep

        rip = RIPPER(random_state=1)
        rip.fit(
            credit_df,
            class_feat='Class',
            pos_class='+',
            initial_model=starting_point,
        )
        assert rip.ruleset_ == expected_rip

    # From str
    check_both_learners(initial_model)

    # From IREP
    initial_irep_model = IREP()
    initial_irep_model.init_ruleset(initial_model)
    check_both_learners(initial_irep_model)

    # From RIP
    initial_rip_model = RIPPER()
    initial_rip_model.init_ruleset(initial_model)
    check_both_learners(initial_rip_model)

    # No side-effects
    assert initial_irep_model.ruleset_ == ruleset_fromstr(initial_model)
    assert initial_rip_model.ruleset_ == ruleset_fromstr(initial_model)

Exemplo n.º 16

0

Exibir arquivo

from copy import deepcopy
import os

import pytest
import pandas as pd

from wittgenstein.irep import IREP
from wittgenstein.ripper import RIPPER
from wittgenstein.base import Ruleset, ruleset_fromstr, rule_fromstr

# Mushroom fixture: a hard-coded ruleset plus an IREP model fitted with seed 42.
DF = pd.read_csv("mushroom.csv")
original_ruleset_str = (
    "[[Odor=f] V [Gill-size=n] V [Spore-print-color=r] V [Odor=m]]"
)
original_ruleset = ruleset_fromstr(original_ruleset_str)
original_rules = original_ruleset.rules
original_irep = IREP(random_state=42)
original_irep.fit(DF, class_feat="Poisonous/Edible", pos_class="p")
# Fail at import time if the fitted model no longer matches the hard-coded
# ruleset, since code relying on this fixture assumes the two agree.
assert original_ruleset == original_irep.ruleset_


# Credit fixture: a RIPPER model fitted with seed 42 and verbosity 0.
credit_class_feat = "Class"
credit_pos_class = "+"
credit_df = pd.read_csv("credit.csv")
credit_rip = RIPPER(random_state=42, verbosity=0)
credit_rip.fit(
    credit_df,
    class_feat=credit_class_feat,
    pos_class=credit_pos_class,
)
credit_original_ruleset = ruleset_fromstr(
    "[[A9=t ^ A10=t ^ A4=u ^ A1=b ^ A11=7-16] V \
    [A9=t ^ A10=t ^ A4=u ^ A11=3-7] V \
    [A9=t ^ A10=t ^ A14=0] V \
    [A9=t ^ A10=t] V \
    [A9=t ^ A7=h ^ A6=q]]"

Exemplo n.º 17

0

Exibir arquivo

def test_initruleset():
    """``init_ruleset`` accepts no argument, a Ruleset, or a ruleset string.

    With no argument the model's ruleset must equal an empty ``Ruleset()``;
    with a Ruleset or its string form it must equal ``original_ruleset``.
    """
    # Each comparison is asserted: a bare ``==`` expression is evaluated and
    # discarded, so the test could never fail.
    irep = IREP(random_state=42)
    irep.init_ruleset()
    assert irep.ruleset_ == Ruleset()

    irep = IREP(random_state=42)
    irep.init_ruleset(original_ruleset)
    assert irep.ruleset_ == original_ruleset

    irep = IREP(random_state=42)
    irep.init_ruleset(original_ruleset_str)
    assert irep.ruleset_ == original_ruleset