Exemplo n.º 1
0
# Parameter records for individual discretization methods. Each record is an
# immutable tuple type; the type itself is used as the key into ``_dispatch``.

# ``k`` is forwarded to ``disc.EqualWidth`` by the matching handler.
EqualWidth = namedtuple("EqualWidth", "k")
# Carries no parameters; the matching handler yields ``None``.
Remove = namedtuple("Remove", ())
# ``points`` is forwarded to ``disc.Discretizer.create_discretized_var``.
Custom = namedtuple("Custom", "points")

# One single-element tuple per method type, in a fixed order.
METHODS = [
    (method_type,)
    for method_type in (
        Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom,
    )
]

def _discretize_default(m, data, var):
    """Delegate to the handler registered for the wrapped ``m.method``."""
    wrapped = m.method
    return _dispatch[type(wrapped)](wrapped, data, var)


def _discretize_leave(m, data, var):
    """Return ``var`` unchanged."""
    return var


def _discretize_mdl(m, data, var):
    """Apply a freshly constructed ``disc.EntropyMDL`` to ``(data, var)``."""
    return disc.EntropyMDL()(data, var)


def _discretize_equal_freq(m, data, var):
    """Apply ``disc.EqualFreq`` configured with ``m.k`` to ``(data, var)``."""
    return disc.EqualFreq(m.k)(data, var)


def _discretize_equal_width(m, data, var):
    """Apply ``disc.EqualWidth`` configured with ``m.k`` to ``(data, var)``."""
    return disc.EqualWidth(m.k)(data, var)


def _discretize_remove(m, data, var):
    """Return ``None`` in place of the variable."""
    return None


def _discretize_custom(m, data, var):
    """Build a discretized variable from ``var`` and the points in ``m``."""
    return disc.Discretizer.create_discretized_var(var, m.points)


# Maps a method record type to its handler. Every handler is called as
# ``handler(m, data, var)`` where ``m`` is an instance of the key type.
_dispatch = {
    Default: _discretize_default,
    Leave: _discretize_leave,
    MDL: _discretize_mdl,
    EqualFreq: _discretize_equal_freq,
    EqualWidth: _discretize_equal_width,
    Remove: _discretize_remove,
    Custom: _discretize_custom,
}

# Variable discretization state
DState = namedtuple(
    "DState",
    [
        "method",  # discretization method
        "points",  # induced cut points
Exemplo n.º 2
0
            ()),
 MethodDesc(Methods.Keep,
            "Keep numeric", "keep",
            "Keep the variable as is",
            lambda data, var: var,
            ()),
 MethodDesc(Methods.MDL,
            "Entropy vs. MDL", "entropy",
            "Split values until MDL exceeds the entropy (Fayyad-Irani)\n"
            "(requires discrete class variable)",
            _mdl_discretization,
            ()),
 MethodDesc(Methods.EqualFreq,
            "Equal frequency, intervals: ", "equal freq, k={}",
            "Create bins with same number of instances",
            lambda data, var, k: disc.EqualFreq(k)(data, var),
            ("freq_spin", )),
 MethodDesc(Methods.EqualWidth,
            "Equal width, intervals: ", "equal width, k={}",
            "Create bins of the same width",
            lambda data, var, k: disc.EqualWidth(k)(data, var),
            ("width_spin", )),
 MethodDesc(Methods.Remove,
            "Remove", "remove",
            "Remove variable",
            lambda *_: None,
            ()),
 MethodDesc(Methods.Binning,
            "Natural binning, desired bins: ", "binning, desired={}",
            "Create bins with nice thresholds; "
            "try matching desired number of bins",