Example #1

import os
from contextlib import redirect_stdout

import numpy as np

from mlens.utils.dummy import Data

def test_ground_truth():
    """[Utils] testing: test ground truth for stacking."""

    gf = np.array([[17., 11., -42.], [29., 15., -30.],
                   [39.64705882, 17.64705882, -6.35294118],
                   [52.35294118, 22.35294118, 6.35294118], [63., 25., 30.],
                   [75., 29., 42.]])

    gwf = np.array([[-5., 11.], [-7., 9.], [-1.52941176, 7.88235294],
                    [-3.52941176, 5.88235294], [-3., 9.], [-5., 7.], [3., 3.],
                    [3.17647059, 3.17647059], [3., 3.]])

    gp = np.array([[14.57142857, 8.57142857, -31.42857143],
                   [27.14285714, 13.14285714, -18.85714286],
                   [39.71428571, 17.71428571, -6.28571429],
                   [52.28571429, 22.28571429, 6.28571429],
                   [64.85714286, 26.85714286, 18.85714286],
                   [77.42857143, 31.42857143, 31.42857143]])

    gwp = np.array([[-2., 8.28571429], [-4., 6.28571429],
                    [3.14285714, 3.14285714]])

    t, z = Data('stack', False, True).get_data((6, 2), 2)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        (F, wf), (P, wp) = Data('stack', False, True, 3).ground_truth(t, z)

    np.testing.assert_array_almost_equal(F, gf)
    np.testing.assert_array_almost_equal(wf, gwf)
    np.testing.assert_array_almost_equal(P, gp)
    np.testing.assert_array_almost_equal(wp, gwp)
Example #2

def run(cls, proba, preprocessing, **kwargs):
    """Function for executing specified test."""
    if cls == 'subset':
        p = kwargs['n_partitions']
    else:
        p = 1

    ests = ESTS[(proba, preprocessing)]
    prep = PREPROCESSING if preprocessing else None

    data = Data(cls, proba, preprocessing, **kwargs)

    X, y = data.get_data((LEN, WIDTH), MOD)
    (F, wf), _ = data.ground_truth(X, y, p)

    ens = EnsembleTransformer()
    ens.add(cls, ests, prep, proba=proba, **kwargs)
    ens.fit(X, y)

    pred = ens.transform(X)

    np.testing.assert_array_equal(F, pred)
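

# Hypothetical usage of the `run` helper above, for illustration only: it
# assumes the module-level fixtures ESTS, PREPROCESSING, LEN, WIDTH, MOD and
# the EnsembleTransformer import are defined as in the surrounding test module.
# The 'subset' case would additionally require an `n_partitions` keyword.
if __name__ == '__main__':
    run('stack', proba=False, preprocessing=True)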
Example #3

from mlens.utils.dummy import LayerGenerator, Data, Cache
from mlens.utils.dummy import (layer_fit,
                               lc_from_file,
                               lc_from_csv,
                               lc_predict,
                               lc_transform)

PROBA = True
PROCESSING = True
LEN = 12
WIDTH = 2
FOLDS = 3
PARTITIONS = 2
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)

X, y = data.get_data((LEN, WIDTH), MOD)
(F, wf), (P, wp) = data.ground_truth(X, y, subsets=PARTITIONS)

layer = lg.get_layer('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)
lc = lg.get_layer_container('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)

layer.indexer.fit(X)

cache = Cache(X, y, data)


def test_layer_fit():
    """[Parallel | Subset | Prep | Proba] test layer fit."""
    layer_fit(layer, cache, F, wf)
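

def test_layer_predict():
    """[Parallel | Subset | Prep | Proba] test layer predict.

    Sketch only: assumes the `layer_predict` helper mirrors the `layer_fit`
    signature used above, checking predictions against the ground truth P, wp.
    """
    from mlens.utils.dummy import layer_predict
    layer_predict(layer, cache, P, wp)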
Example #4

"""ML-Ensemble

"""

import numpy as np
from mlens.parallel.evaluation import fit_score
from mlens.utils.dummy import OLS, Data
from mlens.metrics import mape, make_scorer

X, y = Data('stack', False, False).get_data((10, 2), 3)


def test_fit_score():
    """[Parallel | Evaluation] Test fit-score function."""
    out = fit_score(case='test',
                    tr_list=[],
                    est_name='ols',
                    est=OLS(),
                    params=(0, {
                        'offset': 2
                    }),
                    x=X,
                    y=y,
                    idx=((0, 5), (5, 10)),
                    scorer=make_scorer(mape, greater_is_better=False),
                    error_score=None)

    assert out[0] == 'test'
    assert out[1] == 'ols'
    assert out[2] == 0
Example #5

"""ML-ENSEMBLE

Placeholder for more rigorous tests.

"""
import numpy as np
from mlens.ensemble import (SequentialEnsemble, SuperLearner, BlendEnsemble,
                            Subsemble)

from mlens.utils.dummy import (Data, PREPROCESSING, ESTIMATORS, ECM,
                               LayerGenerator)

FOLDS = 3
LEN = 24
WIDTH = 2
MOD = 2

data = Data('stack', False, True, FOLDS)
X, y = data.get_data((LEN, WIDTH), MOD)

lc_s = LayerGenerator().get_layer_container('stack', False, True)
lc_b = LayerGenerator().get_layer_container('blend', False, False)
lc_u = LayerGenerator().get_layer_container('subset', False, False)


def test_fit():
    """[Sequential] Test multilayer fitting."""

    S = lc_s.fit(X, y, -1)[-1]
    B = lc_b.fit(S, y, -1)[-1]
    U = lc_u.fit(B, y, -1)[-1]

    ens = SequentialEnsemble()
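
    # Possible continuation (sketch only): it is assumed here that
    # SequentialEnsemble.add takes the layer class name as its first argument,
    # mirroring the layer containers built above, and that fitting the
    # ensemble reproduces the chained output U.
    ens.add('stack', ESTIMATORS, PREPROCESSING)
    ens.add('blend', ECM)
    ens.add('subset', ECM)
    ens.fit(X, y)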
Example #6

from mlens.utils.dummy import LayerGenerator, Data, Cache
from mlens.utils.dummy import (layer_fit, layer_predict, layer_transform,
                               lc_fit, lc_from_file, lc_from_csv, lc_predict,
                               lc_transform, lc_feature_prop)

PROBA = True
PROCESSING = False
LEN = 6
WIDTH = 2
FOLDS = 3
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('blend', PROBA, PROCESSING)

X, y = data.get_data((LEN, WIDTH), MOD)
(F, wf), (P, wp) = data.ground_truth(X, y)

layer = lg.get_layer('blend', PROBA, PROCESSING)
lc = lg.get_layer_container('blend', PROBA, PROCESSING)
lc_p = lg.get_layer_container('blend',
                              PROBA,
                              PROCESSING,
                              propagate_features=[1])

layer.indexer.fit(X)

cache = Cache(X, y, data)
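

# A minimal layer-fit check following the pattern of the other examples in
# this document (assumption: `layer_fit` compares the fitted layer's output
# against the ground truth F and fitted weights wf, as in the stack and
# subset cases).
def test_layer_fit():
    """[Parallel | Blend | Proba] test layer fit."""
    layer_fit(layer, cache, F, wf)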
Example #7

"""ML-ENSEMBLE

Placeholder for more rigorous tests.

"""
import numpy as np
from mlens.metrics import rmse
from mlens.base import BlendIndex
from mlens.utils.dummy import Data, ESTIMATORS, PREPROCESSING, OLS

from mlens.ensemble import BlendEnsemble

LEN = 6
WIDTH = 2
MOD = 2

data = Data('blend', False, True)
X, y = data.get_data((LEN, WIDTH), MOD)

(F, wf), (P, wp) = data.ground_truth(X, y, 1, False)


def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    meta = OLS()
    meta.fit(F, y[3:])
    g = meta.predict(P)

    ens = BlendEnsemble(test_size=3)
    ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ens.add(OLS(), meta=True, dtype=np.float64)
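
    # Assumed continuation (sketch): fit the ensemble and check that its
    # predictions match the manually computed meta prediction `g` above.
    ens.fit(X, y)
    np.testing.assert_array_almost_equal(ens.predict(X), g)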
Example #8

from mlens.utils.dummy import LayerGenerator, Data, Cache
from mlens.utils.dummy import (layer_fit,
                               lc_fit,
                               lc_from_file,
                               lc_from_csv,
                               lc_predict,
                               lc_transform)

PROBA = False
PROCESSING = True
LEN = 6
WIDTH = 2
FOLDS = 3
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('stack', PROBA, PROCESSING, FOLDS)

X, y = data.get_data((LEN, WIDTH), MOD)
(F, wf), (P, wp) = data.ground_truth(X, y)

layer = lg.get_layer('stack', PROBA, PROCESSING, FOLDS)
lc = lg.get_layer_container('stack', PROBA, PROCESSING, FOLDS)

layer.indexer.fit(X)

cache = Cache(X, y, data)


def test_layer_fit():
    """[Parallel | Stack | Prep] test layer fit."""
    layer_fit(layer, cache, F, wf)
Example #9

"""ML-ENSEMBLE

:author: Sebastian Flennerhag
"""

import numpy as np
from mlens.preprocessing import Subset, Shift
from mlens.utils.dummy import Data

X, _ = Data('stack', False, False).get_data((10, 4), 2)

sub = Subset([0, 1])


def test_subset_1():
    """[Preprocessing | Subset]: assert correct subset."""
    assert sub.fit_transform(X).shape[1] == 2


def test_subset_2():
    """[Preprocessing | Subset]: assert X is returned for empty subset."""
    sub.set_params(**{'subset': None})
    out = sub.fit_transform(X)
    assert id(out) == id(X)


def test_shift():
    """[Preprocessing | Shift] test lagging."""
    sh = Shift(2)

    sh.fit(X)
Example #10

import numpy as np

from mlens.ensemble import SuperLearner
from mlens.metrics import rmse
from mlens.utils.dummy import Data, OLS, ESTIMATORS, PREPROCESSING

def fail_func(y, p):
    """Test for use of in-script scoring functions."""
    raise ValueError


def null_func(y, p):
    """Test for failed aggregation"""
    return 'not_value'


FOLDS = 3
LEN = 6
WIDTH = 2
MOD = 2

data1 = Data('stack', False, True, FOLDS)
X1, y1 = data1.get_data((LEN, WIDTH), MOD)
(F1, wf1), (P1, wp1) = data1.ground_truth(X1, y1, 1, False)
G1 = OLS().fit(F1, y1).predict(P1)

data2 = Data('stack', False, False, FOLDS)
X2, y2 = data2.get_data((LEN, WIDTH), MOD)
(F2, wf2), (P2, wp2) = data2.ground_truth(X2, y2, 1, False)
G2 = OLS().fit(F2, y2).predict(P2)

ens1 = SuperLearner(folds=FOLDS, scorer=rmse, verbose=100)
ens1.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
ens1.add_meta(OLS(), dtype=np.float64)

ens1_b = SuperLearner(folds=FOLDS, scorer=in_script_func)
ens1_b.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
Example #11

"""ML-ENSEMBLE

"""

import numpy as np

from mlens.utils.dummy import ECM, Data
from mlens.ensemble.base import LayerContainer
from mlens.externals.sklearn.base import clone

X, y = Data('stack', False, False).get_data((6, 2), 2)

lc = LayerContainer()
lc.add(ECM, 'full')


def get_gt():
    """Build ground truth."""

    F = np.empty((X.shape[0], len(ECM)))

    for i, (_, est) in enumerate(ECM):

        e = clone(est)

        assert e is not est

        e.fit(X, y)

        F[:, i] = e.predict(X)
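
    # Assumption (not in the original snippet): the helper returns the
    # assembled full-fit prediction matrix.
    return F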
Example #12

"""ML-ENSEMBLE

Test base functionality.
"""

import numpy as np
from mlens.externals.sklearn.base import clone
from mlens.utils.dummy import Data, LayerGenerator

LEN = 6
WIDTH = 2
MOD = 2

data = Data('stack', False, True, n_splits=5)
X, y = data.get_data((LEN, WIDTH), MOD)

lc = LayerGenerator().get_layer_container('stack', False, False)
layer = LayerGenerator().get_layer('stack', False, False)


def test_clone():
    """[Ensemble | LayerContainer] Test cloning."""
    cloned = clone(lc)

    params = lc.get_params(deep=False)
    params_cloned = cloned.get_params(deep=False)

    for par, param in params.items():
        if par == 'layers':
            assert param is not params_cloned[par]
        else:
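            # Assumed completion: non-layer parameters are carried over by the
            # clone, so they should compare equal.
            assert params_cloned[par] == param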