Example #1
    def __init__(self):
        args = {
            LearnerEstimator: (OLS(), FoldIndex()),
            LayerEnsemble: (FoldIndex(), OLS()),
            TransformerEstimator: (Scale(), FoldIndex())
        }[Est]
        super(Tmp, self).__init__(*args)
Example #2
    def __init__(self):
        args = {
            LearnerEstimator: (OLS(), FoldIndex()),
            LayerEnsemble: (make_group(FoldIndex(), ESTIMATORS,
                                       PREPROCESSING), ),
            TransformerEstimator: (Scale(), FoldIndex())
        }[Est]
        super(Tmp, self).__init__(*args)
Example #3
def test_full_tuple_shape():
    """[Base] FoldIndex: test the tuple shape on generation."""
    tups = [(tri, tei) for tri, tei in FoldIndex(5, X=X).generate()]

    assert tups == [(((1, 5), ), (0, 1)), (((0, 1), (2, 5)), (1, 2)),
                    (((0, 2), (3, 5)), (2, 3)), (((0, 3), (4, 5)), (3, 4)),
                    (((0, 4), ), (4, 5))]
Example #4
    def test_scores_wo_folds_sklearn():
        """[SuperLearner] test scoring without folds on sklearn scorer."""
        if not run_sklearn:
            return

        with open(os.devnull, 'w') as f, redirect_stdout(f):
            ens3.fit(X2, y2)
            ens3.predict(X2)

        scores = dict()
        for _, tei in FoldIndex(FOLDS, X2).generate(as_array=True):
            col = 0
            for est_name, __ in sorted(ECM):
                s = mean_squared_error(y2[tei], F2[tei][:, col])

                if est_name not in scores:
                    scores[est_name] = []

                scores[est_name].append(s)

                col += 1

        for k in scores:
            scores[k] = np.mean(scores[k])

        for k in scores:
            assert scores[k] == ens3.data['score-m']['layer-1/%s' % k]
Example #5
def test_full_index_is_fitted():
    """[Base] FoldIndex: check fit methods."""
    idx = FoldIndex(4)
    assert not idx.n_samples
    idx.fit(X)
    assert idx.n_samples

    idx = FoldIndex(4)
    assert not idx.n_samples
    for _ in idx.generate(X): pass
    assert idx.n_samples

    idx = FoldIndex(4, X)
    assert idx.n_samples
Example #6
def test_full_array_shape():
    """[Base] FoldIndex: test the array shape on generation."""
    tr = [np.array([2, 3, 4]), np.array([0, 1, 4]), np.array([0, 1, 2, 3])]
    te = [np.array([0, 1]), np.array([2, 3]), np.array([4])]

    for i, (tri, tei) in enumerate(FoldIndex(3, X).generate(as_array=True)):

        np.testing.assert_array_equal(tri, tr[i])
        np.testing.assert_array_equal(tei, te[i])
Example #7
def test_scores_w_folds():
    """[SuperLearner] test scoring with folds."""

    scores = {'null-1': [], 'offs-1': [], 'sc.offs-2': [], 'sc.null-2': []}

    for _, tei in FoldIndex(FOLDS, X1).generate(as_array=True):
        col = 0
        for case in sorted(PREPROCESSING):
            for est_name, _ in sorted(ESTIMATORS[case]):
                s = rmse(y1[tei], F1[tei][:, col])
                if case != 'no':
                    scores['%s.%s-2' % (case, est_name)].append(s)
                else:
                    scores['%s-1' % est_name].append(s)

                col += 1
Example #8
def test_scores_wo_folds_in_script():
    """[SuperLearner] test scoring without folds and in-script scorer."""
    ens2_b.fit(X2, y2)
    scores = dict()
    for _, tei in FoldIndex(FOLDS, X2).generate(as_array=True):
        col = 0
        for est_name, __ in sorted(ECM):
            s = in_script_func(y2[tei], F2[tei][:, col])

            if est_name not in scores:
                scores[est_name] = []

            scores[est_name].append(s)

            col += 1

    for k in scores:
        scores[k] = np.mean(scores[k])

    for k in scores:
        assert scores[k] == ens2_b.data['score-m']['layer-1/%s' % k]
Example #9
def test_scores_wo_folds():
    """[SuperLearner] test scoring without folds."""

    scores = dict()
    for _, tei in FoldIndex(FOLDS, X2).generate(as_array=True):
        col = 0
        for est_name, _ in sorted(ECM):
            s = rmse(y2[tei], F2[tei][:, col])

            if est_name not in scores:
                scores[est_name] = []

            scores[est_name].append(s)

            col += 1

    for k in scores:
        scores[k] = np.mean(scores[k])

    for k in scores:
        assert scores[k] == ens2.data['score-m']['layer-1/%s' % k]
Example #10
from mlens.utils.exceptions import ParameterChangeWarning
from mlens.testing import Data
from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble
from mlens.externals.sklearn.base import clone

try:
    from sklearn.utils.estimator_checks import check_estimator
    run_sklearn = True
except ImportError:
    check_estimator = None
    run_sklearn = False

data = Data('stack', False, False)
X, y = data.get_data((25, 4), 3)

est = TransformerEstimator(Scale(), FoldIndex(), dtype=np.float64)

Est = TransformerEstimator


class Tmp(Est):
    """Temporary class

    Wrapper to get full estimator on no-args instantiation. For compatibility
    with older Scikit-learn versions.
    """
    def __init__(self):
        args = {
            LearnerEstimator: (OLS(), FoldIndex()),
            LayerEnsemble: (FoldIndex(), OLS()),
            TransformerEstimator: (Scale(), FoldIndex())
        }[Est]
        super(Tmp, self).__init__(*args)
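

# Hedged usage sketch (not part of the original snippet): the no-args wrapper
# is presumably consumed by scikit-learn's estimator checks when available;
# ``check_estimator(Tmp)`` is an assumption about how it is invoked.
def test_sklearn_compatibility():
    """Run scikit-learn's estimator checks on the no-args wrapper."""
    if run_sklearn:
        check_estimator(Tmp)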
Example #11
def test_learner_attr():
    """[Module | LearnerEstimator] test setting attribute"""
    est.fit(X, y)
    est.indexer = FoldIndex(1)
    np.testing.assert_raises(NotFittedError, est.predict, X)
Example #12
from mlens.testing import Data
from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble
from mlens.externals.sklearn.base import clone

try:
    from sklearn.utils.estimator_checks import check_estimator
    run_sklearn = True
except ImportError:
    check_estimator = None
    run_sklearn = False

data = Data('stack', False, False)
X, y = data.get_data((25, 4), 3)
(F, wf), (P, wp) = data.ground_truth(X, y)

est = LearnerEstimator(OLS(), FoldIndex(), dtype=np.float64)

Est = LearnerEstimator


class Tmp(Est):
    """Temporary class

    Wrapper to get full estimator on no-args instantiation. For compatibility
    with older Scikit-learn versions.
    """
    def __init__(self):
        args = {
            LearnerEstimator: (OLS(), FoldIndex()),
            LayerEnsemble: (FoldIndex(), OLS()),
            TransformerEstimator: (Scale(), FoldIndex())
        }[Est]
        super(Tmp, self).__init__(*args)
Example #13
def test_full_raises_on_empty():
    """[Base] FoldIndex: check raises error on singular array."""
    with np.testing.assert_raises(ValueError):
        FoldIndex(2, np.empty(1))
Example #14
from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble
from mlens.externals.sklearn.base import clone

try:
    from sklearn.utils.estimator_checks import check_estimator
    run_sklearn = True
except ImportError:
    check_estimator = None
    run_sklearn = False

data = Data('stack', False, True)
X, y = data.get_data((25, 4), 3)
(F, wf), (P, wp) = data.ground_truth(X, y)

Est = LayerEnsemble
est = LayerEnsemble(make_group(FoldIndex(), ESTIMATORS, PREPROCESSING),
                    dtype=np.float64)


class Tmp(Est):
    """Temporary class

    Wrapper to get full estimator on no-args instantiation. For compatibility
    with older Scikit-learn versions.
    """
    def __init__(self):
        args = {
            LearnerEstimator: (OLS(), FoldIndex()),
            LayerEnsemble: (make_group(FoldIndex(), ESTIMATORS,
                                       PREPROCESSING), ),
            TransformerEstimator: (Scale(), FoldIndex())
        }[Est]
        super(Tmp, self).__init__(*args)
Example #15
A learner is a wrapper around a generic estimator along with a cross-validation strategy.
The job of the learner is to manage all sub-computations required for fitting and
prediction. In fact, its public methods are generators that yield the sub-learners which
do the actual computation. A learner is the parent node of an estimator's computational
sub-graph induced by the cross-validation strategy.

A learner is created by specifying an ``estimator`` and an ``indexer``, along with a
set of optional arguments, most notably the ``name`` of the learner. Naming is important,
as it is used for cache referencing. If setting it manually, ensure you give the learner
a unique name.
"""
from mlens.utils.dummy import OLS
from mlens.parallel import Learner, Job
from mlens.index import FoldIndex

indexer = FoldIndex(folds=2)
learner = Learner(estimator=OLS(), indexer=indexer, name='ols')

######################################################################
# The learner doesn't do any heavy lifting itself; it manages the creation of a
# sub-graph of auxiliary :class:`SubLearner` nodes, one for each fold, during estimation.
# This process is dynamic: the sub-learners are temporary instances created for each
# estimation call.

######################################################################
# To fit a learner, we need a cache reference. When fitting all estimators from the
# main process, this reference can be a list. If not (e.g. multiprocessing), the
# reference should instead be a ``str`` pointing to the path of the cache directory.
# Prior to running a job (``fit``, ``predict``, ``transform``), the learner must be
# configured on the given data by calling the ``setup`` method. This takes care of
# indexing the training set for cross-validation, assigning output columns, and so on.
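
######################################################################
# A minimal sketch of that workflow when fitting from the main process is
# shown below. The ``gen_fit`` generator and the sub-learner ``fit(path)``
# call are assumptions made for illustration; consult the Learner
# documentation for the exact low-level calls.

import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.random.rand(10)

path = []                     # in-process cache reference (a list)
learner.setup(X, y, 'fit')    # index the training set, assign output columns

for sub_learner in learner.gen_fit(X, y):
    sub_learner.fit(path)     # each sub-learner runs one estimation in the sub-graph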
Example #16
def test_full_raises_on_oversampling():
    """[Base] FoldIndex: check raises error."""
    with np.testing.assert_raises(ValueError):
        FoldIndex(100, X)
Example #17
def test_learner_attr():
    """[Module | TransformerEstimator] test setting attribute"""
    est.fit(X, y)
    est.indexer = FoldIndex(2)
    np.testing.assert_warns(ParameterChangeWarning, est.predict, X)
Example #18
def test_full_raises_on_fold_1():
    """[Base] FoldIndex: check raises error on folds=1."""
    with np.testing.assert_raises(ValueError):
        FoldIndex(1, X)
Example #19
To parallelize the implementation, we can use the :class:`Layer` class. A layer is
a handle that runs any number of :class:`Group` instances attached to it in parallel.
Each group in turn is a wrapper around an ``indexer-transformers-estimators`` triplet.

Basics
------

So, to fit our two learners in parallel, we first need a :class:`Group` object to
handle them.
"""
from mlens.parallel import Layer, Group, make_group, run
from mlens.utils.dummy import OLS, Scale
from mlens.index import FoldIndex

indexer = FoldIndex(folds=2)
group = make_group(indexer, [OLS(1), OLS(2)], None)

############################################################################
# This ``group`` object is now a complete description of how to fit our two
# learners using the prescribed indexing method.
#
# To train the estimators, we need to feed the group to a :class:`Layer` instance:

import numpy as np

np.random.seed(2)

X = np.arange(20).reshape(10, 2)
y = np.random.rand(10)
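
############################################################################
# A hedged sketch of that step is shown below. The ``push`` call and the
# ``run`` signature are assumptions made for illustration; consult the Layer
# documentation for the exact API.

layer = Layer(name='layer-1')   # hypothetical name, used for cache referencing
layer.push(group)               # attach the group to the layer

preds = run(layer, 'fit', X, y, return_preds=True)
print(preds.shape)              # one prediction column per learner in the group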
Example #20
def test_full_warns_on_fold_1():
    """[Base] FoldIndex: check warns on folds=1 if not raise_on_exception."""
    with np.testing.assert_warns(UserWarning):
        FoldIndex(1, X, raise_on_exception=False)
Example #21
def test_full_raises_on_float():
    """[Base] FoldIndex: check raises error on float."""
    with np.testing.assert_raises(ValueError):
        FoldIndex(0.5, X)
Example #22
class OLSSparse(OLS):
    """OLS variant that densifies sparse input before predicting."""

    def predict(self, X):
        return super(OLSSparse, self).predict(X.toarray())


X = np.random.rand(10, 50).astype(np.float32)
y = np.arange(10).astype(np.float32)

first_prop = [1, 2, 3]
n_first_prop = len(first_prop)

second_prop = [i for i in range(n_first_prop)]
second_prop.append(second_prop[-1] + 1)
second_prop.append(second_prop[-1] + 1)
n_second_prop = len(second_prop)

ens1 = TempClass()
ens1.add([OLS(0), OLS(1)], FoldIndex(), propagate_features=first_prop)

ens2 = TempClass()
ens2.add([OLS(0), OLS(1)], FoldIndex(), propagate_features=first_prop)
ens2.add([OLS(2), OLS(3)], FoldIndex(), propagate_features=second_prop)

ens3 = TempClass()
ens3.add([OLSSparse(0), OLSSparse(1)],
         FoldIndex(),
         propagate_features=first_prop)
ens3.add([OLSSparse(2), OLSSparse(3)],
         FoldIndex(),
         propagate_features=second_prop)

ens4 = TempClass()
ens4.add([OLS(), OLS(1), OLS(2)], FoldIndex(), shuffle=True, random_state=SEED)