Example #1
def test_partial_fit_transform():
    t = steps.Transform(IncrementalPCA(n_components=3), partial_fit_batches=3)
    trans, y, sample_weight = t.fit_transform(X)
    _run_assertions(trans, y, sample_weight)
    t2 = steps.Transform(IncrementalPCA(n_components=3), partial_fit_batches=3)
    with pytest.raises(TypeError):
        t2.partial_fit = None  # will try to call this and get TypeError
        t2.fit_transform(X)
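`_run_assertions` is defined elsewhere in the test module and not shown in these snippets; a minimal sketch of what it plausibly checks, assuming the sampler returned no labels or weights and that the transformed ElmStore exposes a `.flat` DataArray (both assumptions, not taken from the source):

def _run_assertions(trans, y, sample_weight):
    # Hypothetical stand-in for the helper used by these tests
    assert trans is not None
    assert y is None and sample_weight is None      # sampler supplied no labels/weights
    assert trans.flat.values.shape[1] == 3          # n_components=3 columns after PCA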
Example #2
def test_fit():
    t = steps.Transform(IncrementalPCA(n_components=3), partial_fit_batches=2)
    fitted = t.fit(X)
    assert isinstance(fitted, steps.Transform)
    assert isinstance(fitted._estimator, IncrementalPCA)
    trans, y, sample_weight = fitted.transform(X)
    _run_assertions(trans, y, sample_weight)
Example #3
def test_pipeline_new_with_params():
    p = Pipeline([
        steps.SelectCanvas('band_1'),
        steps.Flatten(),
        ('pca', steps.Transform(IncrementalPCA(n_components=3))),
        ('kmeans', KMeans(n_clusters=4))
    ])
    p.fit(random_elm_store())
    p.predict(random_elm_store())
    assert p.steps[-1][-1].cluster_centers_.shape[0] == 4
    p2 = p.new_with_params(kmeans__n_clusters=7, pca__n_components=2)
    with pytest.raises(NotFittedError):
        p2.predict(random_elm_store())
    p2.fit(random_elm_store())
    assert p2.steps[-1][-1].cluster_centers_.shape[0] == 7
Example #4
def test_kmeans_model_selection(client=None):

    pipe = Pipeline([
        steps.Flatten(),
        ('pca', steps.Transform(IncrementalPCA())),
        ('kmeans', MiniBatchKMeans(n_clusters=5)),
    ], scoring=kmeans_aic, scoring_kwargs={'score_weights': [-1]})

    def samp(*args, **kwargs):
        return random_elm_store(bands=12, mn=0, mx=1, height=20, width=40)

    en = ENSEMBLE_KWARGS.copy()
    n_clusters_choices = list(range(3, 10))

    def init(pipe, **kwargs):
        estimators = []
        for _ in range(100):
            n_components = np.random.choice(np.arange(2, 6))
            n_clusters = np.random.choice(n_clusters_choices)
            estimator = copy.deepcopy(pipe)
            estimator.set_params(kmeans__n_clusters=n_clusters,
                                 pca__n_components=n_components)
            estimators.append(estimator)
        return estimators

    en['ngen'] = 20
    en['model_scoring'] = kmeans_aic
    en['ensemble_init_func'] = init
    en['model_selection_kwargs'] = dict(drop_n=30,
                                        evolve_n=30,
                                        choices=n_clusters_choices)
    en['model_selection'] = kmeans_model_averaging
    sa = SAMPLER_DATA_SOURCE.copy()
    sa['sampler'] = samp
    en.update(sa)
    fitted = pipe.fit_ensemble(**en)
    assert len(fitted.ensemble) == en['saved_ensemble_size']
    preds = fitted.predict_many(**sa)
    assert len(preds) == len(fitted.ensemble) * len(
        SAMPLER_DATA_SOURCE['args_list'])
Example #5
def test_fit_transform():
    t = steps.Transform(IncrementalPCA(n_components=3))
    trans, y, sample_weight = t.fit_transform(X)
    _run_assertions(trans, y, sample_weight)
Example #6
    (0.1, 1.1),
    (0.2, 1.2),
    (1, 2),
]
weights = ('weights', steps.ModifySample(add_sample_weight))
log = ('log', steps.ModifySample(log_scaler))
preamble = lambda: [
    diff_in_time,
    flat_step,
    drop_na_step,
    get_y_step,
    weights,
]

linear = lambda: ('estimator', LinearRegression(n_jobs=-1))
pca = lambda: ('pca', steps.Transform(PCA()))
n_components = [None, 4, 6, 8, 10]
scalers = zip(('MinMaxScaler', 'RobustScaler', 'StandardScaler', 'None'),
              (minmax, robust, standard, None))
estimators = zip(('LinearRegression', ), (linear, ))


def main():
    '''
    Beginning on START_DATE, step forward hourly, training on the last
    hour's NLDAS FORA dataset with transformers in a 2-layer hierarchical
    ensemble and making out-of-training-sample predictions for the
    current hour.  Makes a dill dump file for each hour run.  Runs for
    NSTEPS hourly steps.
    '''
    date = START_DATE
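The snippet ends just after `date = START_DATE`; a minimal sketch of the hourly loop the docstring describes, where `NSTEPS` comes from the same module and `train_and_predict` is a hypothetical helper standing in for the elided body:

import datetime
import dill

for _ in range(NSTEPS):
    models, preds = train_and_predict(date)        # hypothetical helper for the elided body
    out = 'predict_{}.dill'.format(date.strftime('%Y_%m_%d_%H'))
    with open(out, 'wb') as f:
        dill.dump((models, preds), f)              # one dill dump file per hour run
    date += datetime.timedelta(hours=1)            # step forward hourly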
Example #7
         {'search_key': 'long_name', 'search_value': "Band 10 ", 'name': 'band_10'},
         {'search_key': 'long_name', 'search_value': "Band 11 ", 'name': 'band_11'}]))
HDF4_FILES = [f for f in glob.glob(os.path.join(ELM_EXAMPLE_DATA_PATH, 'hdf4', '*hdf'))
              if meta_is_day(load_hdf4_meta(f))]

def sampler(fname, **kw):
    return (load_array(fname, band_specs=band_specs), None, None)

data_source = {
    'sampler': sampler,
    'args_list': HDF4_FILES,
}

pipeline_steps = [steps.Flatten(),
                  ('scaler', steps.StandardScaler()),
                  ('pca', steps.Transform(IncrementalPCA(n_components=4), partial_fit_batches=2)),
                  ('kmeans', MiniBatchKMeans(n_clusters=4, compute_labels=True)),]
pipeline = Pipeline(pipeline_steps,
                    scoring=kmeans_aic,
                    scoring_kwargs=dict(score_weights=[-1]))

def ensemble_init_func(pipe, **kw):
    return [pipe.new_with_params(kmeans__n_clusters=np.random.choice(range(6, 10)))
            for _ in range(4)]

ensemble_kwargs = {
    'model_selection': kmeans_model_averaging,
    'model_selection_kwargs': {
        'drop_n': 2,
        'evolve_n': 2,
    },
}
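The snippet is truncated here; following the pattern of Example #4 above, a plausible way the pieces would be wired together (the `fit_ensemble` and `predict_many` calls mirror that example; treat this as a sketch, not the original file's ending):

ensemble_kwargs['ensemble_init_func'] = ensemble_init_func
ensemble_kwargs.update(data_source)                # adds 'sampler' and 'args_list'
fitted = pipeline.fit_ensemble(**ensemble_kwargs)
preds = fitted.predict_many(**data_source)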
Example #8
def make_pipeline_steps(config, pipeline):
    '''Turn the config's "pipeline" into a list of steps
    to pass to elm.pipeline.Pipeline

    Params:
        :config:   validated config from elm.config.ConfigParser
        :pipeline: the "pipeline" list of step dictionaries from that config

    This is used by :func:`elm.pipeline.parse_run_config`
    '''
    actions = []
    for action_idx, action in enumerate(pipeline):
        is_dic = isinstance(action, dict)
        if not is_dic:
            step_cls = action
        elif 'feature_selection' in action:
            _feature_selection = copy.deepcopy(
                config.feature_selection[action['feature_selection']])
            kw = _feature_selection.copy()
            kw.update(action)
            scaler = _feature_selection['method']
            scaler = import_callable(getattr(skfeat, scaler, scaler))
            if 'func_kwargs' in _feature_selection:
                func = import_callable(_feature_selection['func'])
                scaler = partial(func, **_feature_selection['func_kwargs'])
                _feature_selection['func'] = func
            kw = {
                k: v
                for k, v in _feature_selection.items()
                if k not in ('func_kwargs', 'method')
            }
            cls = SKLEARN_PREPROCESSING[_feature_selection['method']]
            step_name = action['feature_selection']
            step_cls = cls(**kw)
        elif 'transform' in action:
            trans = config.transform[action['transform']]
            cls = import_callable(trans['model_init_class'])
            kw = trans.get('model_init_kwargs') or {}
            kw_filter = {
                k: v
                for k, v in kw.items() if k != 'partial_fit_batches'
            }
            t = cls(**kw_filter)
            pfb = trans.get('partial_fit_batches',
                            kw.get('partial_fit_batches'))
            step_name = action['transform']
            step_cls = steps.Transform(t, partial_fit_batches=pfb)
        elif 'sklearn_preprocessing' in action:
            _sklearn_preprocessing = config.sklearn_preprocessing[
                action['sklearn_preprocessing']]
            scaler = _sklearn_preprocessing['method']
            scaler = getattr(skpre, scaler, scaler)
            kw = {
                k: v
                for k, v in _sklearn_preprocessing.items()
                if k not in ('method', 'func_kwargs')
            }
            if 'func' in _sklearn_preprocessing:
                kw['func'] = import_callable(_sklearn_preprocessing['func'])
            cls = SKLEARN_PREPROCESSING[_sklearn_preprocessing['method']]
            step_name = action['sklearn_preprocessing']
            step_cls = cls(**kw)
        elif any(k in CHANGE_COORDS_ACTIONS for k in action):
            _sp_step = [k for k in action if k in CHANGE_COORDS_ACTIONS][0]
            step_name = _sp_step
            for att in dir(steps):
                if isinstance(getattr(steps, att), type):
                    if getattr(getattr(steps, att), '_sp_step',
                               None) == _sp_step:
                        step_cls = getattr(steps,
                                           att).from_config_dict(**action)
                        break

        else:
            # add items to actions of the form:
            # (
            #   module_colon_func_name_as_string,        # string
            #   args_to_func,                            # tuple
            #   kwargs_to_func                           # dict
            # )
            # NOTE also add the key name, like 'transform' to the top of
            # elm.config.load_config global variable:
            # "SAMPLE_PIPELINE_ACTIONS"
            raise NotImplementedError(
                'pipeline action {} not recognized.'.format(action))
        actions.append((step_name, step_cls))
    return actions
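A hypothetical usage sketch, assuming a validated `config` whose `sklearn_preprocessing` section defines an entry named 'standard' and whose `transform` section defines one named 'pca' (both entry names are illustrative, not taken from the snippet):

pipeline_spec = [{'sklearn_preprocessing': 'standard'},
                 {'transform': 'pca'}]
actions = make_pipeline_steps(config, pipeline_spec)   # [(step_name, step_cls), ...]
pipe = Pipeline(actions)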