Пример #1
0
def test_pipeline_new_with_params():
    """Check that ``new_with_params`` yields an unfitted, re-parameterized copy.

    A four-step pipeline is fitted with 4 KMeans clusters; the pipeline
    returned by ``new_with_params`` must refuse to predict until it is
    fitted, and after fitting must expose 7 cluster centers.
    """
    step_list = [
        steps.SelectCanvas('band_1'),
        steps.Flatten(),
        ('pca', steps.Transform(IncrementalPCA(n_components=3))),
        ('kmeans', KMeans(n_clusters=4)),
    ]
    original = Pipeline(step_list)
    original.fit(random_elm_store())
    original.predict(random_elm_store())
    fitted_estimator = original.steps[-1][-1]
    assert fitted_estimator.cluster_centers_.shape[0] == 4

    overrides = {'kmeans__n_clusters': 7, 'pca__n_components': 2}
    reconfigured = original.new_with_params(**overrides)
    # The reconfigured pipeline has not been fitted yet, so predict must fail.
    with pytest.raises(NotFittedError):
        reconfigured.predict(random_elm_store())
    reconfigured.fit(random_elm_store())
    refitted_estimator = reconfigured.steps[-1][-1]
    assert refitted_estimator.cluster_centers_.shape[0] == 7
Пример #2
0
def example_sampler(h, w, bands, **kwargs):
    '''Example sampler: return ``random_elm_store`` output for the given size.

       A sampler receives one element of "args_list" and returns X;
       alternatively it may return an (X, y, sample_weight) tuple.

       ``bands`` is a count; the band names passed on are
       'band_1' ... 'band_<bands>'.  Extra keyword arguments are
       accepted and ignored.
    '''
    band_names = []
    for number in range(1, bands + 1):
        band_names.append('band_{}'.format(number))
    return random_elm_store(width=w, height=h, bands=band_names)
Пример #3
0
def tst_one_pipeline(pipeline,
                     add_na_per_band=0,
                     na_fields_as_str=True,
                     delim='_'):
    """Run ``pipeline`` on a random sample, optionally seeding flagged values.

    Parameters:
        pipeline: pipeline spec handed to ``make_config`` and
            ``make_pipeline_steps``.
        add_na_per_band: when truthy, number of randomly chosen cells per
            band to overwrite; the first half get ``99 * band index`` and
            the remainder get ``199 * band index``.
        na_fields_as_str: when true, the missing/invalid/valid attributes
            are stored as strings (lists joined with ', ').
        delim: separator used inside the attribute names, e.g.
            'missing_value' for the default '_'.

    Returns:
        Tuple of the input sample and element 0 of the ``fit_transform``
        result.

    Reads the module-level ``data_source`` when building the config.
    """
    from elm.sample_util.sample_pipeline import make_pipeline_steps
    sample = random_elm_store()
    if add_na_per_band:
        half = add_na_per_band // 2
        first_part = slice(None, half)
        second_part = slice(half, add_na_per_band)
        missing_key = 'missing{}value'.format(delim)
        invalid_key = 'invalid{}range'.format(delim)
        valid_key = 'valid{}range'.format(delim)
        for band_num, band_name in enumerate(sample.data_vars):
            data_arr = getattr(sample, band_name)
            values = data_arr.values
            # Shuffle flat positions, then split each into a pair of indices
            # (indexing below assumes a 2-D band array).
            positions = np.arange(values.size)
            np.random.shuffle(positions)
            axis0_len = values.shape[0]
            axis1_idx = positions // axis0_len
            axis0_idx = positions % axis0_len
            missing_value = 99 * band_num
            values[axis0_idx[first_part], axis1_idx[first_part]] = missing_value
            data_arr.attrs[missing_key] = missing_value
            values[axis0_idx[second_part], axis1_idx[second_part]] = 199 * band_num
            data_arr.attrs[invalid_key] = [198 * band_num, 200 * band_num]
            data_arr.attrs[valid_key] = [-1e12, 1e12]
            if na_fields_as_str:
                for key in (missing_key, invalid_key, valid_key):
                    current = data_arr.attrs[key]
                    if isinstance(current, list):
                        as_text = ', '.join(map(str, current))
                    else:
                        as_text = str(current)
                    data_arr.attrs[key] = as_text
            # The seeded values are plain numbers, so no NaN may be present.
            assert not np.isnan(values).any()
    parsed_config = ConfigParser(config=make_config(pipeline, data_source))
    built_steps = make_pipeline_steps(parsed_config, pipeline)
    transformed = Pipeline(built_steps).fit_transform(sample)
    return sample, transformed[0]
Пример #4
0
def sampler(**kwargs):
    """Return a random store whose first half of ``BANDS`` is scaled by 1e-7.

    Keyword arguments are accepted and ignored.
    """
    store = random_elm_store(BANDS)
    n_scaled = len(BANDS) // 2
    for name in BANDS[:n_scaled]:
        data_arr = getattr(store, name)
        data_arr.values *= 1e-7
    return store
Пример #5
0
import copy
import glob
import os

import pytest
import yaml

from sklearn.decomposition import IncrementalPCA

from elm.readers import *
from elm.pipeline import steps
from elm.pipeline.tests.util import random_elm_store

# Module-level fixture built once at import time: the output of
# ``random_elm_store()`` passed through ``flatten``.  The tests below read
# ``X.flat`` from it.
X = flatten(random_elm_store())


def _run_assertions(trans, y, sample_weight):
    """Assert the expected shape of a 3-component transform of ``X``.

    ``y`` and ``sample_weight`` must both be None.  ``trans`` must be an
    ``ElmStore`` with a ``flat`` attribute whose dims are
    ('space', 'band'), with 3 columns and as many rows as the module-level
    ``X``.
    """
    assert y is None
    assert sample_weight is None
    assert isinstance(trans, ElmStore)
    assert hasattr(trans, 'flat')
    flat = trans.flat
    assert tuple(flat.dims) == ('space', 'band')
    out_shape = flat.values.shape
    assert out_shape[1] == 3
    assert out_shape[0] == X.flat.values.shape[0]


def test_fit_transform():
    """Fit-transform ``X`` with a 3-component IncrementalPCA and check the output."""
    pca_step = steps.Transform(IncrementalPCA(n_components=3))
    transformed, y_out, weight_out = pca_step.fit_transform(X)
    _run_assertions(transformed, y_out, weight_out)
Пример #6
0
import numpy as np
import xarray as xr

from sklearn.decomposition import PCA

from elm.config import ConfigParser
from elm.pipeline.tests.util import (random_elm_store, test_one_config as
                                     tst_one_config, tmp_dirs_context)
from elm.readers import *
from elm.pipeline import Pipeline

# Sample built once at import time and shared by this module.
X = random_elm_store()

# Data-source mapping that exposes the shared sample under the key 'X'.
data_source = {'X': X}

# Training settings; make_config stores this dict under the 'ex1' key of the
# config's 'train' section.
train = {
    'model_init_class': 'sklearn.cluster:MiniBatchKMeans',
    'ensemble': 'ens1'
}


def make_run(pipeline, data_source):
    """Return a one-element run list that pairs ``pipeline`` with fixed names.

    The single entry always uses the data source name 'synthetic' and the
    train name 'ex1'; the ``data_source`` argument is accepted but not used.
    """
    return [
        {
            'data_source': 'synthetic',
            'pipeline': pipeline,
            'train': 'ex1',
        },
    ]


def make_config(pipeline, data_source):
    return {
        'train': {
            'ex1': train
        },
Пример #7
0
 def samp(*args, **kwargs):
     """Return a 12-band, 20x40 random store with values bounded by mn=0, mx=1.

     Positional and keyword arguments are accepted and ignored.
     """
     store_options = dict(bands=12, mn=0, mx=1, height=20, width=40)
     return random_elm_store(**store_options)
Пример #8
0
def setup():
    """Return a random store plus two band names drawn from its ``band_order``.

    The two names are drawn independently, so they may be the same band.
    """
    store = random_elm_store()
    first_band = np.random.choice(store.band_order)
    second_band = np.random.choice(store.band_order)
    return store, first_band, second_band