def test_pipeline_new_with_params():
    """Exercise ``Pipeline.new_with_params`` on a PCA + KMeans pipeline.

    A pipeline with 4 KMeans clusters is fitted and used for prediction.
    The pipeline returned by ``new_with_params`` is then expected to raise
    ``NotFittedError`` on ``predict`` until it is fitted itself, after
    which its final estimator must expose 7 cluster centers.
    """
    step_list = [
        steps.SelectCanvas('band_1'),
        steps.Flatten(),
        ('pca', steps.Transform(IncrementalPCA(n_components=3))),
        ('kmeans', KMeans(n_clusters=4)),
    ]
    original = Pipeline(step_list)
    original.fit(random_elm_store())
    original.predict(random_elm_store())
    # The last entry of the last step is the fitted KMeans estimator.
    fitted_kmeans = original.steps[-1][-1]
    assert fitted_kmeans.cluster_centers_.shape[0] == 4

    overrides = {'kmeans__n_clusters': 7, 'pca__n_components': 2}
    reparametrized = original.new_with_params(**overrides)
    # Predicting before fitting the new pipeline must fail.
    with pytest.raises(NotFittedError):
        reparametrized.predict(random_elm_store())

    reparametrized.fit(random_elm_store())
    refitted_kmeans = reparametrized.steps[-1][-1]
    assert refitted_kmeans.cluster_centers_.shape[0] == 7
def example_sampler(h, w, bands, **kwargs):
    '''Example sampler built on ``random_elm_store``.

    A sampler takes one of the elements of "args_list" to return X.
    Alternatively a sampler taking an element of "args_list" can
    return an (X, y, sample_weight) tuple.

    Here ``bands`` is a count: the bands are named 'band_1' through
    'band_<bands>' and passed, together with ``w`` and ``h``, to
    ``random_elm_store`` as ``bands``, ``width`` and ``height``.
    Extra keyword arguments are accepted and ignored.
    '''
    band_names = ['band_{}'.format(number) for number in range(1, bands + 1)]
    store_kwargs = dict(width=w, height=h, bands=band_names)
    return random_elm_store(**store_kwargs)
def tst_one_pipeline(pipeline,
                     add_na_per_band=0,
                     na_fields_as_str=True,
                     delim='_'):
    """Run ``pipeline`` on a random sample, optionally seeding flagged values.

    Parameters:
        pipeline: pipeline spec handed to ``make_config`` and
            ``make_pipeline_steps``.
        add_na_per_band: when truthy, this many randomly chosen cells per
            band are overwritten: the first half with ``99 * idx`` and the
            rest with ``199 * idx``, where ``idx`` is the band's position.
        na_fields_as_str: when true, the 'missing/invalid/valid' attrs are
            stored as strings (lists joined with ', ') instead of raw values.
        delim: separator used inside the attr names, e.g. 'missing_value'.

    Returns:
        ``(sample, new_es[0])`` where ``new_es`` is the result of
        ``Pipeline.fit_transform`` on the sample.

    Relies on the module-level ``make_config`` and ``data_source``.
    """
    from elm.sample_util.sample_pipeline import make_pipeline_steps

    sample = random_elm_store()
    if add_na_per_band:
        half = add_na_per_band // 2
        missing_key = 'missing{}value'.format(delim)
        invalid_key = 'invalid{}range'.format(delim)
        valid_key = 'valid{}range'.format(delim)
        for band_idx, band_name in enumerate(sample.data_vars):
            data_arr = getattr(sample, band_name)
            values = data_arr.values

            # Shuffle the flat cell indices, then split each one into a
            # (row, col) pair using the first dimension's length.
            flat_idx = np.arange(values.size)
            np.random.shuffle(flat_idx)
            n_rows = values.shape[0]
            cols = flat_idx // n_rows
            rows = flat_idx % n_rows

            missing_value = 99 * band_idx
            values[rows[:half], cols[:half]] = missing_value
            values[rows[half:add_na_per_band],
                   cols[half:add_na_per_band]] = 199 * band_idx

            flagged_attrs = (
                (missing_key, missing_value),
                (invalid_key, [198 * band_idx, 200 * band_idx]),
                (valid_key, [-1e12, 1e12]),
            )
            for attr_name, raw in flagged_attrs:
                if not na_fields_as_str:
                    data_arr.attrs[attr_name] = raw
                elif isinstance(raw, list):
                    data_arr.attrs[attr_name] = ', '.join(map(str, raw))
                else:
                    data_arr.attrs[attr_name] = str(raw)

            # The injected values are finite, so no NaN may be present yet.
            assert not np.isnan(values).any()

    config = ConfigParser(config=make_config(pipeline, data_source))
    pipe = Pipeline(make_pipeline_steps(config, pipeline))
    new_es = pipe.fit_transform(sample)
    return sample, new_es[0]
def sampler(**kwargs):
    """Return ``random_elm_store(BANDS)`` with the first half of the bands rescaled.

    The values of the first ``len(BANDS) // 2`` bands are multiplied in
    place by 1e-7. Keyword arguments are accepted and ignored.
    """
    store = random_elm_store(BANDS)
    n_scaled = len(BANDS) // 2
    for band_name in BANDS[:n_scaled]:
        getattr(store, band_name).values *= 1e-7
    return store
import copy
import glob
import os

import pytest
import yaml

from sklearn.decomposition import IncrementalPCA

from elm.readers import *
from elm.pipeline import steps
from elm.pipeline.tests.util import random_elm_store

# Flattened random sample shared by the tests in this module.
X = flatten(random_elm_store())


def _run_assertions(trans, y, sample_weight):
    """Check the output triple of a 3-component transform applied to ``X``.

    ``y`` and ``sample_weight`` must be None; ``trans`` must be an
    ``ElmStore`` whose ``flat`` array has dims ('space', 'band'),
    3 columns and as many rows as ``X.flat``.
    """
    assert y is None
    assert sample_weight is None
    assert isinstance(trans, ElmStore)
    assert hasattr(trans, 'flat')
    flat = trans.flat
    assert tuple(flat.dims) == ('space', 'band')
    shape = flat.values.shape
    assert shape[1] == 3
    expected_rows = X.flat.values.shape[0]
    assert shape[0] == expected_rows


def test_fit_transform():
    """``steps.Transform`` wrapping IncrementalPCA(3) should yield a 3-band flat store."""
    pca = IncrementalPCA(n_components=3)
    transformer = steps.Transform(pca)
    trans, y, sample_weight = transformer.fit_transform(X)
    _run_assertions(trans, y, sample_weight)
import numpy as np import xarray as xr from sklearn.decomposition import PCA from elm.config import ConfigParser from elm.pipeline.tests.util import (random_elm_store, test_one_config as tst_one_config, tmp_dirs_context) from elm.readers import * from elm.pipeline import Pipeline X = random_elm_store() data_source = {'X': X} train = { 'model_init_class': 'sklearn.cluster:MiniBatchKMeans', 'ensemble': 'ens1' } def make_run(pipeline, data_source): run = [{'data_source': 'synthetic', 'pipeline': pipeline, 'train': 'ex1'}] return run def make_config(pipeline, data_source): return { 'train': { 'ex1': train },
def samp(*args, **kwargs):
    """Return a ``random_elm_store`` result built with fixed settings.

    All positional and keyword arguments are ignored; the store is always
    requested with bands=12, mn=0, mx=1, height=20 and width=40.
    """
    fixed_options = dict(bands=12, mn=0, mx=1, height=20, width=40)
    return random_elm_store(**fixed_options)
def setup():
    """Create a random store and draw two band names from its ``band_order``.

    The two names are drawn independently with ``np.random.choice``, so
    they may be equal. Returns ``(store, band1, band2)``.
    """
    store = random_elm_store()
    band1 = np.random.choice(store.band_order)
    band2 = np.random.choice(store.band_order)
    return store, band1, band2