def test_kmeans_simple_X(client=None):
    """Fit a Flatten -> MiniBatchKMeans ensemble on the module-level ``X``.

    After fitting with ``ENSEMBLE_KWARGS``, the shared training assertions
    are run and ``predict_many`` on the same ``X`` must return one
    prediction per ensemble member.

    ``client`` is accepted but not used in this test body.
    """
    kmeans_pipe = Pipeline([
        steps.Flatten(),
        MiniBatchKMeans(n_clusters=6),
    ])
    fitted = kmeans_pipe.fit_ensemble(X=X, **ENSEMBLE_KWARGS)
    expected_size = ENSEMBLE_KWARGS['saved_ensemble_size']
    _train_asserts(fitted, expected_size)

    predictions = fitted.predict_many(X=X)
    n_predictions = len(predictions)
    # A single X input: one prediction for each model kept in the ensemble.
    assert n_predictions == len(fitted.ensemble)
def test_kmeans_simple_sampler(client=None):
    """Fit a Flatten -> MiniBatchKMeans ensemble from ``SAMPLER_DATA_SOURCE``.

    The fit keyword arguments are ``SAMPLER_DATA_SOURCE`` overlaid with
    ``ENSEMBLE_KWARGS`` (the latter wins on any shared key).  Predicting
    with the same data source must give one prediction for every
    combination of ``args_list`` entry and ensemble member.

    ``client`` is accepted but not used in this test body.
    """
    kmeans_pipe = Pipeline([
        steps.Flatten(),
        MiniBatchKMeans(n_clusters=6),
    ])
    # Later mapping takes precedence, matching a copy-then-update merge.
    fit_kwargs = {**SAMPLER_DATA_SOURCE, **ENSEMBLE_KWARGS}
    fitted = kmeans_pipe.fit_ensemble(**fit_kwargs)
    members = fitted.ensemble
    _train_asserts(fitted, ENSEMBLE_KWARGS['saved_ensemble_size'])

    predictions = fitted.predict_many(**SAMPLER_DATA_SOURCE)
    n_samples = len(SAMPLER_DATA_SOURCE['args_list'])
    assert len(predictions) == n_samples * len(members)
def test_supervised_feat_select_X_y(client=None):
    """Fit a supervised ensemble with percentile feature selection.

    The pipeline is Flatten -> SelectPercentile(f_classif, 50%) ->
    SGDClassifier.  Fit arguments are ``ENSEMBLE_KWARGS`` plus
    ``method_kwargs`` carrying the class labels ``[0, 1, 2]``, overlaid
    with ``X_Y_DATA_SOURCE``.  Predicting with ``X_Y_DATA_SOURCE`` must
    give one prediction per ensemble member.

    NOTE(review): an earlier docstring said this test "has a ModifySample
    step to get necessary y data", but no such step appears in the
    pipeline below; presumably y is supplied by ``X_Y_DATA_SOURCE`` --
    confirm.

    ``client`` is accepted but not used in this test body.
    """
    classifier_pipe = Pipeline([
        steps.Flatten(),
        steps.SelectPercentile(score_func=f_classif, percentile=50),
        SGDClassifier(),
    ])

    class_labels = {'classes': [0, 1, 2]}
    fit_kwargs = dict(method_kwargs=class_labels, **ENSEMBLE_KWARGS)
    fit_kwargs.update(X_Y_DATA_SOURCE)

    fitted = classifier_pipe.fit_ensemble(**fit_kwargs)
    _train_asserts(fitted, fit_kwargs['saved_ensemble_size'])

    predictions = fitted.predict_many(**X_Y_DATA_SOURCE)
    assert len(predictions) == len(fitted.ensemble)
def test_simple():
    """Fit a two-member k-means ensemble from a sampler and predict with each.

    Ten identical ``(100, 200, 5)`` argument tuples are passed to
    ``example_sampler`` during fitting.  Each of the two fitted models is
    then asked to predict on a fresh ``example_sampler(100, 400, 5)``
    sample, and both prediction arrays must have shape ``(400 * 100,)``.
    """
    pipeline = Pipeline([steps.Flatten(), MiniBatchKMeans(n_clusters=5)])

    # Each tuple is (height, width, bands).
    sampler_args = [(100, 200, 5)] * 10
    fit_kwargs = dict(
        ngen=2,
        init_ensemble_size=2,
        sampler=example_sampler,
        args_list=sampler_args,
    )
    fitted = pipeline.fit_ensemble(**fit_kwargs)

    # Ensemble size of 2 here: the unpacking itself fails unless there are
    # exactly two (tag, model) pairs.
    (_, first_model), (_, second_model) = fitted.ensemble

    new_sample = example_sampler(100, 400, 5)
    first_pred = first_model.predict(new_sample)
    second_pred = second_model.predict(new_sample)
    assert first_pred.shape == second_pred.shape == (400 * 100,)
def test_kmeans_model_selection(client=None):
    """Fit a PCA + k-means ensemble with custom init and model selection.

    The pipeline (Flatten -> IncrementalPCA -> MiniBatchKMeans) is scored
    with ``kmeans_aic``.  The initial ensemble comes from ``init``, which
    builds 100 deep copies of the pipeline with randomly drawn
    ``pca__n_components`` (from 2..5) and ``kmeans__n_clusters``
    (from 3..9).  ``kmeans_model_averaging`` is passed as the
    model-selection function with ``ngen=20``.

    The fitted ensemble must have ``saved_ensemble_size`` members, and
    ``predict_many`` must return one prediction for every combination of
    ensemble member and ``args_list`` entry.

    ``client`` is accepted but not used in this test body.
    """
    pipe = Pipeline(
        [
            steps.Flatten(),
            ('pca', steps.Transform(IncrementalPCA())),
            ('kmeans', MiniBatchKMeans(n_clusters=5)),
        ],
        scoring=kmeans_aic,
        scoring_kwargs={'score_weights': [-1]},
    )
    n_clusters_choices = list(range(3, 10))

    def samp(*args, **kwargs):
        # Ignores whatever arguments it is given and always builds a
        # store with the same fixed dimensions.
        return random_elm_store(bands=12, mn=0, mx=1, height=20, width=40)

    def init(pipe, **kwargs):
        # Build the starting population: 100 independently parameterized
        # copies of the pipeline passed in.
        population = []
        while len(population) < 100:
            # Draw order (components first, then clusters) is kept so the
            # random stream is consumed in the same sequence.
            n_components = np.random.choice(np.arange(2, 6))
            n_clusters = np.random.choice(n_clusters_choices)
            candidate = copy.deepcopy(pipe)
            candidate.set_params(
                kmeans__n_clusters=n_clusters,
                pca__n_components=n_components,
            )
            population.append(candidate)
        return population

    data_source = SAMPLER_DATA_SOURCE.copy()
    data_source['sampler'] = samp

    fit_kwargs = ENSEMBLE_KWARGS.copy()
    fit_kwargs.update(
        ngen=20,
        model_scoring=kmeans_aic,
        ensemble_init_func=init,
        model_selection_kwargs=dict(
            drop_n=30,
            evolve_n=30,
            choices=n_clusters_choices,
        ),
        model_selection=kmeans_model_averaging,
    )
    # Data-source keys are applied last so they win on any shared key.
    fit_kwargs.update(data_source)

    fitted = pipe.fit_ensemble(**fit_kwargs)
    assert len(fitted.ensemble) == fit_kwargs['saved_ensemble_size']

    preds = fitted.predict_many(**data_source)
    n_samples = len(SAMPLER_DATA_SOURCE['args_list'])
    assert len(preds) == len(fitted.ensemble) * n_samples