def test_fit_idempotent(self, estimator_instance, scenario):
    """Check that fitting twice gives the same method outputs as fitting once.

    For every method in NON_STATE_CHANGING_METHODS that the estimator is
    capable of, the scenario is run as "fit" followed by that method, then
    run a second time on the estimator returned by the first run. The two
    method outputs are compared for approximate equality.
    """

    def _seeded_fit_then(est, method_name):
        # re-apply the random state before each run of the scenario
        set_random_state(est)
        return scenario.run(
            est,
            method_sequence=["fit", method_name],
            return_all=True,
            deepcopy_return=True,
        )

    # todo: may have to rework this, due to "if estimator has param"
    current = estimator_instance
    is_forecaster = isinstance(estimator_instance, BaseForecaster)

    for method in NON_STATE_CHANGING_METHODS:
        # for now, predict_proba of forecasters is skipped, since the current
        # output comparison does not work for tensorflow Distribution
        if is_forecaster and method == "predict_proba":
            continue
        if not _has_capability(current, method):
            continue

        first_run = _seeded_fit_then(current, method)
        # continue with the estimator handed back by the first run
        current = first_run[0]
        second_run = _seeded_fit_then(current, method)

        _assert_array_almost_equal(first_run[1], second_run[1])
def test_differencer_produces_expected_results(na_handling):
    """Check Differencer output on the simple DataFrame for a given na_handling.

    The transformed `y_simple` is compared against the entry of
    `y_simple_expected_diff` stored under the same `na_handling` key.
    """
    actual = Differencer(na_handling=na_handling).fit_transform(y_simple)
    expected = y_simple_expected_diff[na_handling]

    _assert_array_almost_equal(actual, expected)
def test_classifier_on_unit_test_data(self, estimator_class):
    """Compare predict_proba on the unit test data with registered values.

    Returns early, without asserting anything, when no expected
    probabilities are registered under the estimator's class name.
    """
    name = estimator_class.__name__
    if name not in unit_test_proba:
        # no expected probas are registered, so there is nothing to compare
        return None
    expected_probas = unit_test_proba[name]

    # work on a clone of the "results_comparison" test instance
    classifier = clone(
        estimator_class.create_test_instance(parameter_set="results_comparison")
    )

    # set random seed if possible
    if "random_state" in classifier.get_params():
        classifier.set_params(random_state=0)

    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, _ = load_unit_test(split="test")

    # seeded draw of 10 distinct positions, sized by the training labels
    rng = np.random.RandomState(0)
    subset = rng.choice(len(y_train), 10, replace=False)

    # train classifier and predict probas on the selected test rows
    classifier.fit(X_train, y_train)
    y_proba = classifier.predict_proba(X_test.iloc[subset])

    # probabilities must agree to two decimals
    _assert_array_almost_equal(y_proba, expected_probas, decimal=2)
def test_fit_idempotent(estimator_instance, scenario):
    """Check that fitting twice gives the same method outputs as fitting once.

    Each supported method from NON_STATE_CHANGING_METHODS is exercised via
    the scenario as "fit" + method, first on the incoming estimator and then
    again on the estimator returned by that first run.
    """
    # todo: may have to rework this, due to "if estimator has param"
    current = estimator_instance

    for method in NON_STATE_CHANGING_METHODS:
        if not _has_capability(current, method):
            continue

        runs = []
        for _ in range(2):
            # re-apply the random state ahead of every scenario run
            set_random_state(current)
            outcome = scenario.run(
                current,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )
            runs.append(outcome)
            if len(runs) == 1:
                # the second pass refits the estimator from the first pass
                current = outcome[0]

        first_run, second_run = runs
        _assert_array_almost_equal(first_run[1], second_run[1])
def test_load_UCR_UEA_dataset_download(tmpdir):
    """Load "ArrowHead" into a temporary directory and check files and data.

    `tmpdir` is a pytest fixture. The extracted directory must contain
    exactly the expected file names, and the loaded data must match what
    `load_arrow_head` returns.
    """
    name = "ArrowHead"
    extract_path = tmpdir.mkdtemp()

    actual_X, actual_y = load_UCR_UEA_dataset(
        name, return_X_y=True, extract_path=extract_path
    )

    data_path = os.path.join(extract_path, name)
    assert os.path.exists(data_path)

    # expected directory listing; note "README.md" is not part of it
    remaining = [
        f"{name}.txt",
        f"{name}_TEST.arff",
        f"{name}_TEST.ts",
        f"{name}_TEST.txt",
        f"{name}_TRAIN.arff",
        f"{name}_TRAIN.ts",
        f"{name}_TRAIN.txt",
    ]
    # tick off every file found; anything unexpected fails immediately
    for found in os.listdir(data_path):
        assert found in remaining
        remaining.remove(found)
    # nothing expected may be left over
    assert len(remaining) == 0

    # check data against the reference loader
    expected_X, expected_y = load_arrow_head(return_X_y=True)
    _assert_array_almost_equal(actual_X, expected_X, decimal=4)
    np.testing.assert_array_equal(expected_y, actual_y)
def test_dunder_mul():
    """Test chaining ExponentTransformer instances with the `*` operator.

    Checks the type of each product, the order of `power` values across the
    resulting steps, and the output of `fit_transform` on a small DataFrame.
    """
    X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    t1 = ExponentTransformer(power=2)
    t2 = ExponentTransformer(power=5)
    t3 = ExponentTransformer(power=0.1)
    t4 = ExponentTransformer(power=1)

    t12 = t1 * t2
    t123 = t12 * t3
    t312 = t3 * t12
    t1234 = t123 * t4
    t1234_2 = t12 * (t3 * t4)

    # each pipeline paired with the powers its steps should carry, in order
    expected_powers = [
        (t12, [2, 5]),
        (t123, [2, 5, 0.1]),
        (t312, [0.1, 2, 5]),
        (t1234, [2, 5, 0.1, 1]),
        (t1234_2, [2, 5, 0.1, 1]),
    ]

    for pipeline, _ in expected_powers:
        assert isinstance(pipeline, TransformerPipeline)

    for pipeline, powers in expected_powers:
        assert [x.power for x in pipeline.steps] == powers

    # the powers in these chains multiply to 1, so X is expected back
    for pipeline in (t123, t312, t1234, t1234_2):
        _assert_array_almost_equal(X, pipeline.fit_transform(X))

    # t12 is compared against the inverse of t3 applied to X
    forward = t12.fit_transform(X)
    inverse = t3.fit(X).inverse_transform(X)
    _assert_array_almost_equal(forward, inverse)
def test_differencer_remove_missing_false(y, lags, na_handling):
    """Test that inverse_transform applied after fit_transform returns y."""
    differencer = Differencer(lags=lags, na_handling=na_handling)

    differenced = differencer.fit_transform(y)
    roundtrip = differencer.inverse_transform(differenced)

    _assert_array_almost_equal(y, roundtrip)
def test_persistence_via_pickle(estimator_instance):
    """Check that a fitted estimator gives the same results after pickling.

    Outputs of every available method in NON_STATE_CHANGING_METHODS are
    recorded on the fitted estimator, then recomputed with the same
    arguments on a pickled-and-restored copy and compared.
    """
    fitted = estimator_instance
    set_random_state(fitted)
    fitted.fit(*_make_args(fitted, "fit"))

    # record arguments and outputs per method before pickling
    call_args = {}
    before = {}
    for method in NON_STATE_CHANGING_METHODS:
        if not hasattr(fitted, method):
            continue
        call_args[method] = _make_args(fitted, method)
        before[method] = getattr(fitted, method)(*call_args[method])

    # round trip through pickle
    restored = pickle.loads(pickle.dumps(fitted))

    # the restored copy is called with the very same arguments
    for method, expected in before.items():
        after = getattr(restored, method)(*call_args[method])
        _assert_array_almost_equal(
            expected,
            after,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )
def test_mul_sklearn_autoadapt():
    """Test `*` chaining of a transformer, a StandardScaler and a classifier.

    All three bracketings must produce a ClassifierPipeline, and their
    predictions on the test panel must agree with each other.
    """
    RAND_SEED = 42
    y = _make_classification_y(n_instances=10, random_state=RAND_SEED)
    X = _make_panel_X(n_instances=10, n_timepoints=20, random_state=RAND_SEED, y=y)
    X_test = _make_panel_X(n_instances=10, n_timepoints=20, random_state=RAND_SEED)

    t1 = ExponentTransformer(power=2)
    t2 = StandardScaler()
    c = KNeighborsTimeSeriesClassifier()

    # right-nested, left-nested and unbracketed composition
    t12c_1 = t1 * (t2 * c)
    t12c_2 = (t1 * t2) * c
    t12c_3 = t1 * t2 * c

    for pipeline in (t12c_1, t12c_2, t12c_3):
        assert isinstance(pipeline, ClassifierPipeline)

    # the first pipeline serves as reference for the other two
    y_pred = t12c_1.fit(X, y).predict(X_test)
    for pipeline in (t12c_2, t12c_3):
        _assert_array_almost_equal(y_pred, pipeline.fit(X, y).predict(X_test))
def test_fit_idempotent(estimator_instance):
    """Check that fitting twice gives the same method outputs as fitting once.

    Outputs of every available method in NON_STATE_CHANGING_METHODS are
    recorded after the first fit and compared with the outputs obtained,
    using the same arguments, after a second fit.
    """
    est = estimator_instance
    fit_args = _make_args(est, "fit")

    # first fit
    set_random_state(est)
    est.fit(*fit_args)

    call_args = {}
    first_results = {}
    for method in NON_STATE_CHANGING_METHODS:
        if not hasattr(est, method):
            continue
        call_args[method] = _make_args(est, method)
        first_results[method] = getattr(est, method)(*call_args[method])

    # second fit, with the random state re-applied and the same fit arguments
    set_random_state(est)
    est.fit(*fit_args)

    for method in NON_STATE_CHANGING_METHODS:
        if not hasattr(est, method):
            continue
        second_result = getattr(est, method)(*call_args[method])
        _assert_array_almost_equal(first_results[method], second_result)
def test_differencer_same_series(y, lags):
    """Test that differencing and then inverting reproduces the input series.

    The comparison is restricted to the index of the reconstructed series.
    """
    differencer = Differencer(lags=lags)
    differenced = differencer.fit_transform(y)
    reconstructed = differencer.inverse_transform(differenced)

    # Reconstruction should return the reconstructed series for same indices
    # that are in the `Z` timeseries passed to inverse_transform
    original_on_same_index = y.loc[reconstructed.index]
    _assert_array_almost_equal(original_on_same_index, reconstructed)
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform undoes fit_transform for an estimator class.

    When the "transform-returns-same-time-index" tag is falsy, the input is
    compared only on the index of the inverse-transformed output.
    """
    est = _construct_instance(Estimator)
    fit_args = _make_args(est, "fit")
    X = fit_args[0]

    X_roundtrip = est.inverse_transform(est.fit_transform(X))

    same_index = est.get_tag("transform-returns-same-time-index")
    reference = X if same_index else X.loc[X_roundtrip.index]
    _assert_array_almost_equal(reference, X_roundtrip)
def test_transform_inverse_transform_equivalent(self, estimator_instance, scenario):
    """Test that inverse_transform is indeed inverse to transform.

    Returns early when the class tag "capability:inverse_transform" is
    falsy. When the "transform-returns-same-time-index" tag is falsy, the
    input is compared only on the index of the inverse-transformed output.
    """
    can_invert = estimator_instance.get_class_tag("capability:inverse_transform", False)
    if not can_invert:
        # estimator does not have inverse_transform, nothing to test
        return None

    # the X that the scenario passes to transform
    X = scenario.args["transform"]["X"]
    transformed = scenario.run(estimator_instance, method_sequence=["fit", "transform"])
    X_roundtrip = estimator_instance.inverse_transform(transformed)

    same_index = estimator_instance.get_tag("transform-returns-same-time-index")
    reference = X if same_index else X.loc[X_roundtrip.index]
    _assert_array_almost_equal(reference, X_roundtrip)
def test_mul_sklearn_autoadapt():
    """Test `*` chaining of transformers with a StandardScaler in between.

    All three bracketings must produce a TransformerPipeline, and their
    `fit_transform` outputs on a small DataFrame must agree.
    """
    X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    t1 = ExponentTransformer(power=2)
    t2 = StandardScaler()
    t3 = ExponentTransformer(power=0.5)

    # unbracketed, right-nested and left-nested composition
    t123 = t1 * t2 * t3
    t123r = t1 * (t2 * t3)
    t123l = (t1 * t2) * t3

    for pipeline in (t123, t123r, t123l):
        assert isinstance(pipeline, TransformerPipeline)

    # the left-nested pipeline is refitted for each comparison
    for pipeline in (t123, t123r):
        candidate = pipeline.fit_transform(X)
        reference = t123l.fit_transform(X)
        _assert_array_almost_equal(candidate, reference)
def test_differencer_prediction(y, lags):
    """Test inverse_transform on values lying after the fitted series.

    The last 12 observations of `y` are held out. Their differenced values,
    taken from a transformer fitted on the full series, act as "predictions".
    After refitting on the training part only, inverse_transform of these
    predictions is expected to reproduce the held-out observations.

    Parameters
    ----------
    y : series-like supporting `.iloc`
        Series to difference; the final 12 rows form the hold-out period.
    lags : int or list of int
        Lags passed on to `Differencer`. The original test ignored this
        argument and always used ``[1, 12]``; it is now honoured so that each
        supplied lag configuration is actually exercised.
    """
    y_train = y.iloc[:-12].copy()
    y_true = y.iloc[-12:].copy()

    transformer = Differencer(lags=lags)
    y_transform = transformer.fit_transform(y)

    # Use the actual transformed values as predictions since we know we should
    # be able to convert them to the units of the original series and exactly
    # match the y_true values for this period
    y_pred = y_transform.iloc[-12:].copy()

    # Redo the transformer's fit and transformation
    # Now the transformer doesn't know anything about the values in y_true
    # This simulates use-case with a forecasting pipeline
    # (only the refit matters here, the transformed output is not needed)
    transformer.fit_transform(y_train)

    y_pred_inv = transformer.inverse_transform(y_pred)

    _assert_array_almost_equal(y_true, y_pred_inv)
def test_dunder_add():
    """Test combining ExponentTransformer instances with the `+` operator.

    Checks the type of each sum, the order of `power` values across
    `transformer_list`, and that both bracketings give the same output.
    """
    X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    t1 = ExponentTransformer(power=2)
    t2 = ExponentTransformer(power=5)
    t3 = ExponentTransformer(power=3)

    t12 = t1 + t2
    t123 = t12 + t3
    t123r = t1 + (t2 + t3)

    # each union paired with the powers its members should carry, in order
    expected_powers = [
        (t12, [2, 5]),
        (t123, [2, 5, 3]),
        (t123r, [2, 5, 3]),
    ]

    for union, _ in expected_powers:
        assert isinstance(union, FeatureUnion)

    for union, powers in expected_powers:
        assert [x.power for x in union.transformer_list] == powers

    right_nested = t123r.fit_transform(X)
    left_nested = t123.fit_transform(X)
    _assert_array_almost_equal(right_nested, left_nested)
def test_dunder_mul():
    """Test `*` chaining of two ExponentTransformers with a classifier.

    All three bracketings must produce a ClassifierPipeline, and each
    pipeline's predictions must agree with those of the bare classifier.
    """
    RAND_SEED = 42
    y = _make_classification_y(n_instances=10, random_state=RAND_SEED)
    X = _make_panel_X(n_instances=10, n_timepoints=20, random_state=RAND_SEED, y=y)
    X_test = _make_panel_X(n_instances=5, n_timepoints=20, random_state=RAND_SEED)

    t1 = ExponentTransformer(power=4)
    t2 = ExponentTransformer(power=0.25)
    c = KNeighborsTimeSeriesClassifier()

    # right-nested, left-nested and unbracketed composition
    t12c_1 = t1 * (t2 * c)
    t12c_2 = (t1 * t2) * c
    t12c_3 = t1 * t2 * c
    pipelines = (t12c_1, t12c_2, t12c_3)

    for pipeline in pipelines:
        assert isinstance(pipeline, ClassifierPipeline)

    # reference predictions come from the classifier on its own
    y_pred = c.fit(X, y).predict(X_test)

    for pipeline in pipelines:
        _assert_array_almost_equal(y_pred, pipeline.fit(X, y).predict(X_test))
def test_persistence_via_pickle(self, estimator_instance, scenario):
    """Check that a fitted estimator gives the same results after pickling.

    Outputs of every supported method in NON_STATE_CHANGING_METHODS are
    collected via the scenario on the fitted estimator, then recomputed on a
    pickled-and-restored copy and compared.
    """
    fitted = estimator_instance
    set_random_state(fitted)

    # Fit the model through the scenario
    scenario.run(fitted, method_sequence=["fit"], return_args=True)

    # Generate results before pickling
    before = {
        method: scenario.run(fitted, method_sequence=[method])
        for method in NON_STATE_CHANGING_METHODS
        if _has_capability(fitted, method)
    }

    # Pickle and unpickle
    restored = pickle.loads(pickle.dumps(fitted))

    is_forecaster = isinstance(estimator_instance, BaseForecaster)

    # Compare against results after pickling
    for method, vanilla_result in before.items():
        # escape predict_proba for forecasters, tfp distributions cannot be pickled
        if is_forecaster and method == "predict_proba":
            continue

        unpickled_result = scenario.run(restored, method_sequence=[method])
        msg = (
            f"Results of {method} differ between when pickling and not pickling, "
            f"estimator {type(estimator_instance).__name__}"
        )
        _assert_array_almost_equal(
            vanilla_result,
            unpickled_result,
            decimal=6,
            err_msg=msg,
        )
def test_differencer_remove_missing_false(y, lags):
    """Test that inverse_transform recovers y when drop_na=False."""
    differencer = Differencer(lags=lags, drop_na=False)

    differenced = differencer.fit_transform(y)
    roundtrip = differencer.inverse_transform(differenced)

    _assert_array_almost_equal(y, roundtrip)
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform undoes fit_transform for an estimator class.

    The estimator is constructed from the class, fitted on the first of its
    generated "fit" arguments, and the round trip is compared to that input.
    """
    est = _construct_instance(Estimator)
    fit_args = _make_args(est, "fit")
    X = fit_args[0]

    X_roundtrip = est.inverse_transform(est.fit_transform(X))

    _assert_array_almost_equal(X, X_roundtrip)