def test_with_hyperopt(self):
    """Smoke-test ``auto_configure`` on a ``log1p`` >> classifier pipeline.

    ``LogisticRegression`` is piped in without being instantiated, and the
    search is run with ``optimizer=Hyperopt``, ``cv=3`` and ``max_evals=3``.
    The test only checks that configuring and predicting do not raise.
    """
    log_transform = FunctionTransformer(func=np.log1p)
    search_space = log_transform >> LogisticRegression
    fitted = search_space.auto_configure(
        self.train_X,
        self.train_y,
        optimizer=Hyperopt,
        cv=3,
        max_evals=3,
    )
    # The prediction is discarded on purpose; nothing is asserted about it.
    fitted.predict(self.test_X)
def test_scorers_np_cat(self):
    """Fit a two-branch pipeline on ``creditg_np_cat`` and run the scorers.

    Columns that can be cast to ``float64`` go through a casting branch;
    all other columns go through a one-hot-encoding branch. The branches
    are concatenated and fed to a logistic regression, and the fitted
    pipeline is handed to ``_attempt_scorers`` with the test split.
    """
    fairness_info = self.creditg_np_cat["fairness_info"]
    features = self.creditg_np_cat["train_X"]
    labels = self.creditg_np_cat["train_y"]

    # Classify each column index by whether a float64 cast succeeds.
    categorical_idx = []
    numeric_idx = []
    for col in range(features.shape[1]):
        try:
            features[:, col].astype(np.float64)
        except ValueError:
            categorical_idx.append(col)
        else:
            numeric_idx.append(col)

    encode_branch = Project(columns=categorical_idx) >> OneHotEncoder(
        handle_unknown="ignore"
    )
    cast_branch = Project(columns=numeric_idx) >> FunctionTransformer(
        func=lambda block: block.astype(np.float64)
    )
    pipeline = (
        (encode_branch & cast_branch)
        >> ConcatFeatures
        >> LogisticRegression(max_iter=1000)
    )
    fitted = pipeline.fit(features, labels)

    held_out_X = self.creditg_np_cat["test_X"]
    held_out_y = self.creditg_np_cat["test_y"]
    self._attempt_scorers(fairness_info, fitted, held_out_X, held_out_y)
def test_init_fit_predict(self):
    """Smoke-test construct / fit / predict on the ``digits_df`` dataset.

    Builds ``FunctionTransformer(func=np.log1p) >> LogisticRegression()``,
    fits it on the training split and predicts on the test split. Nothing
    is asserted about the predictions.
    """
    import numpy as np

    import lale.datasets

    pipeline = FunctionTransformer(func=np.log1p) >> LogisticRegression()
    (fit_X, fit_y), (eval_X, _) = lale.datasets.digits_df()
    fitted = pipeline.fit(fit_X, fit_y)
    # The prediction result is unused; the call only has to succeed.
    fitted.predict(eval_X)
def test_predict(self):
    """Compare ``RaslStandardScaler`` predictions with ``SkStandardScaler``.

    The ``SkStandardScaler`` pipeline is fitted once on the ``"pandas"``
    entry of ``tgt2creditg`` to obtain reference predictions. The
    ``RaslStandardScaler`` pipeline is then fitted on every entry and must
    produce predictions of identical shape and values.
    """
    (ref_train_X, ref_train_y), (ref_test_X, _) = self.tgt2creditg["pandas"]

    def ensure_pandas(X):
        # Anything that is not already a pandas DataFrame is converted
        # through its ``toPandas`` method.
        if isinstance(X, pd.DataFrame):
            return X
        return X.toPandas()

    convert = FunctionTransformer(func=ensure_pandas)
    classifier = LogisticRegression()

    reference_pipeline = SkStandardScaler() >> classifier
    reference_model = reference_pipeline.fit(ref_train_X, ref_train_y)
    expected = reference_model.predict(ref_test_X)

    candidate_pipeline = RaslStandardScaler() >> convert >> classifier
    for target, ((fit_X, fit_y), (eval_X, _)) in self.tgt2creditg.items():
        actual = candidate_pipeline.fit(fit_X, fit_y).predict(eval_X)
        # The target name is passed as the message to identify a failing entry.
        self.assertEqual(expected.shape, actual.shape, target)
        self.assertEqual(expected.tolist(), actual.tolist(), target)
def test_predict(self):
    """Compare ``RaslOneHotEncoder`` predictions with ``SkOneHotEncoder``.

    Both pipelines start with a ``Map`` that keeps only the columns that
    ``categorical()`` returns for the pandas training data. The
    ``SkOneHotEncoder`` pipeline, fitted on the ``"pandas"`` entry of
    ``tgt2creditg``, gives the reference predictions; the
    ``RaslOneHotEncoder`` pipeline must match them on every entry.
    """
    (ref_train_X, ref_train_y), (ref_test_X, _) = self.tgt2creditg["pandas"]

    selected = categorical()(ref_train_X)
    keep_selected = Map(columns={name: it[name] for name in selected})

    def ensure_pandas(X):
        # Anything that is not already a pandas DataFrame is converted
        # through its ``toPandas`` method.
        if isinstance(X, pd.DataFrame):
            return X
        return X.toPandas()

    convert = FunctionTransformer(func=ensure_pandas)
    classifier = LogisticRegression()

    reference_pipeline = keep_selected >> SkOneHotEncoder(sparse=False) >> classifier
    reference_model = reference_pipeline.fit(ref_train_X, ref_train_y)
    expected = reference_model.predict(ref_test_X)

    candidate_pipeline = (
        keep_selected >> RaslOneHotEncoder(sparse=False) >> convert >> classifier
    )
    for target, ((fit_X, fit_y), (eval_X, _)) in self.tgt2creditg.items():
        actual = candidate_pipeline.fit(fit_X, fit_y).predict(eval_X)
        # The target name is passed as the message to identify a failing entry.
        self.assertEqual(expected.shape, actual.shape, target)
        self.assertEqual(expected.tolist(), actual.tolist(), target)
def test_predict(self):
    """Compare ``RaslSimpleImputer`` predictions with ``SkSimpleImputer``.

    ``_fill_missing_value("age", 36.0, np.nan)`` is called first. Both
    pipelines then project onto three columns and impute with
    ``strategy="mean"``. The ``SkSimpleImputer`` pipeline, fitted on the
    ``"pandas"`` entry of ``tgt2adult``, gives the reference predictions;
    the ``RaslSimpleImputer`` pipeline must match them on every entry.
    """
    # Must run before the datasets are read from ``tgt2adult`` below.
    self._fill_missing_value("age", 36.0, np.nan)
    (ref_train_X, ref_train_y), (ref_test_X, _) = self.tgt2adult["pandas"]

    keep_numeric = Map(
        columns={name: it[name] for name in ["age", "fnlwgt", "education-num"]}
    )

    def ensure_pandas(X):
        # Anything that is not already a pandas DataFrame is converted
        # through its ``toPandas`` method.
        if isinstance(X, pd.DataFrame):
            return X
        return X.toPandas()

    convert = FunctionTransformer(func=ensure_pandas)
    classifier = LogisticRegression()

    reference_pipeline = keep_numeric >> SkSimpleImputer(strategy="mean") >> classifier
    reference_model = reference_pipeline.fit(ref_train_X, ref_train_y)
    expected = reference_model.predict(ref_test_X)

    candidate_pipeline = (
        keep_numeric >> RaslSimpleImputer(strategy="mean") >> convert >> classifier
    )
    for target, ((fit_X, fit_y), (eval_X, _)) in self.tgt2adult.items():
        actual = candidate_pipeline.fit(fit_X, fit_y).predict(eval_X)
        # The target name is passed as the message to identify a failing entry.
        self.assertEqual(expected.shape, actual.shape, target)
        self.assertEqual(expected.tolist(), actual.tolist(), target)
def test_validate(self):
    """Check that the ``validate`` hyperparameter default equals ``True``."""
    defaults = FunctionTransformer.hyperparam_defaults()
    self.assertEqual(defaults["validate"], True)
def test_pass_y(self):
    """Smoke-test fit / predict with ``pass_y=False`` set explicitly."""
    log_transform = FunctionTransformer(func=np.log1p, pass_y=False)
    pipeline = log_transform >> LogisticRegression()
    fitted = pipeline.fit(self.train_X, self.train_y)
    # The prediction is discarded; the call only has to succeed.
    fitted.predict(self.test_X)
def test_with_defaults(self):
    """Smoke-test fit / predict with only ``func`` given to the transformer."""
    pipeline = FunctionTransformer(func=np.log1p) >> LogisticRegression()
    # The prediction is discarded; fitting and predicting only have to succeed.
    pipeline.fit(self.train_X, self.train_y).predict(self.test_X)
def test_not_callable(self):
    """A string passed as ``func`` must raise ``jsonschema.ValidationError``.

    The constructor call runs inside ``EnableSchemaValidation()``.
    """
    with EnableSchemaValidation(), self.assertRaises(jsonschema.ValidationError):
        FunctionTransformer(func='"not callable"')
def test_pipeline_spark(self):
    """Smoke-test a ``RaslMinMaxScaler`` pipeline on the Spark fixtures.

    A ``FunctionTransformer`` calling ``toPandas()`` sits between the scaler
    and the logistic regression. Nothing is asserted about the predictions.
    """
    to_pandas = FunctionTransformer(func=lambda frame: frame.toPandas())
    pipeline = RaslMinMaxScaler() >> to_pandas >> LogisticRegression()
    fitted = pipeline.fit(self.X_train_spark, self.y_train)
    # The prediction is discarded; the call only has to succeed.
    fitted.predict(self.X_test_spark)