Пример #1
0
 def test_with_hyperopt(self):
     """Smoke-test auto_configure with the Hyperopt optimizer.

     The classifier is combined as the bare LogisticRegression name (it is
     not called here). The test has no assertions: it passes as long as
     configuring on the training data and predicting on the test features
     do not raise.
     """
     log_step = FunctionTransformer(func=np.log1p)
     planned_pipeline = log_step >> LogisticRegression
     # Small cv / max_evals values, exactly as passed by the original call.
     search_settings = {"optimizer": Hyperopt, "cv": 3, "max_evals": 3}
     tuned = planned_pipeline.auto_configure(
         self.train_X, self.train_y, **search_settings
     )
     tuned.predict(self.test_X)
Пример #2
0
 def test_scorers_np_cat(self):
     """Fit a mixed categorical/numeric pipeline and hand it to the scorers.

     Columns of the training array are split by whether they can be cast
     to float64: castable columns are treated as numeric, the rest as
     categorical. The fitted pipeline is then passed to
     self._attempt_scorers together with the fairness info and test data.
     """
     data = self.creditg_np_cat
     fairness_info = data["fairness_info"]
     features, labels = data["train_X"], data["train_y"]

     categorical_idx = []
     numeric_idx = []
     for col in range(features.shape[1]):
         try:
             # Only the cast matters; its result is discarded.
             features[:, col].astype(np.float64)
         except ValueError:
             categorical_idx.append(col)
         else:
             numeric_idx.append(col)

     cat_branch = Project(columns=categorical_idx) >> OneHotEncoder(
         handle_unknown="ignore"
     )
     num_branch = Project(columns=numeric_idx) >> FunctionTransformer(
         func=lambda arr: arr.astype(np.float64)
     )
     combined = (cat_branch & num_branch) >> ConcatFeatures
     pipeline = combined >> LogisticRegression(max_iter=1000)

     fitted = pipeline.fit(features, labels)
     self._attempt_scorers(
         fairness_info, fitted, data["test_X"], data["test_y"]
     )
Пример #3
0
 def test_init_fit_predict(self):
     """Fit and predict with a log1p >> LogisticRegression pipeline.

     Uses the splits returned by lale.datasets.digits_df(). This is a
     smoke test: the prediction is computed but not checked.
     """
     import numpy as np
     import lale.datasets

     pipeline = FunctionTransformer(func=np.log1p) >> LogisticRegression()
     train_split, test_split = lale.datasets.digits_df()
     train_X, train_y = train_split
     test_X, _ = test_split  # test labels are not needed
     fitted = pipeline.fit(train_X, train_y)
     _ = fitted.predict(test_X)
Пример #4
0
 def test_predict(self):
     """Compare RaslStandardScaler predictions against SkStandardScaler.

     The reference pipeline is fitted once on the "pandas" entry of
     self.tgt2creditg. The Rasl pipeline is then refitted on every entry
     and must produce predictions with the same shape and values.
     """
     pandas_train, pandas_test = self.tgt2creditg["pandas"]
     train_X_pd, train_y_pd = pandas_train
     test_X_pd, _ = pandas_test

     # Pass DataFrames through unchanged; call toPandas() on anything else.
     as_pandas = FunctionTransformer(
         func=lambda data: data
         if isinstance(data, pd.DataFrame)
         else data.toPandas()
     )
     classifier = LogisticRegression()

     reference = (SkStandardScaler() >> classifier).fit(
         train_X_pd, train_y_pd
     )
     expected = reference.predict(test_X_pd)

     candidate = RaslStandardScaler() >> as_pandas >> classifier
     for tgt, ((train_X, train_y), (test_X, _)) in self.tgt2creditg.items():
         actual = candidate.fit(train_X, train_y).predict(test_X)
         self.assertEqual(expected.shape, actual.shape, tgt)
         self.assertEqual(expected.tolist(), actual.tolist(), tgt)
Пример #5
0
 def test_predict(self):
     """Compare RaslOneHotEncoder predictions against SkOneHotEncoder.

     Both pipelines start with a Map that keeps the columns selected by
     categorical() on the pandas training features. The reference is
     fitted once on the "pandas" entry of self.tgt2creditg. The Rasl
     pipeline is refitted on every entry and must match the reference
     predictions in shape and values.
     """
     pandas_train, pandas_test = self.tgt2creditg["pandas"]
     train_X_pd, train_y_pd = pandas_train
     test_X_pd, _ = pandas_test

     select_categorical = categorical()
     selected = select_categorical(train_X_pd)
     prefix = Map(columns={name: it[name] for name in selected})

     # Pass DataFrames through unchanged; call toPandas() on anything else.
     as_pandas = FunctionTransformer(
         func=lambda data: data
         if isinstance(data, pd.DataFrame)
         else data.toPandas()
     )
     classifier = LogisticRegression()

     reference = (prefix >> SkOneHotEncoder(sparse=False) >> classifier).fit(
         train_X_pd, train_y_pd
     )
     expected = reference.predict(test_X_pd)

     candidate = (
         prefix >> RaslOneHotEncoder(sparse=False) >> as_pandas >> classifier
     )
     for tgt, ((train_X, train_y), (test_X, _)) in self.tgt2creditg.items():
         actual = candidate.fit(train_X, train_y).predict(test_X)
         self.assertEqual(expected.shape, actual.shape, tgt)
         self.assertEqual(expected.tolist(), actual.tolist(), tgt)
Пример #6
0
 def test_predict(self):
     """Compare RaslSimpleImputer predictions against SkSimpleImputer.

     self._fill_missing_value("age", 36.0, np.nan) is called first. Both
     pipelines keep three numeric columns via Map and impute with the
     "mean" strategy. The reference is fitted once on the "pandas" entry
     of self.tgt2adult. The Rasl pipeline is refitted on every entry and
     must match the reference predictions in shape and values.
     """
     self._fill_missing_value("age", 36.0, np.nan)

     pandas_train, pandas_test = self.tgt2adult["pandas"]
     train_X_pd, train_y_pd = pandas_train
     test_X_pd, _ = pandas_test

     prefix = Map(
         columns={
             name: it[name] for name in ["age", "fnlwgt", "education-num"]
         }
     )
     # Pass DataFrames through unchanged; call toPandas() on anything else.
     as_pandas = FunctionTransformer(
         func=lambda data: data
         if isinstance(data, pd.DataFrame)
         else data.toPandas()
     )
     classifier = LogisticRegression()
     imputer_kwargs = dict(strategy="mean")

     reference = (
         prefix >> SkSimpleImputer(**imputer_kwargs) >> classifier
     ).fit(train_X_pd, train_y_pd)
     expected = reference.predict(test_X_pd)

     candidate = (
         prefix >> RaslSimpleImputer(**imputer_kwargs) >> as_pandas >> classifier
     )
     for tgt, ((train_X, train_y), (test_X, _)) in self.tgt2adult.items():
         actual = candidate.fit(train_X, train_y).predict(test_X)
         self.assertEqual(expected.shape, actual.shape, tgt)
         self.assertEqual(expected.tolist(), actual.tolist(), tgt)
Пример #7
0
 def test_validate(self):
     """Check that the default "validate" hyperparameter equals True."""
     defaults = FunctionTransformer.hyperparam_defaults()
     self.assertEqual(defaults["validate"], True)
Пример #8
0
 def test_pass_y(self):
     """Smoke-test a pipeline whose FunctionTransformer sets pass_y=False.

     The test has no assertions: fitting and predicting must not raise.
     """
     log_step = FunctionTransformer(func=np.log1p, pass_y=False)
     pipeline = log_step >> LogisticRegression()
     fitted = pipeline.fit(self.train_X, self.train_y)
     fitted.predict(self.test_X)
Пример #9
0
 def test_with_defaults(self):
     """Smoke-test log1p >> LogisticRegression with default settings.

     The test has no assertions: fitting and predicting must not raise.
     """
     log_step = FunctionTransformer(func=np.log1p)
     pipeline = log_step >> LogisticRegression()
     fitted = pipeline.fit(self.train_X, self.train_y)
     fitted.predict(self.test_X)
Пример #10
0
 def test_not_callable(self):
     """Expect jsonschema.ValidationError when func is given a string.

     The operator is constructed inside EnableSchemaValidation().
     """
     with EnableSchemaValidation(), self.assertRaises(
         jsonschema.ValidationError
     ):
         FunctionTransformer(func='"not callable"')
Пример #11
0
 def test_pipeline_spark(self):
     """Smoke-test RaslMinMaxScaler >> toPandas() >> LogisticRegression.

     The pipeline is fitted on self.X_train_spark / self.y_train and then
     predicts on self.X_test_spark. The test has no assertions.
     """
     scaler = RaslMinMaxScaler()
     # Calls toPandas() on whatever the scaler step outputs.
     to_pandas = FunctionTransformer(func=lambda X: X.toPandas())
     spark_pipeline = scaler >> to_pandas >> LogisticRegression()
     fitted = spark_pipeline.fit(self.X_train_spark, self.y_train)
     fitted.predict(self.X_test_spark)