Example #1
 def test_regression_xgboost(self):
     X_train = scipy.sparse.random(300, 900, density=0.0001)
     y_train = np.random.uniform(size=300)
     X_val = scipy.sparse.random(100, 900, density=0.0001)
     y_val = np.random.uniform(size=100)
     automl_experiment = AutoML()
     automl_experiment.add_learner(learner_name="my_xgb1",
                                   learner_class=MyXGB1)
     automl_experiment.add_learner(learner_name="my_xgb2",
                                   learner_class=MyXGB2)
     automl_settings = {
         "time_budget": 2,
         "estimator_list": ["my_xgb1", "my_xgb2"],
         "task": "regression",
         "log_file_name": "test/regression_xgboost.log",
         "n_jobs": 1,
         "model_history": True,
         "keep_search_state": True,
         "early_stop": True,
     }
     automl_experiment.fit(X_train=X_train,
                           y_train=y_train,
                           X_val=X_val,
                           y_val=y_val,
                           **automl_settings)
     assert automl_experiment._state.X_val.shape == X_val.shape
     print(automl_experiment.predict(X_train))
     print(automl_experiment.model)
     print(automl_experiment.config_history)
     print(automl_experiment.best_model_for_estimator("my_xgb2"))
     print(automl_experiment.best_iteration)
     print(automl_experiment.best_estimator)
     print(automl_experiment.best_config)
     print(automl_experiment.best_loss)
     print(automl_experiment.best_config_train_time)
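Here MyXGB1 and MyXGB2 are user-defined learners built on FLAML's XGBoost wrapper. A minimal sketch of how such variants can be defined, assuming flaml.model.XGBoostEstimator; the custom objective below is illustrative, not the exact test fixture:

import numpy as np
from flaml.model import XGBoostEstimator


def my_squared_error_obj(preds, dtrain):
    # toy custom objective for xgb.train: gradient and hessian of squared error
    labels = dtrain.get_label()
    grad = preds - labels
    hess = np.ones(len(labels))
    return grad, hess


class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator with a custom training objective (illustrative)."""

    def __init__(self, **config):
        super().__init__(objective=my_squared_error_obj, **config)


class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator pinned to the plain squared-error objective."""

    def __init__(self, **config):
        super().__init__(objective="reg:squarederror", **config)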
Example #2
 def test_time_limit(self):
     automl_experiment = AutoML()
     automl_experiment.add_learner(
         learner_name="large_lgbm", learner_class=MyLargeLGBM
     )
     automl_experiment.add_learner(
         learner_name="large_xgb", learner_class=MyLargeXGB
     )
     automl_settings = {
         "time_budget": 0.5,
         "task": "classification",
         "log_file_name": "test/classification_timeout.log",
         "estimator_list": ["catboost"],
         "log_type": "all",
         "hpo_method": "random",
     }
     X_train, y_train = load_iris(return_X_y=True, as_frame=True)
     automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl_experiment.model.params)
     automl_settings["estimator_list"] = ["large_xgb"]
     automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl_experiment.model)
     automl_settings["estimator_list"] = ["large_lgbm"]
     automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl_experiment.model)
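MyLargeXGB and MyLargeLGBM are learners whose search spaces start at deliberately extreme values, so the very first trial blows past the 0.5 s budget. A hedged sketch of the LightGBM variant, assuming flaml.model.LGBMEstimator; the ranges and initial values are illustrative, and an analogous MyLargeXGB can subclass XGBoostEstimator the same way:

from flaml import tune
from flaml.model import LGBMEstimator


class MyLargeLGBM(LGBMEstimator):
    @classmethod
    def search_space(cls, **params):
        # init_value sits at the top of the range, so the first
        # configuration tried is deliberately expensive
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
        }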
Example #3
    def test_random_skip_oom(self):
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="large_lgbm",
                                      learner_class=MyLargeLGBM)
        automl_settings = {
            "time_budget": 2,
            "task": "classification",
            "log_file_name": "test/sparse_classification_oom.log",
            "estimator_list": ["large_lgbm"],
            "log_type": "all",
            "n_jobs": 1,
            "hpo_method": "random",
            "n_concurrent_trials": 2,
        }
        X_train = scipy.sparse.eye(900000)
        y_train = np.random.randint(2, size=900000)

        try:
            automl_experiment.fit(X_train=X_train,
                                  y_train=y_train,
                                  **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("large_lgbm"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            print("skipping concurrency test as ray is not installed")
            return
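Setting "n_concurrent_trials": 2 makes FLAML tune in parallel, which requires ray; that is what the except ImportError branch guards against. A hedged pre-check in the same spirit, which could precede the fit call (has_ray is a new name introduced here):

try:
    import ray  # noqa: F401  # needed when n_concurrent_trials > 1

    has_ray = True
except ImportError:
    has_ray = False

automl_settings["n_concurrent_trials"] = 2 if has_ray else 1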
Example #4
 def test_custom_learner(self):
     automl = AutoML()
     automl.add_learner(learner_name="RGF",
                        learner_class=MyRegularizedGreedyForest)
     X_train, y_train = load_wine(return_X_y=True)
     settings = {
         "time_budget": 10,  # total running time in seconds
         "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
         "task": 'classification',  # task type    
         "sample": True,  # whether to subsample training data
         "log_file_name": "test/wine.log",
         "log_training_metric": True,  # whether to log training metric
         "n_jobs": 1,
     }
     """The main flaml automl API"""
     automl.fit(X_train=X_train, y_train=y_train, **settings)
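MyRegularizedGreedyForest is the custom learner registered above. A minimal sketch of one way to write it, following the custom-learner pattern from the FLAML documentation and assuming the rgf package is installed; the search-space ranges are illustrative:

from flaml import tune
from flaml.model import SKLearnEstimator


class MyRegularizedGreedyForest(SKLearnEstimator):
    def __init__(self, task="binary", **config):
        super().__init__(task, **config)
        # pick the sklearn-compatible RGF class matching the task
        if task in ("binary", "multiclass"):
            from rgf.sklearn import RGFClassifier

            self.estimator_class = RGFClassifier
        else:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor

    @classmethod
    def search_space(cls, data_size, task):
        return {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=data_size),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=data_size),
                "init_value": 1,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }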
Example #5
    def _test_memory_limit(self):
        automl_experiment = AutoML()
        automl_experiment.add_learner(
            learner_name="large_lgbm", learner_class=MyLargeLGBM
        )
        automl_settings = {
            "time_budget": -1,
            "task": "classification",
            "log_file_name": "test/classification_oom.log",
            "estimator_list": ["large_lgbm"],
            "log_type": "all",
            "hpo_method": "random",
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=True)

        automl_experiment.fit(
            X_train=X_train, y_train=y_train, max_iter=1, **automl_settings
        )
        print(automl_experiment.model)
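With "time_budget": -1 the search is bounded only by max_iter=1, so this (intentionally disabled, hence the leading underscore) test exercises the memory path rather than the timer. FLAML's fit can also take an explicit memory constraint; a hedged sketch continuing the method, assuming the mem_thres argument (a size in bytes) exists in your FLAML version:

        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            max_iter=1,
            mem_thres=2 * 1024 ** 3,  # assumed fit argument: skip configs above ~2 GB
            **automl_settings,
        )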
Example #6
    def test_ensemble(self):
        automl = AutoML()
        automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
        X_train, y_train = load_wine(return_X_y=True)
        settings = {
            "time_budget": 5,  # total running time in seconds
            "estimator_list": ["rf", "xgboost", "catboost"],
            "task": "classification",  # task type
            "sample": True,  # whether to subsample training data
            "log_file_name": "test/wine.log",
            "log_training_metric": True,  # whether to log training metric
            "ensemble": {
                "final_estimator": MyRegularizedGreedyForest(),
                "passthrough": False,
            },
            "n_jobs": 1,
        }

        """The main flaml automl API"""
        automl.fit(X_train=X_train, y_train=y_train, **settings)
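When ensemble is passed as a dict, FLAML stacks the searched estimators; final_estimator and passthrough mirror the arguments of scikit-learn's stacking estimators. Inspecting the result, continuing the method, uses the same attributes as the other examples (whether automl.model holds the stacker itself can vary by FLAML version):

        print(automl.model)  # expected: the fitted stacker over the base estimators
        print(automl.predict(X_train[:5]))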
Example #7
    def test_custom_learner(self):
        automl = AutoML()
        automl.add_learner(learner_name="RGF", learner_class=MyRegularizedGreedyForest)
        X_train, y_train = load_wine(return_X_y=True)
        settings = {
            "time_budget": 8,  # total running time in seconds
            "estimator_list": ["RGF", "lgbm", "rf", "xgboost"],
            "task": "classification",  # task type
            "sample": True,  # whether to subsample training data
            "log_file_name": "test/wine.log",
            "log_training_metric": True,  # whether to log training metric
            "n_jobs": 1,
        }

        """The main flaml automl API"""
        automl.fit(X_train=X_train, y_train=y_train, **settings)
        # print the best model found for RGF
        print(automl.best_model_for_estimator("RGF"))

        # An empty search space freezes RGF's hyperparameters at their defaults,
        # so this second fit trains the learner without tuning it.
        MyRegularizedGreedyForest.search_space = lambda data_size, task: {}
        automl.fit(X_train=X_train, y_train=y_train, **settings)
Example #8
                                                       data_dir="./")
X_train = X_train.iloc[:1000]
y_train = y_train.iloc[:1000]


class ExtraTreesEstimatorSeeded(ExtraTreesEstimator):
    """ExtraTreesEstimator for reproducible FLAML run."""
    def config2params(self, config: dict) -> dict:
        params = super().config2params(config)
        params["random_state"] = 0
        return params


settings = {
    "time_budget": 1e10,  # total running time in seconds
    "max_iter": 3,
    "metric": "ap",  # average_precision
    "task": "classification",  # task type
    "seed": 7654321,  # random seed
    "estimator_list": ["extra_trees_seeded"],
    "verbose": False,
}

for trial_num in range(8):
    automl = AutoML()
    automl.add_learner(learner_name="extra_trees_seeded",
                       learner_class=ExtraTreesEstimatorSeeded)
    automl.fit(X_train=X_train, y_train=y_train, **settings)
    print(automl.best_loss)
    print(automl.best_config)
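Because config2params pins random_state and the settings fix seed, all eight runs should converge to the same configuration and loss. A hedged way to assert that (the results list is a new name introduced here):

results = []
for trial_num in range(8):
    automl = AutoML()
    automl.add_learner(learner_name="extra_trees_seeded",
                       learner_class=ExtraTreesEstimatorSeeded)
    automl.fit(X_train=X_train, y_train=y_train, **settings)
    results.append((automl.best_loss, str(automl.best_config)))

# every run should reproduce the first one exactly
assert all(r == results[0] for r in results)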
Example #9
    def test_fit_w_freezinghp_starting_point(self, as_frame=True):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "metric": "accuracy",
            "task": "classification",
            "estimator_list": ["lgbm"],
            "log_file_name": "test/iris.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
        if as_frame:
            # test drop column
            X_train.columns = range(X_train.shape[1])
            X_train[X_train.shape[1]] = np.zeros(len(y_train))
        automl_experiment.fit(X_train=X_train,
                              y_train=y_train,
                              **automl_settings)
        automl_val_accuracy = 1.0 - automl_experiment.best_loss
        print("Best ML leaner:", automl_experiment.best_estimator)
        print("Best hyperparmeter config:", automl_experiment.best_config)
        print("Best accuracy on validation data: {0:.4g}".format(
            automl_val_accuracy))
        print("Training duration of best run: {0:.4g} s".format(
            automl_experiment.best_config_train_time))
        # 1. Get starting points from previous experiments.
        starting_points = automl_experiment.best_config_per_estimator
        print("starting_points", starting_points)
        print("loss of the starting_points",
              automl_experiment.best_loss_per_estimator)
        starting_point = starting_points["lgbm"]
        hps_to_freeze = [
            "colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"
        ]

        # 2. Construct a new class:
        # a. write the hps you want to freeze as hps with a constant 'domain';
        # b. specify the new search space of the other hps accordingly.

        class MyPartiallyFreezedLargeLGBM(LGBMEstimator):
            @classmethod
            def search_space(cls, **params):
                # (1) Get the hps in the original search space
                space = LGBMEstimator.search_space(**params)
                # (2) Set up the fixed value from hps from the starting point
                for hp_name in hps_to_freeze:
                    # if an hp is specified to be frozen, use the value provided
                    # in the starting_point; otherwise keep the setting from the
                    # original search space
                    if hp_name in starting_point:
                        space[hp_name] = {"domain": starting_point[hp_name]}
                # (3.1) Configure the search space for hps that are in the original search space
                #  but you want to change something, for example the range.
                revised_hps_to_search = {
                    "n_estimators": {
                        "domain": tune.lograndint(lower=10, upper=32768),
                        "init_value": starting_point.get("n_estimators")
                        or space["n_estimators"].get("init_value", 10),
                        "low_cost_init_value": space["n_estimators"].get(
                            "low_cost_init_value", 10
                        ),
                    },
                    "num_leaves": {
                        "domain": tune.lograndint(lower=10, upper=3276),
                        "init_value": starting_point.get("num_leaves")
                        or space["num_leaves"].get("init_value", 10),
                        "low_cost_init_value": space["num_leaves"].get(
                            "low_cost_init_value", 10
                        ),
                    },
                    # (3.2) Add a new hp which is not in the original search space
                    "subsample": {
                        "domain": tune.uniform(lower=0.1, upper=1.0),
                        "init_value": 0.1,
                    },
                }
                space.update(revised_hps_to_search)
                return space

        new_estimator_name = "large_lgbm"
        new_automl_experiment = AutoML()
        new_automl_experiment.add_learner(
            learner_name=new_estimator_name,
            learner_class=MyPartiallyFreezedLargeLGBM)

        automl_settings_resume = {
            "time_budget": 3,
            "metric": "accuracy",
            "task": "classification",
            "estimator_list": [new_estimator_name],
            "log_file_name": "test/iris_resume.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
            "log_type": "all",
            "starting_points": {
                new_estimator_name: starting_point
            },
        }

        new_automl_experiment.fit(X_train=X_train,
                                  y_train=y_train,
                                  **automl_settings_resume)

        new_automl_val_accuracy = 1.0 - new_automl_experiment.best_loss
        print("Best ML leaner:", new_automl_experiment.best_estimator)
        print("Best hyperparmeter config:", new_automl_experiment.best_config)
        print("Best accuracy on validation data: {0:.4g}".format(
            new_automl_val_accuracy))
        print("Training duration of best run: {0:.4g} s".format(
            new_automl_experiment.best_config_train_time))
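A natural follow-up is to compare the warm-started run against the original one, continuing the method and using only values already computed above:

        print("accuracy change from warm start: {0:+.4g}".format(
            new_automl_val_accuracy - automl_val_accuracy))
        print(new_automl_experiment.predict(X_train[:5]))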