Example #1
        def hp_space(trial):
            from ray import tune

            return {
                "a": tune.randint(-4, 4),
                "b": tune.randint(-4, 4),
            }
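A search-space function like this is typically handed to transformers' Trainer.hyperparameter_search(backend="ray") so that each Ray Tune trial calls hp_space(trial). The same dictionary can also be passed straight to tune.run; a minimal sketch with a made-up toy objective (not part of the original example):

from ray import tune

def toy_objective(config):
    # Toy objective: Tune minimizes the reported score over the sampled integers.
    tune.report(score=config["a"] ** 2 + config["b"] ** 2)

analysis = tune.run(
    toy_objective,
    config={"a": tune.randint(-4, 4), "b": tune.randint(-4, 4)},
    num_samples=16,
    metric="score",
    mode="min",
)
print(analysis.best_config)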
Example #2
def tune_fn():
    mlflow.set_experiment(experiment_name=experiment_name)

    optuna_search = OptunaSearch(metric="auroc", mode="max")

    # Alternative searcher (defined but not used; optuna_search is passed to tune.run below)
    ax_search = AxSearch(metric="auroc", mode="max")

    tune.run(objective,
             name="mlflow_gbdt",
             num_samples=65,
             config={
                 "num_leaves": tune.randint(5, 95),
                 "learning_rate": tune.loguniform(1e-4, 1.0),
                 "n_estimators": tune.randint(100, 100000),
                 "subsample": tune.loguniform(0.01, 1.0),
                 "subsample_freq": tune.randint(1, 5),
                 "objective": "binary",
                 "reg_alpha": tune.loguniform(1e-4, 1.0),
                 "reg_lambda": tune.loguniform(1e-4, 1.0),
                 "tree_learner": "feature",
                 "feature_sel": 0,
                 "mlflow": {
                     "experiment_name": experiment_name,
                     "tracking_uri": mlflow.get_tracking_uri()
                 }
             },
             search_alg=optuna_search)
Example #3
 def search_space(self, all_available_features):
     space = {
         "n_estimators": tune.randint(self.n_estimators_range[0],
                                      self.n_estimators_range[1]),
         "max_depth": tune.randint(self.max_depth_range[0],
                                   self.max_depth_range[1]),
     }
     return space
Example #4
 def fixed_params(self):
     total_fixed_params = {
         "n_estimators":
         tune.randint(self.n_estimators_range[0],
                      self.n_estimators_range[1]),
         "max_depth":
         tune.randint(self.max_depth_range[0], self.max_depth_range[1]),
     }
     return total_fixed_params
Example #5
def tune_function(mlflow_tracking_uri, finish_fast=False):
    tune.run(easy_objective,
             name="mlflow",
             num_samples=5,
             callbacks=[
                 MLflowLoggerCallback(tracking_uri=mlflow_tracking_uri,
                                      experiment_name="example",
                                      save_artifact=True)
             ],
             config={
                 "width": tune.randint(10, 100),
                 "height": tune.randint(0, 100),
                 "steps": 5 if finish_fast else 100,
             })
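Example #5 (and the decorated variant in Example #12 below) assumes an easy_objective trainable that is not shown. A hedged sketch of one plausible shape, reporting per-step metrics that the MLflowLoggerCallback can log (the score formula is invented):

import time
from ray import tune

def easy_objective(config):
    width, height = config["width"], config["height"]
    for step in range(config["steps"]):
        # Invented toy score that depends on the sampled hyperparameters.
        intermediate_score = (0.1 + width * step / 100) ** (-1) + height * 0.1
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)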
Example #6
def main(cpus_per_actor, num_actors, num_samples):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }

    analysis = tune.run(
        tune.with_parameters(train_breast_cancer,
                             cpus_per_actor=cpus_per_actor,
                             num_actors=num_actors),
        # extra_cpu is used if the trainable creates additional remote actors.
        # https://docs.ray.io/en/master/tune/api_docs/trainable.html#advanced-resource-allocation
        resources_per_trial={
            "cpu": 1,
            "extra_cpu": cpus_per_actor * num_actors
        },
        config=config,
        num_samples=num_samples,
        metric="eval-error",
        mode="min")

    # Load the best model checkpoint
    best_bst = xgb.Booster()
    best_bst.load_model(os.path.join(analysis.best_logdir, "simple.xgb"))
    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
Example #7
def tune_mnist(num_training_iterations):
    sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                    max_t=400,
                                    grace_period=20)

    analysis = tune.run(
        train_mnist,
        name="exp",
        scheduler=sched,
        metric="mean_accuracy",
        mode="max",
        stop={
            "mean_accuracy": 0.99,
            "training_iteration": num_training_iterations
        },
        num_samples=10,
        resources_per_trial={
            "cpu": 2,
            "gpu": 0
        },
        config={
            "threads": 2,
            "lr": tune.uniform(0.001, 0.1),
            "momentum": tune.uniform(0.1, 0.9),
            "hidden": tune.randint(32, 512),
        },
    )
    print("Best hyperparameters found were: ", analysis.best_config)
Example #8
def randint(lower, upper):
    '''
    Uniformly sample an integer between lower and upper.
    Note: Ray Tune's randint treats lower as inclusive and upper as exclusive,
    i.e. samples are drawn from [lower, upper).
    :param lower: Lower bound of the sampling range (inclusive).
    :param upper: Upper bound of the sampling range (exclusive).
    '''
    return tune.randint(lower, upper)
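With Ray Tune, randint samples from [lower, upper), so the upper bound is never returned (qrandint, by contrast, includes it). A quick check, assuming Ray Tune's search-space objects expose .sample() as they do in recent releases:

from collections import Counter
from ray import tune

domain = tune.randint(0, 3)  # yields 0, 1 or 2 -- never 3
counts = Counter(domain.sample() for _ in range(1000))
print(sorted(counts))  # expected: [0, 1, 2]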
Example #9
def run_tune(
    sync_to_driver: bool,
    upload_dir: Optional[str] = None,
    durable: bool = False,
    experiment_name: str = "cloud_test",
    indicator_file: str = "/tmp/tune_cloud_indicator",
):
    num_cpus_per_trial = int(os.environ.get("TUNE_NUM_CPUS_PER_TRIAL", "2"))

    if durable:
        trainable = tune.durable(train)
    else:
        trainable = train

    tune.run(trainable,
             name=experiment_name,
             resume="AUTO",
             num_samples=4,
             config={
                 "max_iterations": 30,
                 "sleep_time": 5,
                 "checkpoint_freq": 2,
                 "score_multiplied": tune.randint(0, 100),
             },
             sync_config=tune.SyncConfig(
                 sync_to_driver=sync_to_driver,
                 upload_dir=upload_dir,
                 sync_on_checkpoint=True,
                 cloud_sync_period=0.5,
             ),
             keep_checkpoints_num=2,
             resources_per_trial={"cpu": num_cpus_per_trial},
             callbacks=[IndicatorCallback(indicator_file=indicator_file)],
             verbose=2)
Example #10
 def read_tune_ax(name, this):
     dict_ = dict(name=name)
     min_, max_ = this['minmax']
     if min_ == max_:
         dict_["type"] = "fixed"
         dict_["value"] = min_
     elif this['type'] == 'int':
         if this['step'] == 1:
             dict_["type"] = "range"
             dict_["bounds"] = [min_, max_]
             dict_["value_type"] = "int"
         else:
             dict_["type"] = "choice"
             dict_["values"] = tune.randint(min_, max_, this['step'])
             dict_["value_type"] = "int"
     elif this['type'] == 'float':
         if this['step'] == 1:
             dict_["type"] = "choice"
             dict_["values"] = tune.choice(
                 np.adarray.tolist(
                     np.arange(config_min, config_max, config_step)))
             dict_["value_type"] = "float"
         else:
             dict_["type"] = "range"
             dict_["bounds"] = [min_, max_]
             dict_["value_type"] = "float"
     return dict_
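A hypothetical input showing the shape of the `this` argument that read_tune_ax expects, treating it as a standalone function (names and bounds are invented):

int_range = read_tune_ax("num_layers", {"minmax": [2, 8], "type": "int", "step": 1})
# -> {"name": "num_layers", "type": "range", "bounds": [2, 8], "value_type": "int"}

fixed_value = read_tune_ax("dropout", {"minmax": [0.1, 0.1], "type": "float", "step": 1})
# -> {"name": "dropout", "type": "fixed", "value": 0.1}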
Example #11
def main(cpus_per_actor, num_actors, num_samples):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }

    ray_params = RayParams(max_actor_restarts=1,
                           gpus_per_actor=0,
                           cpus_per_actor=cpus_per_actor,
                           num_actors=num_actors)

    analysis = tune.run(
        tune.with_parameters(train_breast_cancer, ray_params=ray_params),
        # Use the `get_tune_resources` helper function to set the resources.
        resources_per_trial=ray_params.get_tune_resources(),
        config=config,
        num_samples=num_samples,
        metric="eval-error",
        mode="min")

    # Load the best model checkpoint.
    best_bst = xgboost_ray.tune.load_model(
        os.path.join(analysis.best_logdir, "tuned.xgb"))

    best_bst.save_model("best_model.xgb")

    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
Example #12
def tune_decorated(mlflow_tracking_uri, finish_fast=False):
    # Set the experiment, or create a new one if it does not exist yet.
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    mlflow.set_experiment(experiment_name="mixin_example")
    tune.run(decorated_easy_objective,
             name="mlflow",
             num_samples=5,
             config={
                 "width": tune.randint(10, 100),
                 "height": tune.randint(0, 100),
                 "steps": 5 if finish_fast else 100,
                 "mlflow": {
                     "experiment_name": "mixin_example",
                     "tracking_uri": mlflow.get_tracking_uri()
                 }
             })
Example #13
 def update_search_space(self, search_space):
     '''
     Tuners are advised to support updating search space at run-time.
     If a tuner can only set search space once before generating first hyper-parameters,
     it should explicitly document this behaviour.
     search_space: JSON object created by experiment owner
     '''
     config = {}
     for key, value in search_space.items():
         v = value.get("_value")
         _type = value['_type']
         if _type == 'choice':
             config[key] = choice(v)
         elif _type == 'randint':
             config[key] = randint(v[0], v[1] - 1)
         elif _type == 'uniform':
             config[key] = uniform(v[0], v[1])
         elif _type == 'quniform':
             config[key] = quniform(v[0], v[1], v[2])
         elif _type == 'loguniform':
             config[key] = loguniform(v[0], v[1])
         elif _type == 'qloguniform':
             config[key] = qloguniform(v[0], v[1], v[2])
         elif _type == 'normal':
             config[key] = randn(v[1], v[2])
         elif _type == 'qnormal':
             config[key] = qrandn(v[1], v[2], v[3])
         else:
             raise ValueError(
                 f'unsupported type in search_space {_type}')
     self._ls.set_search_properties(None, None, config)
     if self._gs is not None:
         self._gs.set_search_properties(None, None, config)
     self._init_search()
Example #14
def tune_mnist():
    sched = ASHAScheduler(time_attr="training_iteration")

    metric = "mean_accuracy"

    analysis = tune.run(
        train_mnist,
        name="foo",
        scheduler=sched,
        metric=metric,
        mode="max",
        #stop={
        #    "mean_accuracy": 0.99,
        #    "training_iteration": num_training_iterations
        #},
        num_samples=50,
        resources_per_trial={
            "cpu": 1,
            "gpu": 0
        },
        config={
            "dropout": tune.uniform(0.05, 0.5),
            "lr": tune.uniform(0.001, 0.1),
            "momentum": tune.uniform(0.1, 0.9),
            "hidden": tune.randint(32, 512),
        })
    print("Best hyperparameters found were: ", analysis.best_config)
    print("Best value for", metric, ':', analysis.best_result[metric])
Example #15
 def get_past_seq_config(look_back):
     """
     Generate past sequence config based on look_back.
     :param look_back: look_back configuration
     :return: search configuration for past sequence
     """
     if isinstance(look_back, tuple) and len(look_back) == 2 \
             and isinstance(look_back[0], int) and isinstance(look_back[1], int):
         if look_back[1] < 2:
             raise ValueError(
                 "The max look back value should be at least 2")
         if look_back[0] < 2:
             print(
                 "The input min look back value is smaller than 2. "
                 "We sample from range (2, {}) instead.".format(
                     look_back[1]))
         # Clamp the lower bound to 2, as the message above states.
         past_seq_config = tune.randint(max(look_back[0], 2), look_back[1] + 1)
     elif isinstance(look_back, int):
         if look_back < 2:
             raise ValueError(
                 "look back value should not be smaller than 2. "
                 "Current value is ", look_back)
         past_seq_config = look_back
     else:
         raise ValueError(
             "look back is {}.\n "
             "look_back should be either a tuple with 2 int values:"
             " (min_len, max_len) or a single int".format(look_back))
     return past_seq_config
Example #16
def test_tune(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params={
            **params,
            **{
                "max_depth": 1
            }
        },
        datasets={
            TRAIN_DATASET_KEY: train_dataset,
            "valid": valid_dataset
        },
    )

    tune.run(
        trainer.as_trainable(),
        config={"params": {
            "max_depth": tune.randint(2, 4)
        }},
        num_samples=2,
    )

    # Make sure original Trainer is not affected.
    assert trainer.params["max_depth"] == 1
Example #17
def test_tune(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = SklearnTrainer(
        estimator=RandomForestClassifier(),
        scaling_config=scale_config,
        label_column="target",
        params={"max_depth": 4},
        cv=5,
        datasets={
            TRAIN_DATASET_KEY: train_dataset,
            "valid": valid_dataset
        },
    )

    tune.run(
        trainer.as_trainable(),
        config={"params": {
            "max_depth": tune.randint(2, 4)
        }},
        num_samples=2,
        metric="cv/test_score_mean",
        mode="max",
    )

    # Make sure original Trainer is not affected.
    assert trainer.params["max_depth"] == 4
Example #18
def tune_xgboost(train_df, test_df, target_column):
    # Set XGBoost config.
    config = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }

    ray_params = RayParams(max_actor_restarts=1,
                           gpus_per_actor=0,
                           cpus_per_actor=4,
                           num_actors=4)

    analysis = tune.run(
        tune.with_parameters(train_xgboost,
                             train_df=train_df,
                             test_df=test_df,
                             target_column=target_column,
                             ray_params=ray_params),
        # Use the `get_tune_resources` helper function to set the resources.
        resources_per_trial=ray_params.get_tune_resources(),
        config=config,
        num_samples=1,
        metric="eval-error",
        mode="min",
        verbose=1)

    accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")

    return analysis.best_config
Example #19
def tune_xgboost(use_cv: bool = False):
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }
    # This will enable aggressive early stopping of bad trials.
    scheduler = ASHAScheduler(
        max_t=10,
        grace_period=1,
        reduction_factor=2  # 10 training iterations
    )

    analysis = tune.run(
        train_breast_cancer if not use_cv else train_breast_cancer_cv,
        metric="test-logloss",
        mode="min",
        # You can add "gpu": 0.1 to allocate GPUs
        resources_per_trial={"cpu": 1},
        config=search_space,
        num_samples=10,
        scheduler=scheduler,
    )

    return analysis
Example #20
def tunerTrain():
    ray.init(_memory=4000000000, num_cpus=5)
    searchSpace = {
        'lr': tune.loguniform(1e-4, 9e-1),
        'finalOutput': tune.randint(2, 50),  # minimum of 2; otherwise 1 // 2 = 0 activation maps
        'stride1': tune.grid_search(np.arange(1, 4).tolist()),
        'stride2': tune.grid_search(np.arange(1, 4).tolist()),
        'batchSize': tune.grid_search([2, 4, 8, 16, 32, 64, 128, 256]),
        'finalChannel': tune.randint(1, 50),
    }

    analysis = tune.run(train, num_samples=1, scheduler=ASHAScheduler(metric='score', mode='max'),
                        config=searchSpace)
    print(f"Best Config: {analysis.get_all_configs(metric='score', mode='max')}")
    df = analysis.results_df
    logdir = analysis.get_best_logdir("score", mode="max")
    print(f"dir of best: {logdir}")
Example #21
def create_config(backtest_config: dict) -> dict:
    config = {}
    config['starting_balance'] = backtest_config['starting_balance']
    config['min_size'] = backtest_config['min_size']
    config['min_step'] = backtest_config['min_step']
    config['taker_fee'] = backtest_config['taker_fee']

    config['qty_pct'] = tune.uniform(backtest_config['ranges']['qty_pct'][0],
                                     backtest_config['ranges']['qty_pct'][1])

    config['leverage'] = tune.randint(backtest_config['ranges']['leverage'][0],
                                      backtest_config['ranges']['leverage'][1])
    config['ema_fast'] = tune.randint(backtest_config['ranges']['ema_fast'][0],
                                      backtest_config['ranges']['ema_fast'][1])
    config['ema_slow'] = tune.randint(backtest_config['ranges']['ema_slow'][0],
                                      backtest_config['ranges']['ema_slow'][1])
    return config
Example #22
    def update_search_space(self, search_space):
        """Required by NNI.

        Tuners are advised to support updating search space at run-time.
        If a tuner can only set search space once before generating first hyper-parameters,
        it should explicitly document this behaviour.

        Args:
            search_space: JSON object created by experiment owner.
        """
        config = {}
        for key, value in search_space.items():
            v = value.get("_value")
            _type = value["_type"]
            if _type == "choice":
                config[key] = choice(v)
            elif _type == "randint":
                config[key] = randint(*v)
            elif _type == "uniform":
                config[key] = uniform(*v)
            elif _type == "quniform":
                config[key] = quniform(*v)
            elif _type == "loguniform":
                config[key] = loguniform(*v)
            elif _type == "qloguniform":
                config[key] = qloguniform(*v)
            elif _type == "normal":
                config[key] = randn(*v)
            elif _type == "qnormal":
                config[key] = qrandn(*v)
            else:
                raise ValueError(f"unsupported type in search_space {_type}")
        # low_cost_partial_config is passed to constructor,
        # which is before update_search_space() is called
        init_config = self._ls.init_config
        add_cost_to_space(config, init_config, self._cat_hp_cost)
        self._ls = self.LocalSearch(
            init_config,
            self._ls.metric,
            self._mode,
            config,
            self._ls.resource_attr,
            self._ls.min_resource,
            self._ls.max_resource,
            self._ls.resource_multiple_factor,
            cost_attr=self.cost_attr,
            seed=self._ls.seed,
        )
        if self._gs is not None:
            self._gs = GlobalSearch(
                space=config,
                metric=self._metric,
                mode=self._mode,
                sampler=self._gs._sampler,
            )
            self._gs.space = config
        self._init_search()
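For reference, a hypothetical NNI-style search space showing the JSON shape that the update_search_space methods in Examples #13 and #22 expect; the keys and bounds here are made up:

search_space = {
    "hidden_size": {"_type": "choice", "_value": [64, 128, 256]},
    "num_layers": {"_type": "randint", "_value": [1, 5]},
    "lr": {"_type": "loguniform", "_value": [1e-4, 1e-1]},
    "dropout": {"_type": "uniform", "_value": [0.0, 0.5]},
}
# tuner.update_search_space(search_space)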
Example #23
def qrandint(lower, upper, q=1):
    '''
    Uniformly sample an integer between lower and upper (both inclusive),
    then round the result to the nearest multiple of q.
    :param lower: Lower bound of the sampling range.
    :param upper: Upper bound of the sampling range.
    :param q: Integer granularity for the increment.
    '''
    return tune.qrandint(lower, upper, q)
Example #24
def run_tune(
    no_syncer: bool,
    upload_dir: Optional[str] = None,
    experiment_name: str = "cloud_test",
    indicator_file: str = "/tmp/tune_cloud_indicator",
    trainable: str = "function",
    num_cpus_per_trial: int = 2,
):
    if trainable == "function":
        train = fn_trainable
        config = {
            "max_iterations": 100,
            "sleep_time": 5,
            "checkpoint_freq": 2,
            "score_multiplied": tune.randint(0, 100),
        }
        kwargs = {"resources_per_trial": {"cpu": num_cpus_per_trial}}
    elif trainable == "rllib_str" or trainable == "rllib_trainer":
        if trainable == "rllib_str":
            train = "PPO"
        else:
            train = PPO

        config = {
            "env": "CartPole-v1",
            "num_workers": 1,
            "num_envs_per_worker": 1,
            "callbacks": RLlibCallback,
        }
        kwargs = {
            "stop": {
                "training_iteration": 100
            },
            "checkpoint_freq": 2,
            "checkpoint_at_end": True,
        }
    else:
        raise RuntimeError(f"Unknown trainable: {trainable}")

    tune.run(
        train,
        name=experiment_name,
        resume="AUTO",
        num_samples=4,
        config=config,
        sync_config=tune.SyncConfig(
            syncer="auto" if not no_syncer else None,
            upload_dir=upload_dir,
            sync_on_checkpoint=True,
            sync_period=0.5,
        ),
        keep_checkpoints_num=2,
        callbacks=[IndicatorCallback(indicator_file=indicator_file)],
        verbose=2,
        **kwargs,
    )
Example #25
    def testRelativeLogdir(self):
        """Moving the experiment folder into another location.

        This should still work and is an important use case,
        because training in the cloud usually involves such
        relocations.
        """
        local_dir_path = Path("/tmp/test_rel")
        if local_dir_path.exists():
            local_dir = tempfile.mkdtemp(prefix=str(local_dir_path) + "_")
        else:
            local_dir = str(local_dir_path)

        tune.run("rel_logdir",
                 config={"a": tune.randint(0, 10)},
                 local_dir=local_dir)

        # Copy the folder
        local_dir_moved = local_dir + "_moved"
        shutil.copytree(local_dir, local_dir_moved)

        # Load the moved trials
        analysis = tune.ExperimentAnalysis(local_dir)
        analysis_moved = tune.ExperimentAnalysis(local_dir_moved)

        configs = analysis.get_all_configs()
        configs_moved = analysis_moved.get_all_configs()
        config = configs[next(iter(configs))]
        config_moved = configs_moved[next(iter(configs_moved))]

        # Check, if the trial attributes can be loaded.
        self.assertEqual(len(configs), 1)
        self.assertEqual(len(configs_moved), 1)

        # Check, if the two configs are equal.
        self.assertDictEqual(config, config_moved)

        metric = "metric1"
        mode = "max"
        analysis_df = analysis.dataframe(metric, mode)
        analysis_moved_df = analysis_moved.dataframe(metric, mode)

        self.assertEqual(analysis_df.shape[0], 1)
        self.assertEqual(analysis_moved_df.shape[0], 1)

        # Drop the `logdir` column as this should be different
        # between the two trials.
        analysis_df.drop(columns="logdir", inplace=True)
        analysis_moved_df.drop(columns="logdir", inplace=True)
        self.assertTrue(analysis_df.equals(analysis_moved_df))

        # Remove the files and directories.
        if shutil.rmtree.avoids_symlink_attacks:
            if local_dir_path.exists():
                shutil.rmtree(local_dir)
            shutil.rmtree(local_dir_moved)
Example #26
def create_config(backtest_config: dict) -> dict:
    config = {k: backtest_config[k] for k in backtest_config
              if k not in {'session_name', 'user', 'symbol', 'start_date', 'end_date', 'ranges'}}
    for k in backtest_config['ranges']:
        if backtest_config['ranges'][k][0] == backtest_config['ranges'][k][1]:
            config[k] = backtest_config['ranges'][k][0]
        elif k in ['n_close_orders', 'leverage']:
            config[k] = tune.randint(backtest_config['ranges'][k][0], backtest_config['ranges'][k][1] + 1)
        else:
            config[k] = tune.uniform(backtest_config['ranges'][k][0], backtest_config['ranges'][k][1])
    return config
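A hypothetical backtest_config illustrating the structure Example #26 expects; every key and value below is invented for illustration:

backtest_config = {
    "session_name": "demo",
    "user": "tester",
    "symbol": "BTCUSDT",
    "start_date": "2021-01-01",
    "end_date": "2021-06-01",
    "starting_balance": 1000.0,
    "ranges": {
        "leverage": [1, 5],          # integer range -> tune.randint (upper bound made inclusive via +1)
        "n_close_orders": [2, 2],    # equal bounds -> fixed value
        "qty_pct": [0.01, 0.25],     # float range -> tune.uniform
    },
}
search_space = create_config(backtest_config)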
Example #27
    def testTuneSampleAPI(self):
        config = {
            "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),
            "uniform": tune.uniform(-5, -1),
            "quniform": tune.quniform(3.2, 5.4, 0.2),
            "loguniform": tune.loguniform(1e-4, 1e-2),
            "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),
            "choice": tune.choice([2, 3, 4]),
            "randint": tune.randint(-9, 15),
            "qrandint": tune.qrandint(-21, 12, 3),
            "randn": tune.randn(10, 2),
            "qrandn": tune.qrandn(10, 2, 0.2),
        }
        for _, (_, generated) in zip(
                range(1000), generate_variants({
                    "config": config
                })):
            out = generated["config"]

            self.assertAlmostEqual(out["func"], out["uniform"] * 0.01)

            self.assertGreaterEqual(out["uniform"], -5)
            self.assertLess(out["uniform"], -1)

            self.assertGreaterEqual(out["quniform"], 3.2)
            self.assertLessEqual(out["quniform"], 5.4)
            self.assertAlmostEqual(out["quniform"] / 0.2,
                                   round(out["quniform"] / 0.2))

            self.assertGreaterEqual(out["loguniform"], 1e-4)
            self.assertLess(out["loguniform"], 1e-2)

            self.assertGreaterEqual(out["qloguniform"], 1e-4)
            self.assertLessEqual(out["qloguniform"], 1e-1)
            self.assertAlmostEqual(out["qloguniform"] / 5e-5,
                                   round(out["qloguniform"] / 5e-5))

            self.assertIn(out["choice"], [2, 3, 4])

            self.assertGreaterEqual(out["randint"], -9)
            self.assertLess(out["randint"], 15)

            self.assertGreaterEqual(out["qrandint"], -21)
            self.assertLessEqual(out["qrandint"], 12)
            self.assertEqual(out["qrandint"] % 3, 0)

            # Very improbable
            self.assertGreater(out["randn"], 0)
            self.assertLess(out["randn"], 20)

            self.assertGreater(out["qrandn"], 0)
            self.assertLess(out["qrandn"], 20)
            self.assertAlmostEqual(out["qrandn"] / 0.2,
                                   round(out["qrandn"] / 0.2))
Example #28
def main(loss_function="L1", num_samples=25, max_num_epochs=25, gpus_per_trial=1, cpus_per_trial=10):

    experiment_name = loss_function + "_shuffle_validation"
    save_dir = '/data/results/vcpujol/transformers/single_deployment/predict_maxmax/pytorch_transformer/'

    config = {
        "lr": tune.loguniform(1e-4, 5e-1),
        "lr_step": tune.randint(1, 10),
        "gamma": tune.loguniform(0.85, 0.9999),
        "epochs": tune.choice([5, 10, 15, 20, 25]),
        "n_heads": tune.randint(2, 10),
        "dim_val": tune.choice([2, 4, 6]),  # FIXME: must be an even number
        "dim_att": tune.randint(2, 12),
        "encoder_layers": tune.randint(1, 7),
        "decoder_layers": tune.randint(1, 7),
        "batch_size": tune.randint(1, 10),
        "input_feat_enc": tune.choice([94]),
        "input_feat_dec": tune.choice([1]),
        "seq_len": tune.choice([16, 32, 64, 96, 128, 180, 220, 256, 312, 350, 420, 470, 512]),  # alt.: [16, 32, 64, 128, 256, 512, 1024, 2048]
        "prediction_step": tune.choice([1])
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=4,
        reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=["lr", "lr_step", "gamma", "epochs", "n_heads", "dim_val", "dim_att", "encoder_layers",
                           "decoder_layers", "batch_size", "seq_len"],
        metric_columns=["loss", "training_iteration"])
    result = tune.run(
        partial(transformer_train, save_dir=save_dir, loss_function=loss_function),
        resources_per_trial={"cpu": cpus_per_trial, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        local_dir=save_dir,
        name=experiment_name)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))
    # print("Best trial final validation accuracy: {}".format(best_trial.last_result["accuracy"]))

    best_trained_model = Transformer(best_trial.config["dim_val"], best_trial.config["dim_att"],
                                     best_trial.config["input_feat_enc"], best_trial.config["input_feat_dec"],
                                     best_trial.config["seq_len"], best_trial.config["decoder_layers"],
                                     best_trial.config["encoder_layers"], best_trial.config["n_heads"])

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    local_dir = save_dir
    exp_name = experiment_name
    test_acc = test_transformer(best_trained_model, best_trial.config, local_dir, exp_name, loss_function)
    print("Best trial test set accuracy: {}".format(test_acc))
Example #29
def gfp(local_dir, cpus, gpus, num_parallel, num_samples):
    """Evaluate Conservative Objective Models on GFP-v0
    """

    from design_baselines.coms_original import coms_original
    ray.init(num_cpus=cpus,
             num_gpus=gpus,
             include_dashboard=False,
             _temp_dir=os.path.expanduser('~/tmp'))
    tune.run(coms_original,
             config={
                 "logging_dir": "data",
                 "task": "GFP-v0",
                 "task_kwargs": {
                     'seed': tune.randint(0, 1000)  # random seed in [0, 1000); both bounds required
                 },
                 "is_discrete": True,
                 "constraint_type": "mix",
                 "normalize_ys": True,
                 "normalize_xs": True,
                 "continuous_noise_std": 0.2,
                 "discrete_clip": 0.6,
                 "val_size": 500,
                 "batch_size": 128,
                 "epochs": 500,
                 "activations": ['leaky_relu', 'leaky_relu'],
                 "hidden_size": 2048,
                 "initial_max_std": 0.2,
                 "initial_min_std": 0.1,
                 "forward_model_lr": 0.001,
                 "initial_alpha": 1.0,
                 "alpha_lr": 0.05,
                 "target_conservatism": 0.05,
                 "negatives_fraction": 1.0,
                 "lookahead_steps": 1,
                 "lookahead_backprop": True,
                 "evaluate_steps": [450],
                 "solver_lr": 0.01,
                 "solver_interval": 1,
                 "solver_warmup": 50,
                 "solver_steps": 1,
                 "solver_beta":
                 tune.grid_search([0.0, 0.1, 0.3, 0.7, 0.9, 1.0])
             },
             num_samples=num_samples,
             local_dir=local_dir,
             resources_per_trial={
                 'cpu': cpus // num_parallel,
                 'gpu': gpus / num_parallel - 0.01
             })
Example #30
def get_model_search_space(model_type: ModelEnum) -> Mapping[str, Any]:
    if model_type == ModelEnum.RF:
        return dict(
            max_depth=tune.randint(1, 10),
            n_estimators=tune_q_log_uniform(high=100, q=1),
        )
    elif model_type == ModelEnum.DT:
        return dict(max_leaf_nodes=tune.sample_from(_sample_max_leaf_nodes), )
    elif model_type == ModelEnum.ADB:
        return dict(
            n_estimators=tune_q_log_uniform(low=1, high=500, q=1),
            learning_rate=tune.loguniform(1.0e-04, 1.0e+01),
        )
    elif model_type == ModelEnum.GB:
        return dict(
            max_leaf_nodes=tune_q_log_uniform(low=4, high=15, q=1),
            n_estimators=tune_q_log_uniform(high=500, q=1),
            learning_rate=tune.loguniform(1.0e-04, 1.0e+01),
        )
    elif model_type == ModelEnum.NN:
        return dict(
            units=tune.randint(10, 28),
            layers=tune.randint(2, 9),
            dropout=tune_bool(),
            dropout_rate=tune.uniform(0.1, 0.5),
            batch_size=tune.choice([2**i for i in range(6, 10)]),
            loss=tune.choice(['mean_squared_error', 'mean_absolute_error']),
            batch_normalization=tune_bool(),
            is_normalized=tune_bool(),
            patience=4,
        )
    elif model_type == ModelEnum.KN:
        return dict(n_neighbors=1, )
    elif model_type == ModelEnum.COP:
        return dict()
    else:
        raise ValueError(model_type)
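Example #30 relies on tune_q_log_uniform and tune_bool helpers that are not shown. One plausible, purely hypothetical implementation in terms of Ray Tune primitives:

from ray import tune

def tune_q_log_uniform(high, q=1, low=1):
    # Hypothetical helper: log-uniform sampling quantized to multiples of q.
    return tune.qloguniform(low, high, q)

def tune_bool():
    # Hypothetical helper: a simple boolean choice.
    return tune.choice([True, False])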