Example No. 1
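# `storage_specifier` is a pytest parameter and `get_storage_url` is a context
# manager (defined elsewhere in the test module) that yields a storage URL for
# the given specifier. The storage wrapped by DaskStorage should be of the same
# type that Optuna itself would create from that URL.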
def test_get_base_storage(storage_specifier):
    with Client():
        with get_storage_url(storage_specifier) as url:
            dask_storage = dask_optuna.DaskStorage(url)
            storage = dask_storage.get_base_storage()
            expected_type = type(optuna.storages.get_storage(url))
            assert isinstance(storage, expected_type)
Example No. 2
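# joblib's "dask" backend runs the optimization trials as tasks on the Dask
# cluster, while DaskStorage keeps every worker writing to one shared study.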
def test_optuna_joblib_backend(storage_specifier, processes):
    with Client(processes=processes):
        with get_storage_url(storage_specifier) as url:
            storage = dask_optuna.DaskStorage(url)
            study = optuna.create_study(storage=storage)
            with joblib.parallel_backend("dask"):
                study.optimize(objective, n_trials=10, n_jobs=-1)
            assert len(study.trials) == 10
Example No. 3
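# With storage=None, DaskStorage hosts an in-memory Optuna storage on the
# scheduler, so each task's own DaskStorage (see `_optimize` in Example No. 9)
# connects to the same shared store: 5 tasks x 2 trials = 10 trials in one study.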
async def test_in_memory(c, s, a, b):
    storage = None
    dask_storage = dask_optuna.DaskStorage(storage=storage)
    futures = [c.submit(_optimize, storage=storage, pure=False) for _ in range(5)]
    await wait(futures)
    await futures[0]

    results = await dask_storage.get_all_study_summaries()
    assert len(results) == 1
    assert results[0].n_trials == 10
Example No. 4
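# Same pattern as the in-memory test, but backed by a SQLite file. Funneling
# all storage calls through the scheduler is what lets concurrent trials share
# a SQLite database safely.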
async def test_sqlite(c, s, a, b):
    with tempfile.TemporaryDirectory() as tmpdirname:
        storage = "sqlite:///" + os.path.join(tmpdirname, "example.db")

        dask_storage = dask_optuna.DaskStorage(storage=storage)
        futures = [c.submit(_optimize, storage=storage, pure=False) for _ in range(5)]
        await wait(futures)
        await futures[0]

        results = await dask_storage.get_all_study_summaries()
        assert len(results) == 1
        assert results[0].n_trials == 10
Example No. 5
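# `processes` and `direction` come from pytest parametrization in the full test
# module (threaded vs. multiprocess workers; "minimize" vs. "maximize").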
def test_study_direction_best_value(processes, direction):
    # Regression test for https://github.com/jrbourbeau/dask-optuna/issues/15
    pytest.importorskip("pandas")
    with Client(processes=processes):
        dask_storage = dask_optuna.DaskStorage()
        study = optuna.create_study(storage=dask_storage, direction=direction)
        with joblib.parallel_backend("dask"):
            study.optimize(objective, n_trials=10, n_jobs=-1)

        # Ensure that study.best_value matches up with the expected value from
        # the trials DataFrame
        trials_value = study.trials_dataframe()["value"]
        if direction == "maximize":
            expected = trials_value.max()
        else:
            expected = trials_value.min()

        np.testing.assert_allclose(expected, study.best_value)
Example No. 6
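        # rate_drop and skip_drop are XGBoost "dart"-booster parameters; in the
        # full script these assignments sit inside an
        # `if param["booster"] == "dart":` branch of `objective(trial)`.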
        param["rate_drop"] = trial.suggest_float("rate_drop",
                                                 1e-8,
                                                 1.0,
                                                 log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop",
                                                 1e-8,
                                                 1.0,
                                                 log=True)

    bst = xgb.train(param, dtrain)
    preds = bst.predict(dtest)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels)
    return accuracy


if __name__ == "__main__":
    with coiled.Cluster(n_workers=5,
                        configuration="jrbourbeau/optuna") as cluster:
        with Client(cluster) as client:
            print(f"Dask dashboard is available at {client.dashboard_link}")
            client.wait_for_workers(5)

            storage = dask_optuna.DaskStorage("sqlite:///coiled-example.db")
            study = optuna.create_study(storage=storage, direction="maximize")
            with joblib.parallel_backend("dask"):
                study.optimize(objective, n_trials=100, n_jobs=-1)

            print("Best params:")
            pprint(study.best_params)
Example No. 7
"""
Example to demonstrate using Dask-Optuna with Optuna's Joblib internals
to run optimization trials on a Dask cluster in parallel.
"""

import optuna
import joblib
from dask.distributed import Client
import dask_optuna

optuna.logging.set_verbosity(optuna.logging.WARN)


def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2)**2


if __name__ == "__main__":

    with Client() as client:
        print(f"Dask dashboard is available at {client.dashboard_link}")
        dask_storage = dask_optuna.DaskStorage()
        study = optuna.create_study(storage=dask_storage)
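        # Hand joblib's parallelism to the Dask cluster so the 500 trials
        # execute on Dask workers, all recording to the shared storage.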
        with joblib.parallel_backend("dask"):
            study.optimize(objective, n_trials=500, n_jobs=-1)

        print(f"best_params = {study.best_params}")
Example No. 8
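    # This excerpt assumes earlier setup: a `cluster` (a GPU cluster, since
    # cuDF/cuML are used below), a `data_dir` path, and imports such as
    # `from joblib import parallel_backend`.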
    c = Client(cluster)

    # Query the client for all connected workers
    workers = c.has_what().keys()
    n_workers = len(workers)
    df = cudf.read_csv(os.path.join(data_dir, "train.csv"))
    N_TRIALS = 5

    # Drop non-numerical data and fill NaNs before passing to cuML RF
    CAT_COLS = list(df.select_dtypes('object').columns)
    df = df.drop(CAT_COLS, axis=1)
    df = df.fillna(0)

    df = df.astype("float32")
    X, y = df.drop(["target"], axis=1), df["target"].astype('int32')

    study_name = "dask_optuna_lr_log_loss_tpe"
    storage_name = "sqlite:///study_stores.db"

    storage = dask_optuna.DaskStorage(storage_name)
    study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                                study_name=study_name,
                                direction="minimize",
                                storage=storage)
    # Optimize in parallel on your Dask cluster
    with parallel_backend("dask"):
        study.optimize(lambda trial: objective(trial, X, y),
                       n_trials=N_TRIALS,
                       n_jobs=n_workers)
    print('Best params: {} and best score: {}'.format(study.best_params,
                                                      study.best_value))
Example No. 9
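# Runs inside each worker task: reconnect to the shared storage and add two
# trials. load_if_exists=True lets concurrent tasks join the same "foo" study
# instead of raising a duplicate-study error.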
def _optimize(storage):
    dask_storage = dask_optuna.DaskStorage(storage=storage)
    study = optuna.create_study(
        study_name="foo", storage=dask_storage, load_if_exists=True
    )
    study.optimize(objective, n_trials=2)