Example No. 1
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    with ray_start_4_cpus():
        config = _get_config(sampler, executor)

        csv_filename = os.path.join(ray_mock_dir, "dataset.csv")
        dataset_csv = generate_data(config["input_features"], config["output_features"], csv_filename, num_examples=100)
        dataset_parquet = create_data_set_to_use("parquet", dataset_csv)

        config = merge_with_defaults(config)

        hyperopt_config = config["hyperopt"]

        if validate_output_feature:
            hyperopt_config["output_feature"] = config["output_features"][0]["name"]
        if validation_metric:
            hyperopt_config["validation_metric"] = validation_metric

        update_hyperopt_params_with_defaults(hyperopt_config)

        parameters = hyperopt_config["parameters"]
        if sampler.get("search_alg", {}).get("type", "") == "bohb":
            # bohb does not support grid_search search space
            del parameters["combiner.num_steps"]

        split = hyperopt_config["split"]
        output_feature = hyperopt_config["output_feature"]
        metric = hyperopt_config["metric"]
        goal = hyperopt_config["goal"]

        hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

        # preprocess
        backend = RayBackend(**RAY_BACKEND_KWARGS)
        model = LudwigModel(config=config, backend=backend)
        training_set, validation_set, test_set, training_set_metadata = model.preprocess(
            dataset=dataset_parquet,
        )

        # hyperopt
        hyperopt_executor = MockRayTuneExecutor(hyperopt_sampler, output_feature, metric, split, **executor)
        hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

        hyperopt_executor.execute(
            config,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            backend=backend,
            output_directory=ray_mock_dir,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=True,
        )
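The helpers used above (`_get_config`, `RAY_BACKEND_KWARGS`, `MockRayTuneExecutor`) are defined elsewhere in the test module. The `ray_start_4_cpus` context manager is also not shown on this page; a minimal sketch of such a fixture, assuming only the public `ray.init`/`ray.shutdown` API, could look like this:

import contextlib

import ray


@contextlib.contextmanager
def ray_start_4_cpus():
    # Start a local Ray instance with a fixed CPU budget for the test and
    # guarantee teardown even if the test body raises.
    ctx = ray.init(num_cpus=4)
    try:
        yield ctx
    finally:
        ray.shutdown()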
Example No. 2
def test_hyperopt_executor(sampler,
                           executor,
                           csv_filename,
                           validate_output_feature=False,
                           validation_metric=None):
    if executor['type'] == 'fiber' and sampler['type'] == 'grid':
        # This test is very slow and doesn't give us additional coverage
        pytest.skip('Skipping Fiber grid search')

    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum")
    ]

    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {
            "type": "concat",
            "num_fc_layers": 2
        },
        "training": {
            "epochs": 2,
            "learning_rate": 0.001
        }
    }

    config = merge_with_defaults(config)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    if validate_output_feature:
        hyperopt_config['output_feature'] = output_features[0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal,
                                                                   parameters,
                                                                   **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_executor.execute(config,
                              dataset=rel_path,
                              gpus=get_available_gpus_cuda_string())
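`HYPEROPT_CONFIG` is a module-level constant that is not reproduced on this page. The keys the examples read from it (`parameters`, plus `output_feature`, `metric`, `split`, and `goal` once `update_hyperopt_params_with_defaults` fills in defaults) and the parameter names deleted in the bohb branches suggest a shape roughly like the sketch below; the concrete values are assumptions:

HYPEROPT_CONFIG = {
    "parameters": {
        # grid_search spaces; the bohb branches delete these because bohb
        # does not support grid_search.
        "utterance.cell_type": {"space": "grid_search", "values": ["rnn", "gru"]},
        "combiner.num_steps": {"space": "grid_search", "values": [3, 4]},
        # Example No. 9 reads "lower"/"upper" from this entry.
        "trainer.learning_rate": {"space": "loguniform", "lower": 0.001, "upper": 0.1},
    },
    "goal": "minimize",
}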
Example No. 3
def test_hyperopt_executor(sampler,
                           executor,
                           csv_filename,
                           validate_output_feature=False,
                           validation_metric=None):
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]

    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {
            "type": "concat",
            "num_fc_layers": 2
        },
        TRAINER: {
            "epochs": 2,
            "learning_rate": 0.001
        },
    }

    config = merge_with_defaults(config)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    if validate_output_feature:
        hyperopt_config["output_feature"] = output_features[0]["name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal,
                                                                   parameters,
                                                                   **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    gpus = [i for i in range(torch.cuda.device_count())]
    hyperopt_executor.execute(config, dataset=rel_path, gpus=gpus)
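In the test suite these functions are driven by `pytest.mark.parametrize`; a hypothetical parametrization (the concrete sampler and executor dicts are assumptions, and `csv_filename` is assumed to be a pytest fixture) would look like:

import pytest

# Hypothetical parameter grids; the real test module defines its own lists.
SAMPLERS = [{"type": "grid"}, {"type": "random", "num_samples": 2}]
EXECUTORS = [{"type": "serial"}]


@pytest.mark.parametrize("sampler", SAMPLERS)
@pytest.mark.parametrize("executor", EXECUTORS)
def test_hyperopt_executor(sampler, executor, csv_filename):
    run_hyperopt_executor(sampler, executor, csv_filename)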
Example No. 4
def run_hyperopt_executor(sampler, executor, csv_filename,
                          validate_output_feature=False,
                          validation_metric=None):
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum")]

    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001},
        "hyperopt": {
            **HYPEROPT_CONFIG,
            "executor": executor,
            "sampler": sampler,
        },
    }

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config['output_feature'] = output_features[0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == 'bohb':
        # bohb does not support grid_search search space
        del parameters['utterance.cell_type']

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(
        sampler["type"])(goal, parameters, **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_executor.execute(config, dataset=rel_path)
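Examples 1, 5, 7 and 8 call a `_get_config` helper instead of building the dict inline (signatures differ slightly across versions; Example No. 5 passes a `search_alg` instead of a `sampler`). Given the inline config above, a plausible sketch of that helper (an assumption, not the verbatim source; the feature helpers are assumed to come from Ludwig's test utilities) is:

from tests.integration_tests.utils import category_feature, text_feature


def _get_config(sampler, executor):
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]
    return {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001},
        "hyperopt": {**HYPEROPT_CONFIG, "executor": executor, "sampler": sampler},
    }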
Example No. 5
def run_hyperopt_executor(
    search_alg,
    executor,
    csv_filename,
    tmpdir,
    validate_output_feature=False,
    validation_metric=None,
    use_split=True,
):
    config = _get_config(search_alg, executor)
    rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

    if not use_split:
        df = pd.read_csv(rel_path)
        df["split"] = 0
        df.to_csv(rel_path)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config["output_feature"] = config["output_features"][0]["name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if search_alg.get("type", "") == "bohb":
        # bohb does not support grid_search search space
        del parameters["utterance.cell_type"]
        hyperopt_config["parameters"] = parameters

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]
    search_alg = hyperopt_config["search_alg"]

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
    )

    hyperopt_executor.execute(
        config,
        dataset=rel_path,
        output_directory=tmpdir,
        backend="local",
    )
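The `use_split=False` branch above writes 0 into a `split` column for every row. Ludwig interprets an explicit split column as 0 = train, 1 = validation, 2 = test, so forcing all rows to 0 exercises the path where hyperopt has to re-split the data itself. A standalone illustration of the convention:

import pandas as pd

# 0 = train, 1 = validation, 2 = test; here every row lands in train.
df = pd.DataFrame({"utterance": ["a b c", "d e f"], "split": [0, 0]})
df.to_csv("dataset_no_split.csv", index=False)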
Example No. 6
def test_hyperopt_search_alg(search_alg,
                             csv_filename,
                             tmpdir,
                             ray_cluster,
                             validate_output_feature=False,
                             validation_metric=None):
    config, rel_path = _setup_ludwig_config(csv_filename)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    # finalize hyperopt config settings
    if search_alg == "dragonfly":
        hyperopt_config["search_alg"] = {
            "type": search_alg,
            "domain": "euclidean",
            "optimizer": "random",
        }
    elif search_alg is None:
        hyperopt_config["search_alg"] = {}
    else:
        hyperopt_config["search_alg"] = {
            "type": search_alg,
        }

    if validate_output_feature:
        hyperopt_config["output_feature"] = config[OUTPUT_FEATURES][0][NAME]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]
    executor = hyperopt_config["executor"]
    search_alg = hyperopt_config["search_alg"]

    hyperopt_executor = get_build_hyperopt_executor(RAY)(parameters,
                                                         output_feature,
                                                         metric,
                                                         goal,
                                                         split,
                                                         search_alg=search_alg,
                                                         **executor)
    raytune_results = hyperopt_executor.execute(config,
                                                dataset=rel_path,
                                                output_directory=tmpdir)
    assert isinstance(raytune_results, RayTuneResults)
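This test is meant to be parametrized over search algorithm names. Ray Tune ships algorithms such as `variant_generator`, `random`, `hyperopt`, `bohb`, and `dragonfly` (the body above handles `dragonfly` and `None` specially), so a hypothetical parametrization could be:

import pytest

# Hypothetical list; the real module defines its own.
SEARCH_ALGS = [None, "variant_generator", "random", "hyperopt", "bohb", "dragonfly"]


@pytest.mark.parametrize("search_alg", SEARCH_ALGS)
def test_hyperopt_search_alg(search_alg, csv_filename, tmpdir, ray_cluster):
    ...  # body as in the example above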
Example No. 7
def run_hyperopt_executor(
    sampler, executor, csv_filename, ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    config = _get_config(sampler, executor)

    csv_filename = os.path.join(ray_mock_dir, 'dataset.csv')
    dataset_csv = generate_data(
        config['input_features'], config['output_features'], csv_filename, num_examples=100)
    dataset_parquet = create_data_set_to_use('parquet', dataset_csv)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config['output_feature'] = config['output_features'][0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == 'bohb':
        # bohb does not support grid_search search space
        del parameters['combiner.num_steps']

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(
        sampler["type"])(goal, parameters, **sampler)

    hyperopt_executor = MockRayTuneExecutor(
        hyperopt_sampler, output_feature, metric, split, **executor)
    hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

    hyperopt_executor.execute(
        config,
        dataset=dataset_parquet,
        backend=RayBackend(processor={'parallelism': 4,}),
        output_directory=ray_mock_dir,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=True
    )
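`MockRayTuneExecutor` is defined elsewhere in the test module. Judging from the `mock_path` attribute assigned after construction, it is a `RayTuneExecutor` subclass that redirects remote-bucket syncing to a local directory; a rough sketch of the idea (an assumption, not the real class, and the import path is assumed):

from ludwig.hyperopt.execution import RayTuneExecutor


class MockRayTuneExecutor(RayTuneExecutor):
    # Stand-in for remote storage: trial outputs are synced to this local
    # "bucket" directory instead of a real cloud bucket.
    mock_path = None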
Example No. 8
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    validate_output_feature=False,
    validation_metric=None,
):
    config = _get_config(sampler, executor)
    rel_path = generate_data(config["input_features"],
                             config["output_features"], csv_filename)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config["output_feature"] = config["output_features"][0][
            "name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == "bohb":
        # bohb does not support grid_search search space
        del parameters["utterance.cell_type"]

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal,
                                                                   parameters,
                                                                   **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_executor.execute(
        config,
        dataset=rel_path,
        backend="local",
    )
Example No. 9
def test_hyperopt_scheduler(scheduler,
                            csv_filename,
                            tmpdir,
                            ray_cluster,
                            validate_output_feature=False,
                            validation_metric=None):
    config, rel_path = _setup_ludwig_config(csv_filename)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    # finalize hyperopt config settings
    if scheduler == "pb2":
        # setup scheduler hyperparam_bounds parameter
        min_value = hyperopt_config["parameters"]["trainer.learning_rate"]["lower"]
        max_value = hyperopt_config["parameters"]["trainer.learning_rate"]["upper"]
        hyperparam_bounds = {
            "trainer.learning_rate": [min_value, max_value],
        }
        hyperopt_config["executor"]["scheduler"] = {
            "type": scheduler,
            "hyperparam_bounds": hyperparam_bounds,
        }
    else:
        hyperopt_config["executor"]["scheduler"] = {
            "type": scheduler,
        }

    if validate_output_feature:
        hyperopt_config["output_feature"] = config[OUTPUT_FEATURES][0][NAME]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]
    executor = hyperopt_config["executor"]
    search_alg = hyperopt_config["search_alg"]

    # TODO: Determine if we still need this if-then-else construct
    if search_alg["type"] in {""}:
        with pytest.raises(ImportError):
            get_build_hyperopt_executor(RAY)(parameters,
                                             output_feature,
                                             metric,
                                             goal,
                                             split,
                                             search_alg=search_alg,
                                             **executor)
    else:
        hyperopt_executor = get_build_hyperopt_executor(RAY)(
            parameters,
            output_feature,
            metric,
            goal,
            split,
            search_alg=search_alg,
            **executor)
        raytune_results = hyperopt_executor.execute(config,
                                                    dataset=rel_path,
                                                    output_directory=tmpdir)
        assert isinstance(raytune_results, RayTuneResults)
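As with the search-alg test, this one is parametrized over scheduler names. Ray Tune's built-in trial schedulers include `async_hyperband`, `hyperband`, `median_stopping_rule`, and `pb2` (which needs the `hyperparam_bounds` handled above), so a hypothetical parametrization could be:

import pytest

# Hypothetical list; the real module defines its own.
SCHEDULERS = ["async_hyperband", "hyperband", "median_stopping_rule", "pb2"]


@pytest.mark.parametrize("scheduler", SCHEDULERS)
def test_hyperopt_scheduler(scheduler, csv_filename, tmpdir, ray_cluster):
    ...  # body as in the example above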