Example #1
import copy

# NOTE: the helper functions (merge_with_defaults, get_trainingset_metadata,
# get_model_name, get_machine_memory, get_new_params, sub_new_params,
# compute_memory_usage) and the HYPEROPT / RANKED_MODIFIABLE_PARAM_LIST
# constants are assumed to be provided by the surrounding (Ludwig-style)
# automl module.

def memory_tune_config(config, dataset):
    fits_in_memory = False
    raw_config = merge_with_defaults(config)
    training_set_metadata = get_trainingset_metadata(raw_config, dataset)
    modified_hyperparam_search_space = copy.deepcopy(
        raw_config[HYPEROPT]["parameters"])
    params_to_modify = RANKED_MODIFIABLE_PARAM_LIST[get_model_name(raw_config)]
    param_list = list(params_to_modify.keys())
    current_param_values = {}
    max_memory = get_machine_memory()

    # loop until the config fits in memory or every tunable parameter has been
    # exhausted (param_list empties out below); `while param_list is not None`
    # would never terminate, since popping items never makes the list None
    while param_list:
        # compute memory utilization
        current_param_values = get_new_params(
            current_param_values, modified_hyperparam_search_space,
            params_to_modify)
        temp_config = sub_new_params(raw_config, current_param_values)
        if compute_memory_usage(temp_config,
                                training_set_metadata) < max_memory:
            fits_in_memory = True
            break
        # check if we have exhausted tuning of current param (e.g. we can no longer reduce the param value)
        param, min_value = param_list[0], params_to_modify[param_list[0]]

        if param in modified_hyperparam_search_space:
            param_space = modified_hyperparam_search_space[param]["space"]
            if param_space == "choice":
                categories = modified_hyperparam_search_space[param]["categories"]
                # drop the largest category, but only while at least two
                # choices would remain and the next-largest stays above the
                # parameter's minimum value
                if len(categories) > 2 and categories[-2] > min_value:
                    modified_hyperparam_search_space[param]["categories"] = categories[:-1]
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
            else:
                # numeric space: shrink the upper bound by 10% of the current range
                upper_bound = modified_hyperparam_search_space[param]["upper"]
                lower_bound = modified_hyperparam_search_space[param]["lower"]
                reduction_val = (upper_bound - lower_bound) * 0.1
                new_upper_bound = upper_bound - reduction_val
                if new_upper_bound > lower_bound and new_upper_bound > min_value:
                    modified_hyperparam_search_space[param]["upper"] = new_upper_bound
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
        else:
            param_list.pop(0)  # param not in hyperopt search space

    modified_config = copy.deepcopy(config)

    modified_config[HYPEROPT]["parameters"] = modified_hyperparam_search_space
    return modified_config, fits_in_memory
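
A minimal usage sketch for the function above. The `user_config` dict, the
training frame, and the downstream `run_experiment` call are hypothetical
stand-ins, not part of the excerpt:

import warnings

import pandas as pd

# Hypothetical inputs: a Ludwig-style config with a hyperopt "parameters"
# section, and an in-memory training frame.
user_config = load_user_config()          # hypothetical helper
train_df = pd.read_csv("train.csv")       # illustrative path

tuned_config, fits_in_memory = memory_tune_config(user_config, train_df)
if not fits_in_memory:
    warnings.warn(
        "Search space could not be reduced enough to fit in machine memory; "
        "the largest trials may still fail.")
run_experiment(tuned_config, train_df)    # hypothetical downstream call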
Example #2
import warnings
from typing import Union

import numpy as np
import pandas as pd
import dask.dataframe as dd

# NOTE: OUTPUT_DIR, AutoTrainResults, _ray_init, _train, and get_model_name
# are assumed to be provided by the surrounding (Ludwig-style) automl module.

def train_with_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame],
    config: dict,
    output_directory: str = OUTPUT_DIR,
    **kwargs,
) -> AutoTrainResults:
    """
    Performs hyperparameter optimization with respect to the given config
    and selects the best model.

    # Inputs
    :param dataset: (str) filepath to dataset.
    :param config: (dict) optional Ludwig configuration to use for training, defaults
                   to `create_auto_config`.
    :param output_directory: (str) directory into which to write results, defaults to
        current working directory.

    # Returns
    :return: (AutoTrainResults) results containing hyperopt experiments and best model
    """
    _ray_init()
    model_name = get_model_name(config)
    hyperopt_results = _train(config,
                              dataset,
                              output_directory=output_directory,
                              model_name=model_name,
                              **kwargs)
    # catch the edge case where metric_score is NaN
    # TODO (ASN): decide how we want to proceed if at least one trial has
    # completed
    for trial in hyperopt_results.ordered_trials:
        if np.isnan(trial.metric_score):
            warnings.warn(
                "There was an error running the experiment: a trial failed to "
                "start. Consider increasing the time budget for the experiment.")

    experiment_analysis = hyperopt_results.experiment_analysis
    return AutoTrainResults(experiment_analysis)
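
A short, hypothetical driver for `train_with_config`. The CSV path and the
config dict (Ludwig-style `input_features`/`output_features`/`hyperopt`
sections, with assumed parameter names) are illustrative only:

import pandas as pd

# Illustrative config: one text input, one category output, and a single
# hyperopt search dimension over the learning rate.
config = {
    "input_features": [{"name": "review", "type": "text"}],
    "output_features": [{"name": "sentiment", "type": "category"}],
    "hyperopt": {
        "goal": "minimize",
        "metric": "loss",
        "parameters": {
            "trainer.learning_rate": {
                "space": "loguniform", "lower": 1e-5, "upper": 1e-2,
            },
        },
    },
}

results = train_with_config(pd.read_csv("train.csv"), config,
                            output_directory="./results")
# experiment_analysis is the object stored by the AutoTrainResults constructor
# above; use it to inspect individual hyperopt trials.
analysis = results.experiment_analysis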