def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: Union[str, List[str]],
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
    user_config: Dict = None,
) -> dict:
    """Returns an auto-generated Ludwig config with the intent of training the best model on the
    given dataset / target in the given time limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available host / GPU memory
    :param user_config: (dict) override automatic selection of specified config items

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s)
    model_config = _model_select(dataset, default_configs, user_config)
    if tune_for_memory:
        if ray.is_initialized():
            # Offload memory tuning to a Ray task when a cluster is available;
            # only the tuned config is kept, the fits-in-memory flag is discarded.
            model_config, _ = ray.get(
                ray.remote(num_cpus=1)(memory_tune_config).remote(
                    model_config, dataset))
        else:
            model_config, _ = memory_tune_config(model_config, dataset)
    return model_config
def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: str,
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
) -> dict:
    """Returns an auto-generated Ludwig config with the intent of training the best model on the
    given dataset / target in the given time limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available host / GPU memory

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s)
    model_config = _model_select(default_configs)
    if tune_for_memory:
        if ray.is_initialized():
            # Offload memory tuning to a Ray task when a cluster is available;
            # only the tuned config is kept, the fits-in-memory flag is discarded.
            model_config, _ = ray.get(
                ray.remote(num_cpus=1)(memory_tune_config).remote(
                    model_config, dataset))
        else:
            model_config, _ = memory_tune_config(model_config, dataset)
    return model_config
def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: Union[str, List[str]],
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
    user_config: Dict = None,
    random_seed: int = default_random_seed,
    use_reference_config: bool = False,
) -> dict:
    """Returns an auto-generated Ludwig config with the intent of training the best model on the
    given dataset / target in the given time limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available host / GPU memory
    :param user_config: (dict) override automatic selection of specified config items
    :param random_seed: (int, default: `42`) a random seed that will be used anywhere there is a call to a
                        random number generator, including hyperparameter search sampling, as well as
                        data splitting, parameter initialization and training set shuffling
    :param use_reference_config: (bool) refine hyperopt search space by setting first search point from
                                 reference model config, if any

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s, random_seed)
    model_config, model_category, row_count = _model_select(
        dataset, default_configs, user_config, use_reference_config)
    if tune_for_memory:
        if ray.is_initialized():
            resources = get_available_resources()
            # Check if the cluster has GPUs; if so, reserve one so memory tuning
            # measures against GPU memory. max_calls=1 forces the worker to be
            # released after the single tuning call.
            if resources["gpu"] > 0:
                model_config, fits_in_memory = ray.get(
                    ray.remote(num_gpus=1, num_cpus=1, max_calls=1)(memory_tune_config).remote(
                        model_config, dataset, model_category, row_count))
            else:
                model_config, fits_in_memory = ray.get(
                    ray.remote(num_cpus=1)(memory_tune_config).remote(
                        model_config, dataset, model_category, row_count))
        else:
            model_config, fits_in_memory = memory_tune_config(
                model_config, dataset, model_category, row_count)
        # Warn (inside the tune_for_memory branch — fits_in_memory is only bound here)
        # when tuning could not certify the model fits in available memory.
        if not fits_in_memory:
            warnings.warn(
                "AutoML with tune_for_memory enabled did not return estimation that model will fit in memory. "
                "If out-of-memory occurs, consider setting AutoML user_config to reduce model memory footprint. "
            )
    return model_config