from typing import Dict, List, Union
import warnings

import dask.dataframe as dd
import GPUtil
import pandas as pd
import psutil
import ray

# Package-internal names used below (BYTES_PER_MiB, get_available_resources,
# DatasetInfo, default_random_seed, _model_select, memory_tune_config, _ray_init,
# allocate_experiment_resources, get_dataset_info, get_features_config, load_yaml,
# BASE_AUTOML_CONFIG, encoder_defaults, combiner_defaults) are assumed to be
# imported from the surrounding package.


def get_machine_memory():
    if ray.is_initialized():  # using ray cluster

        @ray.remote(num_gpus=1)
        def get_remote_gpu():
            gpus = GPUtil.getGPUs()
            total_mem_mb = gpus[0].memoryTotal
            return total_mem_mb * BYTES_PER_MiB

        @ray.remote(num_cpus=1)
        def get_remote_cpu():
            total_mem = psutil.virtual_memory().total
            return total_mem

        resources = get_available_resources()
        # check if cluster has GPUs
        if resources["gpu"] > 0:
            machine_mem = ray.get(get_remote_gpu.remote())
        else:
            machine_mem = ray.get(get_remote_cpu.remote())
    else:  # not using ray cluster
        if GPUtil.getGPUs():
            machine_mem = GPUtil.getGPUs()[0].memoryTotal * BYTES_PER_MiB
        else:
            machine_mem = psutil.virtual_memory().total
    return machine_mem
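
# Illustrative sketch (not part of the original module): one way the byte count
# from get_machine_memory() might feed a per-trial memory budget. The helper
# name and reserve fraction are hypothetical, for illustration only.
def _example_per_trial_memory_budget(num_trials: int, reserve_fraction: float = 0.1) -> float:
    """Split detected machine memory across trials, holding back a safety reserve."""
    usable_bytes = get_machine_memory() * (1.0 - reserve_fraction)  # leave headroom for OS / driver
    return usable_bytes / max(num_trials, 1)
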
def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: Union[str, List[str]],
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
    user_config: Dict = None,
    random_seed: int = default_random_seed,
    use_reference_config: bool = False,
) -> dict:
    """Returns an auto-generated Ludwig config with the intent of training the best model on the given
    dataset / target in the given time limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available host / GPU memory
    :param user_config: (dict) override automatic selection of specified config items
    :param random_seed: (int, default: `42`) a random seed that will be used anywhere there is a call to a
                        random number generator, including hyperparameter search sampling, as well as data splitting,
                        parameter initialization and training set shuffling
    :param use_reference_config: (bool) refine hyperopt search space by setting first search point from
                                 reference model config, if any

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s, random_seed)
    model_config, model_category, row_count = _model_select(
        dataset, default_configs, user_config, use_reference_config
    )
    if tune_for_memory:
        if ray.is_initialized():
            resources = get_available_resources()
            # check if cluster has GPUs
            if resources["gpu"] > 0:
                model_config, fits_in_memory = ray.get(
                    ray.remote(num_gpus=1, num_cpus=1, max_calls=1)(memory_tune_config).remote(
                        model_config, dataset, model_category, row_count
                    )
                )
            else:
                model_config, fits_in_memory = ray.get(
                    ray.remote(num_cpus=1)(memory_tune_config).remote(
                        model_config, dataset, model_category, row_count
                    )
                )
        else:
            model_config, fits_in_memory = memory_tune_config(model_config, dataset, model_category, row_count)
        if not fits_in_memory:
            warnings.warn(
                "AutoML with tune_for_memory enabled did not return an estimate that the model will fit in memory. "
                "If out-of-memory occurs, consider setting AutoML user_config to reduce the model memory footprint."
            )
    return model_config
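
# Illustrative sketch (not part of the original module): a minimal, hedged call
# to create_auto_config() on a tabular dataset. The dataset path and target
# column name are hypothetical placeholders.
def _example_create_auto_config() -> dict:
    return create_auto_config(
        dataset="train.csv",    # hypothetical dataset file
        target="label",         # hypothetical target column
        time_limit_s=3600,      # one hour total budget for auto_train
        tune_for_memory=True,   # refine the search space to fit host / GPU memory
    )
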
def _create_default_config(
    dataset: Union[str, dd.core.DataFrame, pd.DataFrame, DatasetInfo],
    target_name: Union[str, List[str]] = None,
    time_limit_s: Union[int, float] = None,
    random_seed: int = default_random_seed,
) -> dict:
    """Returns auto_train configs for three available combiner models.

    Coordinates the following tasks:

    - extracts fields and generates list of FieldInfo objects
    - gets field metadata (i.e. avg. words, total non-null entries)
    - builds input_features and output_features section of config
    - for each combiner, adds default training, hyperopt
    - infers resource constraints and adds gpu and cpu resource allocation per trial

    # Inputs
    :param dataset: (str) filepath to dataset.
    :param target_name: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts as the stopping parameter
    :param random_seed: (int, default: `42`) seed for any random number generator invoked during config creation

    # Return
    :return: (dict) dictionaries contain auto train config files for all available combiner types
    """
    _ray_init()
    resources = get_available_resources()
    experiment_resources = allocate_experiment_resources(resources)

    dataset_info = dataset
    if not isinstance(dataset, DatasetInfo):
        dataset_info = get_dataset_info(dataset)

    input_and_output_feature_config = get_features_config(
        dataset_info.fields, dataset_info.row_count, resources, target_name
    )

    model_configs = {}

    # read in base config and update with experiment resources
    base_automl_config = load_yaml(BASE_AUTOML_CONFIG)
    base_automl_config["hyperopt"]["executor"].update(experiment_resources)
    base_automl_config["hyperopt"]["executor"]["time_budget_s"] = time_limit_s
    if time_limit_s is not None:
        base_automl_config["hyperopt"]["sampler"]["scheduler"]["max_t"] = time_limit_s
    base_automl_config.update(input_and_output_feature_config)
    model_configs["base_config"] = base_automl_config

    # read in all encoder configs
    for feat_type, default_configs in encoder_defaults.items():
        if feat_type not in model_configs.keys():
            model_configs[feat_type] = {}
        for encoder_name, encoder_config_path in default_configs.items():
            model_configs[feat_type][encoder_name] = load_yaml(encoder_config_path)

    # read in all combiner configs
    model_configs["combiner"] = {}
    for combiner_type, default_config in combiner_defaults.items():
        combiner_config = load_yaml(default_config)
        model_configs["combiner"][combiner_type] = combiner_config

    return model_configs
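
# Illustrative sketch (not part of the original module): walking the nested dict
# returned by _create_default_config — one "base_config" plus per-feature-type
# encoder configs and per-combiner-type configs. The dataset path and target
# column are hypothetical.
def _example_inspect_default_configs() -> None:
    configs = _create_default_config("train.csv", "label", time_limit_s=3600)
    print("hyperopt time budget:", configs["base_config"]["hyperopt"]["executor"]["time_budget_s"])
    for combiner_type in configs["combiner"]:
        print("combiner config loaded:", combiner_type)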