def tpe_objective_function(config):
    """Objective for TPE search: fit the estimator described by ``config``
    and return the negated balanced-accuracy score on the test split.

    The optimizer minimizes, so the (higher-is-better) score is negated.
    """
    features_train, labels_train = train_data.data
    features_test, labels_test = test_data.data
    scorer = get_metric('bal_acc')
    _, model = get_estimator(config)
    model.fit(features_train, labels_train)
    # Flip the sign: smaller objective value == better balanced accuracy.
    return -scorer(model, features_test, labels_test)
def __init__(self, estimator, metric, task_type, evaluation_strategy, **evaluation_params):
    """Evaluator wrapper binding an estimator to a metric and an evaluation strategy.

    :param estimator: model object to be evaluated.
    :param metric: metric identifier string, resolved via ``get_metric``.
    :param task_type: one of the supported ``TASK_TYPES``; anything else raises.
    :param evaluation_strategy: e.g. 'holdout'; selects how performance is measured.
    :param evaluation_params: strategy-specific keyword options (e.g. ``train_size``).
    :raises ValueError: if ``task_type`` is not in ``TASK_TYPES``.
    """
    self.estimator = estimator
    if task_type not in TASK_TYPES:
        raise ValueError('Unsupported task type: %s' % task_type)
    self.metric = get_metric(metric)
    # Keep the raw identifier alongside the resolved scorer for logging/reporting.
    self.metric_name = metric
    self.evaluation_strategy = evaluation_strategy
    self.evaluation_params = evaluation_params
    if self.evaluation_strategy == 'holdout':
        # BUG FIX: the original ended with a bare expression
        # `self.evaluation_params['train_size']`, which raises KeyError in
        # exactly the case the guard tests for (key absent). Assign a default
        # instead. NOTE(review): 0.8 (80/20 holdout) is an assumed default --
        # confirm against the project's documented holdout configuration.
        if 'train_size' not in self.evaluation_params:
            self.evaluation_params['train_size'] = 0.8
def __init__(self, task_type=CLASSIFICATION, optimizer_type='eval_base', metric='acc',
             trans_set=None, time_budget=None, maximum_evaluation_num=None,
             time_limit_per_trans=600, mem_limit_per_trans=1024, fe_enabled=True,
             evaluator=None, debug=False, seed=1, tmp_directory='logs',
             logging_config=None, model_id=None, task_id='Default'):
    """Feature-engineering optimizer front-end: stores the search budget and
    transformation limits, resolves the metric, and prepares logging plus
    preprocessing state.

    :param task_type: learning task kind (defaults to CLASSIFICATION).
    :param optimizer_type: identifier of the FE optimizer implementation.
    :param metric: metric identifier, resolved via ``get_metric``.
    :param trans_set: optional restriction on the transformation set.
    :param time_budget: overall time budget; None means unbounded here.
    :param maximum_evaluation_num: cap on evaluations; None means unbounded here.
    :param time_limit_per_trans: per-transformation wall-clock limit (seconds).
    :param mem_limit_per_trans: per-transformation memory limit (MB, presumably --
        confirm unit against the transformation runner).
    :param fe_enabled: whether feature engineering is active.
    :param evaluator: externally supplied evaluator, if any.
    :param debug: accepted for interface compatibility; not stored or used here.
    :param seed: random seed.
    :param tmp_directory: directory for logs/artifacts; created if missing.
    :param logging_config: optional logging configuration passed to the logger.
    :param model_id: optional model identifier.
    :param task_id: identifier used to name the logger.
    """
    self.fe_enabled = fe_enabled
    self.trans_set = trans_set
    self.maximum_evaluation_num = maximum_evaluation_num
    self.time_budget = time_budget
    self.time_limit_per_trans = time_limit_per_trans
    self.mem_limit_per_trans = mem_limit_per_trans
    self.optimizer_type = optimizer_type
    self.evaluator = evaluator
    self.optimizer = None  # concrete optimizer is built lazily elsewhere
    self.metric = get_metric(metric)
    self.task_type = task_type
    self.task_id = task_id
    self.model_id = model_id
    self._seed = seed
    self.tmp_directory = tmp_directory
    self.logging_config = logging_config
    self._logger = self._get_logger(task_id)
    # Set up backend. exist_ok=True replaces the original check-then-create
    # (`if not os.path.exists: os.makedirs`), which could raise FileExistsError
    # if the directory appeared between the check and the call (TOCTOU race).
    os.makedirs(self.tmp_directory, exist_ok=True)
    # For data preprocessing: state filled in later by the fitting routines.
    self.uninformative_columns, self.uninformative_idx = list(), list()
    self.variance_selector = None
    self.onehot_encoder = None
    self.label_encoder = None
# NOTE(review): fragment -- the enclosing `def` header of this configuration-
# space builder is outside the visible chunk; the indentation below assumes
# one function level. Confirm against the full file.
    cs.add_conditions(aug_space.get_conditions())
    # Attach one conditional sub-space per candidate estimator; each sub-space
    # is active only when the estimator-choice hyperparameter selects it.
    for estimator_id in algorithm_candidates:
        sub_cs = get_model_config_space(estimator_id, include_estimator=False, include_aug=False)
        parent_hyperparameter = {
            'parent': estimator_choice,
            'value': estimator_id
        }
        cs.add_configuration_space(estimator_id, sub_cs, parent_hyperparameter=parent_hyperparameter)
    return cs


# Script section: build a joint space over two backbones and evaluate the
# default configuration on the CIFAR-10 image-classification dataset.
cs = get_pipeline_config_space(['resnet34', 'mobilenet'])
dataset = 'cifar10'
data_dir = 'data/img_datasets/%s/' % dataset
image_data = ImageDataset(data_path=data_dir, train_val_split=True)
hpo_evaluator = DLEvaluator(cs.get_default_configuration(),
                            IMG_CLS,
                            scorer=get_metric('acc'),
                            dataset=image_data,
                            device='cuda',  # assumes a CUDA device is available -- TODO confirm
                            image_size=32,
                            seed=1)
hpo_evaluator(cs.get_default_configuration())
def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace, cash_config_space: ConfigurationSpace,
             data: DataNode, fixed_config=None, trial_num=0, time_limit=None,
             metric='acc', optimizer='smac', ensemble_method='ensemble_selection',
             ensemble_size=50, per_run_time_limit=300, output_dir="logs",
             dataset_name='default_dataset', eval_type='holdout',
             resampling_params=None, n_jobs=1, seed=1):
    """Per-node solver state for a tree-structured search: records the node's
    position, the FE and CASH configuration spaces, the training data, and the
    evaluation/ensemble settings used when this node is optimized.

    :param node_list: list of nodes in the enclosing search tree.
    :param node_index: this node's position in ``node_list``.
    :param task_type: task identifier; classification membership is tested
        against ``CLS_TASKS`` below.
    :param timestamp: run timestamp propagated from the caller.
    :param fe_config_space: feature-engineering hyperparameter space.
    :param cash_config_space: combined algorithm-selection/HPO space.
    :param data: input ``DataNode``; a copy is stored so later mutation of the
        caller's object does not affect this node.
    :param fixed_config: optional configuration fragment held constant.
    :param trial_num: number of trials allotted to this node.
    :param time_limit: overall wall-clock limit; None means unbounded here.
    :param metric: metric identifier, resolved via ``get_metric``.
    :param optimizer: optimizer backend name (e.g. 'smac').
    :param ensemble_method: ensembling strategy name, or None.
    :param ensemble_size: maximum ensemble members.
    :param per_run_time_limit: per-evaluation wall-clock limit (seconds).
    :param output_dir: directory for outputs.
    :param dataset_name: label used for logging.
    :param eval_type: evaluation scheme (e.g. 'holdout').
    :param resampling_params: options for the evaluation scheme.
    :param n_jobs: parallelism degree.
    :param seed: random seed.
    """
    # Tree setting
    self.node_list = node_list
    self.node_index = node_index
    # Set up backend.
    self.dataset_name = dataset_name
    self.trial_num = trial_num
    self.time_limit = time_limit
    self.per_run_time_limit = per_run_time_limit
    self.start_time = time.time()  # reference point for budget accounting
    self.logger = get_logger('Soln-ml: %s' % dataset_name)
    # Basic settings.
    self.eval_type = eval_type
    self.resampling_params = resampling_params
    self.task_type = task_type
    self.timestamp = timestamp
    self.fe_config_space = fe_config_space
    self.cash_config_space = cash_config_space
    self.fixed_config = fixed_config
    # Defensive copy: keep this node's view of the data independent of callers.
    self.original_data = data.copy_()
    self.metric = get_metric(metric)
    self.optimizer = optimizer
    self.ensemble_method = ensemble_method
    self.ensemble_size = ensemble_size
    self.n_jobs = n_jobs
    self.seed = seed
    self.output_dir = output_dir
    self.early_stop_flag = False
    self.timeout_flag = False
    # Sentinel: any real evaluation beats negative infinity.
    self.incumbent_perf = -float("INF")
    self.incumbent = None
    self.eval_dict = dict()
    # Imbalance detection only applies to classification tasks.
    if self.task_type in CLS_TASKS:
        self.if_imbal = is_imbalanced_dataset(self.original_data)
    else:
        self.if_imbal = False
    self.es = None  # ensemble object, built later if ensembling is enabled
def __init__(self, time_limit=300, dataset_name='default_name', amount_of_resource=None,
             task_type=None, metric='bal_acc', include_algorithms=None,
             include_preprocessors=None, optimizer='smac',
             ensemble_method='ensemble_selection', enable_meta_algorithm_selection=True,
             enable_fe=True, per_run_time_limit=150, ensemble_size=50,
             evaluation='holdout', resampling_params=None, output_dir="logs",
             logging_config=None, random_state=1, n_jobs=1):
    """AutoML facade initializer: records budgets and options, picks the
    candidate algorithm list for the task type, and validates the ensemble
    method.

    :param time_limit: overall wall-clock budget (seconds).
    :param dataset_name: label used for logging.
    :param amount_of_resource: evaluation budget; None means effectively
        unbounded (1e8).
    :param task_type: task identifier; must be in ``CLS_TASKS`` or ``RGS_TASKS``.
    :param metric: metric identifier, resolved via ``get_metric``.
    :param include_algorithms: explicit algorithm whitelist; None selects the
        full list for the task type.
    :param include_preprocessors: explicit preprocessor whitelist; supplying
        one disables meta-algorithm selection (see below).
    :param optimizer: optimizer backend name.
    :param ensemble_method: ensembling strategy; must be in ``ensemble_list``
        if not None.
    :param enable_meta_algorithm_selection: whether meta-learning picks
        algorithms.
    :param enable_fe: whether feature engineering is enabled.
    :param per_run_time_limit: per-evaluation wall-clock limit (seconds).
    :param ensemble_size: maximum ensemble members.
    :param evaluation: evaluation scheme (e.g. 'holdout').
    :param resampling_params: options for the evaluation scheme.
    :param output_dir: directory for outputs.
    :param logging_config: optional logging configuration.
    :param random_state: random seed.
    :param n_jobs: parallelism degree.
    :raises ValueError: for image/text classification task types (AutoDL
        territory), unknown task types, or unsupported ensemble methods.
    """
    self.metric_id = metric
    self.metric = get_metric(self.metric_id)
    self.dataset_name = dataset_name
    self.time_limit = time_limit
    self.seed = random_state
    self.per_run_time_limit = per_run_time_limit
    self.output_dir = output_dir
    self.logging_config = logging_config
    self.logger = self._get_logger(self.dataset_name)
    self.evaluation_type = evaluation
    self.resampling_params = resampling_params
    self.include_preprocessors = include_preprocessors
    # None means "no explicit cap": substitute a budget large enough to be
    # effectively unbounded.
    self.amount_of_resource = int(1e8) if amount_of_resource is None else amount_of_resource
    self.optimizer = optimizer
    self.ensemble_method = ensemble_method
    self.ensemble_size = ensemble_size
    self.enable_meta_algorithm_selection = enable_meta_algorithm_selection
    self.enable_fe = enable_fe
    self.task_type = task_type
    self.n_jobs = n_jobs
    self.solver = None  # concrete solver is constructed later
    self.global_start_time = time.time()
    self.eval_time = None
    self.total_time = None
    # Disable meta learning: a custom preprocessor list conflicts with
    # meta-learned algorithm selection.
    if self.include_preprocessors is not None:
        self.enable_meta_algorithm_selection = False
    if include_algorithms is not None:
        self.include_algorithms = include_algorithms
    else:
        if task_type in CLS_TASKS:
            # Image/text classification is handled by the AutoDL module.
            if task_type in [IMG_CLS, TEXT_CLS]:
                raise ValueError('Please use AutoDL module, instead of AutoML.')
            else:
                self.include_algorithms = list(classification_algorithms)
        elif task_type in RGS_TASKS:
            self.include_algorithms = list(regression_algorithms)
        else:
            raise ValueError("Unknown task type %s" % task_type)
    if ensemble_method is not None and ensemble_method not in ensemble_list:
        raise ValueError("%s is not supported for ensemble!" % ensemble_method)