def _calculate_metafeatures(data_feat_type, data_info_task, basename, metalearning_cnt, x_train, y_train, watcher, logger): # == Calculate metafeatures task_name = 'CalculateMetafeatures' watcher.start_task(task_name) categorical = [ True if feat_type.lower() in ['categorical'] else False for feat_type in data_feat_type ] if metalearning_cnt <= 0: result = None elif data_info_task in \ [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION, MULTILABEL_CLASSIFICATION]: logger.info('Start calculating metafeatures for %s' % basename) result = calc_meta_features(x_train, y_train, categorical=categorical, dataset_name=basename) else: result = None logger.info('Metafeatures not calculated') watcher.stop_task(task_name) logger.info( 'Calculating Metafeatures (categorical attributes) took %5.2f' % watcher.wall_elapsed(task_name)) return result
def test_metalearning(self):
    """Check the first metalearning suggestion for 'digits' per metric.

    For every metric, the first configuration string produced by
    ``create_metalearning_string_for_smac_call`` must start with the
    expected ``--initial-challengers`` prefix.
    """
    dataset_name = 'digits'

    # Only two distinct prefixes are expected across the five metrics.
    weighted_proj_logit = ("--initial-challengers \" "
                           "-balancing:strategy 'weighting' "
                           "-classifier:__choice__ 'proj_logit'")
    unbalanced_random_forest = ("--initial-challengers \" "
                                "-balancing:strategy 'none' "
                                "-classifier:__choice__ 'random_forest'")
    expected_prefixes = {
        ACC_METRIC: weighted_proj_logit,
        AUC_METRIC: unbalanced_random_forest,
        BAC_METRIC: weighted_proj_logit,
        F1_METRIC: weighted_proj_logit,
        PAC_METRIC: unbalanced_random_forest,
    }

    for metric, expected_prefix in expected_prefixes.items():
        dataset_info = {
            'metric': metric,
            'task': MULTICLASS_CLASSIFICATION,
            'is_sparse': False,
        }
        configuration_space = get_configuration_space(
            dataset_info, include_preprocessors=['no_preprocessing'])

        X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
        # Every feature is treated as non-categorical.
        categorical = [False] * X_train.shape[1]

        meta_features_label = calc_meta_features(
            X_train, Y_train, categorical, dataset_name)
        meta_features_encoded_label = calc_meta_features_encoded(
            X_train, Y_train, categorical, dataset_name)

        smac_strings = create_metalearning_string_for_smac_call(
            meta_features_label,
            meta_features_encoded_label,
            configuration_space,
            dataset_name,
            metric,
            MULTICLASS_CLASSIFICATION,
            False,
            1,
            None)

        print(metric)
        print(smac_strings[0])
        self.assertTrue(smac_strings[0].startswith(expected_prefix))
def _calculate_metafeatures( data_feat_type, data_info_task, basename, metalearning_cnt, x_train, y_train, watcher, logger ): # == Calculate metafeatures task_name = "CalculateMetafeatures" watcher.start_task(task_name) categorical = [True if feat_type.lower() in ["categorical"] else False for feat_type in data_feat_type] if metalearning_cnt <= 0: result = None elif data_info_task in [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION, MULTILABEL_CLASSIFICATION]: logger.info("Start calculating metafeatures for %s" % basename) result = calc_meta_features(x_train, y_train, categorical=categorical, dataset_name=basename) else: result = None logger.info("Metafeatures not calculated") watcher.stop_task(task_name) logger.info("Calculating Metafeatures (categorical attributes) took %5.2f" % watcher.wall_elapsed(task_name)) return result