def run(dataset: Dataset, config: TaskConfig):
    # TODO: use rpy2 instead? Not necessary here though, as the call is very simple.
    log.info(f"\n**** Autoxgboost (R) [{config.framework_version}] ****\n")
    save_metadata(config)

    here = dir_of(__file__)
    meta_results_file = os.path.join(config.output_dir, "meta_results.csv")
    run_cmd(r"""Rscript --vanilla -e "
        source('{script}');
        run('{train}', '{test}', target.index = {target_index}, '{type}', '{output}', {cores}, time.budget = {time_budget}, meta_results_file='{meta_results}')
        " """.format(
        script=os.path.join(here, 'exec.R'),
        train=dataset.train.path,
        test=dataset.test.path,
        target_index=dataset.target.index + 1,  # R is 1-indexed
        type=config.type,
        output=config.output_predictions_file,
        cores=config.cores,
        time_budget=config.max_runtime_seconds,
        meta_results=meta_results_file,
    ), _live_output_=True)

    log.info("Predictions saved to %s", config.output_predictions_file)
    meta_results = read_csv(meta_results_file)
    return dict(
        training_duration=meta_result(meta_results, 'training_duration'),
        predict_duration=meta_result(meta_results, 'predict_duration'),
    )
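# For reference, the command rendered by the template above looks roughly like
# this (hypothetical paths; target.index is shifted by 1 because R is 1-indexed):
#
#   Rscript --vanilla -e "
#     source('/bench/frameworks/autoxgboost/exec.R');
#     run('/data/train.arff', '/data/test.arff', target.index = 21, 'classification',
#         '/output/predictions.csv', 4, time.budget = 3600,
#         meta_results_file='/output/meta_results.csv')
#   "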
def run(dataset: Dataset, config: TaskConfig):
    # TODO: use rpy2 instead? Not necessary here though, as the call is very simple.
    log.info("\n**** Random Forest (R) ****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'
    if not is_classification:
        raise ValueError('Regression is not supported.')

    here = dir_of(__file__)
    meta_results_file = os.path.join(config.output_dir, "meta_results.csv")
    run_cmd(r"""Rscript --vanilla -e "
        source('{script}');
        run('{train}', '{test}', '{output}', cores={cores}, meta_results_file='{meta_results}')
        " """.format(
        script=os.path.join(here, 'exec.R'),
        train=dataset.train.path,
        test=dataset.test.path,
        output=config.output_predictions_file,
        cores=config.cores,
        meta_results=meta_results_file,
    ), _live_output_=True)

    log.info("Predictions saved to %s", config.output_predictions_file)
    meta_results = read_csv(meta_results_file)
    return dict(
        training_duration=meta_result(meta_results, 'training_duration'),
        predict_duration=meta_result(meta_results, 'predict_duration'),
    )
def run(dataset: Dataset, config: TaskConfig):
    log.info("\n**** Constant predictor (sklearn dummy) ****\n")
    save_metadata(config, version=sklearn.__version__)

    is_classification = config.type == 'classification'
    predictor = (DummyClassifier(strategy='prior') if is_classification
                 else DummyRegressor(strategy='median'))

    encode = config.framework_params.get('encode', False)
    X_train = dataset.train.X_enc if encode else dataset.train.X
    y_train = dataset.train.y_enc if encode else dataset.train.y
    X_test = dataset.test.X_enc if encode else dataset.test.X
    y_test = dataset.test.y_enc if encode else dataset.test.y

    with Timer() as training:
        predictor.fit(X_train, y_train)
    with Timer() as predict:
        predictions = predictor.predict(X_test)
    probabilities = predictor.predict_proba(X_test) if is_classification else None

    save_predictions(dataset=dataset,
                     output_file=config.output_predictions_file,
                     probabilities=probabilities,
                     predictions=predictions,
                     truth=y_test,
                     target_is_encoded=encode)

    return dict(
        models_count=1,
        training_duration=training.duration,
        predict_duration=predict.duration,
    )
def run(dataset: Dataset, config: TaskConfig):
    log.info(f"\n**** Gradient Boosting [sklearn v{sklearn.__version__}] ****\n")
    save_metadata(config, version=sklearn.__version__)

    is_classification = config.type == 'classification'

    X_train, X_test = impute(dataset.train.X_enc, dataset.test.X_enc)
    y_train, y_test = dataset.train.y, dataset.test.y

    estimator = GradientBoostingClassifier if is_classification else GradientBoostingRegressor
    predictor = estimator(random_state=config.seed, **config.framework_params)

    with Timer() as training:
        predictor.fit(X_train, y_train)
    with Timer() as predict:
        predictions = predictor.predict(X_test)
    probabilities = predictor.predict_proba(X_test) if is_classification else None

    save_predictions(dataset=dataset,
                     output_file=config.output_predictions_file,
                     probabilities=probabilities,
                     predictions=predictions,
                     truth=y_test)

    return dict(
        models_count=1,
        training_duration=training.duration,
        predict_duration=predict.duration,
    )
def run(dataset, config):
    log.info(f"\n**** Random Forest [sklearn v{sklearn.__version__}] ****\n")
    save_metadata(config, version=sklearn.__version__)

    is_classification = config.type == 'classification'

    X_train, X_test = dataset.train.X_enc, dataset.test.X_enc
    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config

    log.info("Running RandomForest with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
    log.warning("We completely ignore the requirement to stay within the time limit.")
    log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))

    estimator = RandomForestClassifier if is_classification else RandomForestRegressor
    rf = estimator(n_jobs=n_jobs, random_state=config.seed, **training_params)

    with utils.Timer() as training:
        rf.fit(X_train, y_train)
    with utils.Timer() as predict:
        predictions = rf.predict(X_test)
    probabilities = rf.predict_proba(X_test) if is_classification else None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(rf),
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset: Dataset, config: TaskConfig):
    log.info("\n**** TabNet ****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'

    X_train, X_test = dataset.train.X, dataset.test.X
    X_train, X_test = impute(X_train, X_test)
    # Fit the encoder on the union of train and test so that categories
    # appearing only in the test set are still mapped.
    X = np.concatenate((X_train, X_test), axis=0)
    enc = OrdinalEncoder()
    enc.fit(X)
    X_train = enc.transform(X_train)
    X_test = enc.transform(X_test)
    y_train, y_test = dataset.train.y, dataset.test.y

    estimator = TabNetClassifier if is_classification else TabNetRegressor
    predictor = estimator()  # hyperparameters can be customized here

    if not is_classification:
        # TabNetRegressor expects 2D float targets.
        y_train = np.reshape(y_train.astype(np.float32), (-1, 1))
        y_test = np.reshape(y_test.astype(np.float32), (-1, 1))

    with Timer() as training:
        predictor.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])
    with Timer() as predict:
        predictions = predictor.predict(X_test)
    probabilities = predictor.predict_proba(X_test) if is_classification else None

    save_predictions(dataset=dataset,
                     output_file=config.output_predictions_file,
                     probabilities=probabilities,
                     predictions=predictions,
                     truth=y_test)

    return dict(
        models_count=1,
        training_duration=training.duration,
        predict_duration=predict.duration,
    )
def run(dataset: Dataset, config: TaskConfig):
    log.info(f"\n**** AutoWEKA [v{config.framework_version}] ****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'
    if not is_classification:
        raise ValueError('Regression is not supported.')

    # Mapping of benchmark metrics to Weka metrics
    metrics_mapping = dict(
        acc='errorRate',
        auc='areaUnderROC',
        logloss='kBInformation'
    )
    metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if metric is None:
        raise ValueError("Performance metric {} not supported.".format(config.metric))

    train_file = dataset.train.path
    test_file = dataset.test.path
    # Weka requires the target to be the last attribute.
    if dataset.target.index != len(dataset.predictors):
        train_file = reorder_dataset(dataset.train.path, target_src=dataset.target.index)
        test_file = reorder_dataset(dataset.test.path, target_src=dataset.target.index)

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    parallelRuns = config.framework_params.get('_parallelRuns', config.cores)

    memLimit = config.framework_params.get('_memLimit', 'auto')
    if memLimit == 'auto':
        memLimit = max(min(config.max_mem_size_mb,
                           math.ceil(config.max_mem_size_mb / parallelRuns)),
                       1024)  # AutoWEKA default memLimit
    log.info("Using %sMB memory per run on %s parallel runs.", memLimit, parallelRuns)

    f = split_path(config.output_predictions_file)
    f.extension = '.weka_pred.csv'
    weka_file = path_from_split(f)

    cmd_root = "java -cp {here}/lib/autoweka/autoweka.jar weka.classifiers.meta.AutoWEKAClassifier ".format(here=dir_of(__file__))
    cmd_params = dict(
        t='"{}"'.format(train_file),
        T='"{}"'.format(test_file),
        memLimit=memLimit,
        classifications='"weka.classifiers.evaluation.output.prediction.CSV -distribution -file \\\"{}\\\""'.format(weka_file),
        timeLimit=int(config.max_runtime_seconds / 60),
        parallelRuns=parallelRuns,
        metric=metric,
        seed=config.seed % (1 << 16),  # Weka accepts only int16 seeds
        **training_params
    )
    cmd = cmd_root + ' '.join(["-{} {}".format(k, v) for k, v in cmd_params.items()])

    with Timer() as training:
        run_cmd(cmd, _live_output_=True)

    # If target values are not sorted alphabetically in the ARFF file, then class probabilities
    # are returned in the original order; interestingly, other frameworks seem to always sort
    # the target values first. That's why we need to specify the probabilities labels here:
    # sorting + formatting is done in the saving function.
    probabilities_labels = dataset.target.values
    if not os.path.exists(weka_file):
        raise NoResultError("AutoWEKA failed producing any prediction.")
    with open(weka_file, 'r') as pred_file:
        probabilities = []
        predictions = []
        truth = []
        # Skip the header line and the trailing empty line.
        for line in pred_file.readlines()[1:-1]:
            inst, actual, predicted, error, *distribution = line.split(',')
            pred_probabilities = [pred_probability.replace('*', '').replace('\n', '')
                                  for pred_probability in distribution]
            _, pred = predicted.split(':')
            _, tru = actual.split(':')
            probabilities.append(pred_probabilities)
            predictions.append(pred)
            truth.append(tru)

    save_predictions(dataset=dataset,
                     output_file=config.output_predictions_file,
                     probabilities=probabilities,
                     predictions=predictions,
                     truth=truth,
                     probabilities_labels=probabilities_labels)

    return dict(training_duration=training.duration)
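# For reference, each data row of the Weka prediction CSV parsed above looks
# roughly like this ('-distribution' appends one probability per class, with
# '*' marking the predicted class; values are illustrative):
#
#   inst#,actual,predicted,error,distribution
#   1,1:no,2:yes,+,0.26,*0.74
#
# so `line.split(',')` yields ['1', '1:no', '2:yes', '+', '0.26', '*0.74\n'],
# which is why the loop strips '*' and '\n' from the distribution entries
# and splits the 'index:label' pairs on ':'.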
def run(dataset, config):
    log.info(f"\n**** TPOT [v{__version__}] ****\n")
    save_metadata(config, version=__version__)

    is_classification = config.type == 'classification'
    # Mapping of benchmark metrics to TPOT metrics
    metrics_mapping = dict(
        acc='accuracy',
        auc='roc_auc',
        f1='f1',
        logloss='neg_log_loss',
        mae='neg_mean_absolute_error',
        mse='neg_mean_squared_error',
        msle='neg_mean_squared_log_error',
        r2='r2',
        rmse='neg_mean_squared_error',  # TPOT can score on mse, as the app computes rmse independently on predictions
    )
    scoring_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if scoring_metric is None:
        raise ValueError("Performance metric {} not supported.".format(config.metric))

    X_train = dataset.train.X_enc
    y_train = dataset.train.y_enc

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config

    log.info('Running TPOT with a maximum time of %ss on %s cores, optimizing %s.',
             config.max_runtime_seconds, n_jobs, scoring_metric)
    runtime_min = config.max_runtime_seconds / 60

    estimator = TPOTClassifier if is_classification else TPOTRegressor
    tpot = estimator(n_jobs=n_jobs,
                     max_time_mins=runtime_min,
                     scoring=scoring_metric,
                     random_state=config.seed,
                     **training_params)

    with utils.Timer() as training:
        tpot.fit(X_train, y_train)

    log.info('Predicting on the test set.')
    X_test = dataset.test.X_enc
    y_test = dataset.test.y_enc
    with utils.Timer() as predict:
        predictions = tpot.predict(X_test)
    try:
        probabilities = tpot.predict_proba(X_test) if is_classification else None
    except RuntimeError:
        # TPOT throws a RuntimeError if the optimized pipeline does not support `predict_proba`.
        probabilities = "predictions"  # encoding is handled by caller in `__init__.py`

    save_artifacts(tpot, config)

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(tpot.evaluated_individuals_),
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset, config):
    log.info(f"\n**** AutoGluon [v{__version__}] ****\n")
    save_metadata(config, version=__version__)

    metrics_mapping = dict(
        acc=metrics.accuracy,
        auc=metrics.roc_auc,
        f1=metrics.f1,
        logloss=metrics.log_loss,
        mae=metrics.mean_absolute_error,
        mse=metrics.mean_squared_error,
        r2=metrics.r2,
        # rmse=metrics.root_mean_squared_error,  # metrics.root_mean_squared_error incorrectly registered in autogluon REGRESSION_METRICS
        rmse=metrics.mean_squared_error,  # for now, we can let autogluon optimize training on mse: anyway we compute the final score from predictions.
    )

    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    is_classification = config.type == 'classification'
    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}

    column_names, _ = zip(*dataset.columns)
    column_types = dict(dataset.columns)
    train = pd.DataFrame(dataset.train.data, columns=column_names).astype(column_types, copy=False)
    label = dataset.target.name
    print(f"Columns dtypes:\n{train.dtypes}")

    output_dir = output_subdir("models", config)
    with utils.Timer() as training:
        predictor = task.fit(
            train_data=train,
            label=label,
            problem_type=dataset.problem_type,
            output_directory=output_dir,
            time_limits=config.max_runtime_seconds,
            eval_metric=perf_metric.name,
            **training_params
        )

    test = pd.DataFrame(dataset.test.data, columns=column_names).astype(column_types, copy=False)
    X_test = test.drop(columns=label)
    y_test = test[label]

    with utils.Timer() as predict:
        predictions = predictor.predict(X_test)

    probabilities = (predictor.predict_proba(dataset=X_test, as_pandas=True, as_multiclass=True)
                     if is_classification else None)
    prob_labels = probabilities.columns.values.tolist() if probabilities is not None else None

    leaderboard = predictor._learner.leaderboard(X_test, y_test, silent=True)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
        print(leaderboard)

    save_artifacts(predictor, leaderboard, config)

    num_models_trained = len(leaderboard)
    num_models_ensemble = len(predictor._trainer.get_minimum_model_set(predictor._trainer.model_best))

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  probabilities_labels=prob_labels,
                  target_is_encoded=False,
                  models_count=num_models_trained,
                  models_ensemble_count=num_models_ensemble,
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset, config):
    log.info(f"\n**** LightAutoML [v{__version__}] ****\n")
    save_metadata(config, version=__version__)

    warnings.simplefilter(action='ignore', category=FutureWarning)
    warnings.simplefilter(action='ignore', category=DeprecationWarning)

    is_classification = config.type == 'classification'

    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc
    column_names, _ = zip(*dataset.columns)
    column_types = dict(dataset.columns)
    label = dataset.target.name

    df_train = pd.DataFrame(dataset.train.data, columns=column_names).astype(column_types, copy=False)
    df_train[dataset.target.name] = y_train

    max_mem_size_gb = float(config.max_mem_size_mb) / 1024
    task = Task(dataset.problem_type if dataset.problem_type != 'regression' else 'reg')
    automl = TabularUtilizedAutoML(task=task,
                                   timeout=config.max_runtime_seconds,
                                   cpu_limit=config.cores,
                                   memory_limit=max_mem_size_gb,
                                   random_state=config.seed)

    log.info("Training...")
    with utils.Timer() as training:
        automl.fit_predict(train_data=df_train, roles={'target': label})

    df_test = pd.DataFrame(dataset.test.data, columns=column_names).astype(column_types, copy=False)
    df_x_test = df_test.drop(columns=label)

    log.info("Predicting on the test set...")
    with utils.Timer() as predict:
        preds = automl.predict(df_x_test).data

    if is_classification:
        probabilities = preds
        if dataset.problem_type == 'binary':
            # LightAutoML returns a single positive-class column for binary tasks;
            # expand it to the [negative, positive] layout expected downstream.
            probabilities = np.vstack([1 - probabilities[:, 0], probabilities[:, 0]]).T
        predictions = np.argmax(probabilities, axis=1)
    else:
        probabilities = None
        predictions = preds

    log.debug(probabilities)
    log.debug(config.output_predictions_file)

    save_artifacts(automl, config)

    return result(
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
        target_is_encoded=is_classification,
        training_duration=training.duration,
        predict_duration=predict.duration,
    )
def run(dataset, config):
    log.info(f"\n**** Tuned Random Forest [sklearn v{sklearn.__version__}] ****\n")
    save_metadata(config, version=sklearn.__version__)

    is_classification = config.type == 'classification'

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    tuning_params = config.framework_params.get('_tuning', training_params)
    n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config

    X_train, X_test = dataset.train.X_enc, dataset.test.X_enc
    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc

    log.info("Running RandomForest with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))

    estimator = RandomForestClassifier if is_classification else RandomForestRegressor
    metric = dict(
        acc='accuracy',
        auc='roc_auc',
        f1='f1',
        logloss='neg_log_loss',
        mae='neg_mean_absolute_error',
        mse='neg_mean_squared_error',
        r2='r2',
        rmse='neg_root_mean_squared_error',
    )[config.metric]

    n_features = X_train.shape[1]
    default_value = max(1, int(math.sqrt(n_features)))
    below_default = pick_values_uniform(start=1, end=default_value, length=5 + 1)[:-1]  # 5 below
    above_default = pick_values_uniform(start=default_value, end=n_features,
                                        length=10 + 1 - len(below_default))[1:]  # 5 above

    # Mix up the order of `max_features` to try, so that a fair range is tried even if we
    # have too little time to try all possible values. Order: [sqrt(p), 1, p, random order
    # for remaining values]. This randomized order is currently disabled in favor of the
    # sorted grid below:
    # max_features_to_try = below_default[1:] + above_default[:-1]
    # max_features_values = ([default_value, 1, n_features]
    #                        + random.sample(max_features_to_try, k=len(max_features_to_try)))
    max_features_values = [default_value] + below_default + above_default

    # Define up to how much of the total time we spend 'optimizing' `max_features`
    # (the remainder is used for fitting the final model).
    safety_factor = 0.85
    with stopit.ThreadingTimeout(seconds=int(config.max_runtime_seconds * safety_factor)):
        log.info("Evaluating multiple values for `max_features`: %s.", max_features_values)
        max_feature_scores = []
        tuning_durations = []
        for i, max_features_value in enumerate(max_features_values):
            log.info("[{:2d}/{:2d}] Evaluating max_features={}".format(
                i + 1, len(max_features_values), max_features_value))
            imputation = SimpleImputer()
            random_forest = estimator(n_jobs=n_jobs,
                                      random_state=config.seed,
                                      max_features=max_features_value,
                                      **tuning_params)
            pipeline = Pipeline(steps=[('preprocessing', imputation), ('learning', random_forest)])
            with utils.Timer() as cv_scoring:
                try:
                    scores = cross_val_score(estimator=pipeline,
                                             X=dataset.train.X_enc,
                                             y=dataset.train.y_enc,
                                             scoring=metric,
                                             cv=5)
                    max_feature_scores.append((statistics.mean(scores), max_features_value))
                except stopit.utils.TimeoutException as toe:
                    log.error("Failed CV scoring for max_features=%s : Timeout", max_features_value)
                    tuning_durations.append((max_features_value, cv_scoring.duration))
                    raise toe
                except Exception as e:
                    log.error("Failed CV scoring for max_features=%s :\n%s", max_features_value, e)
                    log.debug("Exception:", exc_info=True)
                    tuning_durations.append((max_features_value, cv_scoring.duration))
                else:
                    # Also record the duration of successful CV runs, so that the total
                    # tuning time reported below is accurate.
                    tuning_durations.append((max_features_value, cv_scoring.duration))

    log.info("Tuning scores:\n%s", sorted(max_feature_scores))
    log.info("Tuning durations:\n%s", sorted(tuning_durations))
    _, best_max_features_value = (max(max_feature_scores) if len(max_feature_scores) > 0
                                  else (math.nan, 'auto'))
    log.info("Training final model with `max_features={}`.".format(best_max_features_value))
    rf = estimator(n_jobs=n_jobs,
                   random_state=config.seed,
                   max_features=best_max_features_value,
                   **training_params)
    with utils.Timer() as training:
        rf.fit(X_train, y_train)
    with utils.Timer() as predict:
        predictions = rf.predict(X_test)
    probabilities = rf.predict_proba(X_test) if is_classification else None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(rf),
                  training_duration=training.duration + sum(t[1] for t in tuning_durations),
                  predict_duration=predict.duration)
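# `pick_values_uniform` is a helper defined elsewhere in this module; a minimal
# sketch of the assumed behavior (evenly spaced unique integers, endpoints
# included) — an illustration, not necessarily the benchmark's implementation:
#
#   def pick_values_uniform(start, end, length):
#       return sorted({int(round(v)) for v in np.linspace(start, end, num=length)})
#
# Worked example for n_features = 100: default_value = sqrt(100) = 10,
#   below_default = pick_values_uniform(1, 10, 6)[:-1]   -> [1, 3, 5, 6, 8]
#   above_default = pick_values_uniform(10, 100, 6)[1:]  -> [28, 46, 64, 82, 100]
#   max_features_values = [10, 1, 3, 5, 6, 8, 28, 46, 64, 82, 100]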
def run(dataset, config):
    log.info("\n**** GAMA [v%s] ****", __version__)
    log.info("sklearn == %s", sklearn.__version__)
    log.info("category_encoders == %s", category_encoders.__version__)
    save_metadata(config, version=__version__)

    is_classification = config.type == 'classification'
    # Mapping of benchmark metrics to GAMA metrics
    metrics_mapping = dict(
        acc='accuracy',
        auc='roc_auc',
        f1='f1',
        logloss='neg_log_loss',
        mae='neg_mean_absolute_error',
        mse='neg_mean_squared_error',
        msle='neg_mean_squared_log_error',
        r2='r2',
        rmse='neg_root_mean_squared_error',
    )
    scoring_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if scoring_metric is None:
        raise ValueError("Performance metric {} not supported.".format(config.metric))

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config

    # Derive a log file name from the dataset id and fold encoded in the train path.
    *_, did, fold = dataset.train_path.split('/')
    fold = fold.split('.')[0].split('_')[-1]
    log_file = os.path.join(config.output_dir, "logs", '{}_{}.log'.format(did, fold))
    utils.touch(log_file)

    log.info('Running GAMA with a maximum time of %ss on %s cores, optimizing %s.',
             config.max_runtime_seconds, n_jobs, scoring_metric)

    estimator = GamaClassifier if is_classification else GamaRegressor
    gama_automl = estimator(n_jobs=n_jobs,
                            max_total_time=config.max_runtime_seconds,
                            scoring=scoring_metric,
                            random_state=config.seed,
                            keep_analysis_log=log_file,
                            **training_params)

    with utils.Timer() as training:
        gama_automl.fit_arff(dataset.train_path, dataset.target, encoding='utf-8')

    log.info('Predicting on the test set.')
    with utils.Timer() as predict:
        predictions = gama_automl.predict_arff(dataset.test_path, dataset.target, encoding='utf-8')

    if is_classification:
        probabilities = gama_automl.predict_proba_arff(dataset.test_path, dataset.target, encoding='utf-8')
    else:
        probabilities = None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  probabilities=probabilities,
                  target_is_encoded=False,
                  models_count=len(gama_automl._final_pop),
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset, config):
    log.info(f"\n**** Hyperopt-sklearn [v{config.framework_version}] ****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'

    default = lambda: 0  # sentinel: fall back to hyperopt-sklearn's own default loss
    metrics_to_loss_mapping = dict(
        acc=(default, False),  # lambda y, pred: 1.0 - accuracy_score(y, pred)
        auc=(lambda y, pred: 1.0 - roc_auc_score(y, pred), False),
        f1=(lambda y, pred: 1.0 - f1_score(y, pred), False),
        # logloss=(log_loss, True),
        mae=(mean_absolute_error, False),
        mse=(mean_squared_error, False),
        msle=(mean_squared_log_error, False),
        r2=(default, False),  # lambda y, pred: 1.0 - r2_score(y, pred)
        rmse=(mean_squared_error, False),
    )
    loss_fn, continuous_loss_fn = (metrics_to_loss_mapping[config.metric]
                                   if config.metric in metrics_to_loss_mapping
                                   else (None, False))
    if loss_fn is None:
        log.warning("Performance metric %s not supported: defaulting to %s.",
                    config.metric, 'accuracy' if is_classification else 'r2')
    if loss_fn is default:
        loss_fn = None

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    if 'algo' in training_params:
        training_params['algo'] = eval(training_params['algo'])  # evil eval: use get_extensions instead once https://github.com/openml/automlbenchmark/pull/141 is merged

    log.warning("Ignoring cores constraint of %s cores.", config.cores)
    log.info("Running hyperopt-sklearn with a maximum time of %ss on %s cores, optimizing %s.",
             config.max_runtime_seconds, 'all', config.metric)

    X_train = dataset.train.X_enc
    y_train = dataset.train.y_enc

    if is_classification:
        classifier = any_classifier('clf')
        regressor = None
    else:
        classifier = None
        regressor = any_regressor('rgr')

    estimator = HyperoptEstimator(classifier=classifier,
                                  regressor=regressor,
                                  loss_fn=loss_fn,
                                  continuous_loss_fn=continuous_loss_fn,
                                  trial_timeout=config.max_runtime_seconds,
                                  seed=config.seed,
                                  **training_params)

    with InterruptTimeout(config.max_runtime_seconds,
                          interruptions=[
                              dict(),  # default interruption
                              dict(sig=signal.SIGKILL)
                          ],
                          wait_retry_secs=math.ceil(config.max_runtime_seconds / 60),
                          before_interrupt=ft.partial(kill_proc_tree, timeout=5, include_parent=False)):
        with Timer() as training:
            estimator.fit(X_train, y_train)

    log.info('Predicting on the test set.')
    X_test = dataset.test.X_enc
    y_test = dataset.test.y_enc
    with Timer() as predict:
        predictions = estimator.predict(X_test)

    if is_classification:
        probabilities = "predictions"  # encoding is handled by caller in `__init__.py`
    else:
        probabilities = None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(estimator.trials),
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset, config):
    log.info(f"\n**** Stacking Ensemble [sklearn v{sklearn.__version__}] ****\n")
    save_metadata(config, version=sklearn.__version__)

    is_classification = config.type == 'classification'

    X_train, X_test = dataset.train.X_enc, dataset.test.X_enc
    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_jobs = config.framework_params.get('_n_jobs', config.cores)  # useful to disable multicore, regardless of the dataset config
    estimators_params = {e: config.framework_params.get(f'_{e}_params', {})
                         for e in ['rf', 'gbm', 'linear', 'svc', 'final']}

    log.info("Running Sklearn Stacking Ensemble with a maximum time of {}s on {} cores.".format(config.max_runtime_seconds, n_jobs))
    log.warning("We completely ignore the requirement to stay within the time limit.")
    log.warning("We completely ignore the advice to optimize towards metric: {}.".format(config.metric))

    if is_classification:
        estimator = StackingClassifier(
            estimators=[
                ('rf', RandomForestClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                ('gbm', GradientBoostingClassifier(random_state=config.seed, **estimators_params['gbm'])),
                ('linear', SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['linear'])),
                # ('svc', LinearSVC(random_state=config.seed, **estimators_params['svc']))
            ],
            # final_estimator=SGDClassifier(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
            final_estimator=LogisticRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
            stack_method='predict_proba',
            n_jobs=n_jobs,
            **training_params
        )
    else:
        estimator = StackingRegressor(
            estimators=[
                ('rf', RandomForestRegressor(n_jobs=n_jobs, random_state=config.seed, **estimators_params['rf'])),
                ('gbm', GradientBoostingRegressor(random_state=config.seed, **estimators_params['gbm'])),
                ('linear', SGDRegressor(random_state=config.seed, **estimators_params['linear'])),
                ('svc', LinearSVR(random_state=config.seed, **estimators_params['svc']))
            ],
            # final_estimator=SGDRegressor(random_state=config.seed, **estimators_params['final']),
            final_estimator=LinearRegression(n_jobs=n_jobs, random_state=config.seed, **estimators_params['final']),
            n_jobs=n_jobs,
            **training_params
        )

    with utils.Timer() as training:
        estimator.fit(X_train, y_train)
    with utils.Timer() as predict:
        predictions = estimator.predict(X_test)
    probabilities = estimator.predict_proba(X_test) if is_classification else None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(estimator.estimators_) + 1,
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset, config):
    askl_method_version = 2 if config.framework_params.get('_askl2', False) else 1
    askl_string = "Auto-sklearn2.0" if askl_method_version == 2 else "Auto-sklearn"
    askl_version = version.parse(autosklearn.__version__)
    log.info(f"\n**** {askl_string} [v{autosklearn.__version__}] ****\n")
    save_metadata(config, version=autosklearn.__version__)
    warnings.simplefilter(action='ignore', category=FutureWarning)
    warnings.simplefilter(action='ignore', category=DeprecationWarning)

    is_classification = config.type == 'classification'
    dataset_name = config.name

    # Mapping of benchmark metrics to autosklearn metrics
    metrics_mapping = dict(
        acc=metrics.accuracy,
        auc=metrics.roc_auc,
        f1=metrics.f1,
        logloss=metrics.log_loss,
        mae=metrics.mean_absolute_error,
        mse=metrics.mean_squared_error,
        rmse=(metrics.mean_squared_error if askl_version < version.parse("0.10")
              else metrics.root_mean_squared_error),
        r2=metrics.r2
    )
    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    # Set resources based on data size
    log.info("Running %s for %s with a maximum time of %ss on %s cores with %sMB, optimizing %s.",
             askl_string, dataset_name, config.max_runtime_seconds, config.cores,
             config.max_mem_size_mb, perf_metric)
    log.info("Environment: %s", os.environ)

    X_train = dataset.train.X_enc
    y_train = dataset.train.y_enc
    predictors_type = dataset.predictors_type
    log.debug("predictors_type=%s", predictors_type)

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_jobs = config.framework_params.get('_n_jobs', config.cores)
    ml_memory_limit = config.framework_params.get('_ml_memory_limit', 'auto')

    constr_params = {}
    fit_extra_params = {'dataset_name': dataset_name}

    total_memory_mb = utils.system_memory_mb().total
    if ml_memory_limit == 'auto':
        ml_memory_limit = max(
            min(config.max_mem_size_mb / n_jobs, math.ceil(total_memory_mb / n_jobs)),
            3072  # 3072 is the autosklearn default and we use it as a lower bound
        )
    if isinstance(askl_version, version.LegacyVersion) or askl_version >= version.parse("0.11"):
        log.info("Using %sMB memory per job on a total of %s jobs.", ml_memory_limit, n_jobs)
    else:
        ensemble_memory_limit = config.framework_params.get('_ensemble_memory_limit', 'auto')
        # When memory is large enough, we should have:
        # (cores - 1) * ml_memory_limit_mb + ensemble_memory_limit_mb = config.max_mem_size_mb
        if ensemble_memory_limit == 'auto':
            ensemble_memory_limit = max(
                math.ceil(ml_memory_limit - (total_memory_mb - config.max_mem_size_mb)),
                math.ceil(ml_memory_limit / 3),  # default proportions
                1024  # 1024 is the autosklearn default
            )
        log.info("Using %sMB memory per ML job and %sMB for the ensemble job on a total of %s jobs.",
                 ml_memory_limit, ensemble_memory_limit, n_jobs)
        constr_params["ml_memory_limit"] = ml_memory_limit
        constr_params["ensemble_memory_limit"] = ensemble_memory_limit

    log.warning("Using meta-learned initialization, which might be bad (leakage).")
    if is_classification:
        estimator = AutoSklearn2Classifier if askl_method_version == 2 else AutoSklearnClassifier
    else:
        if askl_method_version == 2:
            log.warning('%s does not support regression, falling back to regular Auto-sklearn!', askl_string)
        estimator = AutoSklearnRegressor

    if isinstance(askl_version, version.LegacyVersion) or askl_version >= version.parse("0.8"):
        constr_params['metric'] = perf_metric
    else:
        fit_extra_params['metric'] = perf_metric

    constr_params["time_left_for_this_task"] = config.max_runtime_seconds
    constr_params["n_jobs"] = n_jobs
    constr_params["seed"] = config.seed

    log.info("%s constructor arguments: %s", askl_string, constr_params)
    log.info("%s additional constructor arguments: %s", askl_string, training_params)
    log.info("%s fit() arguments: %s", askl_string, fit_extra_params)

    auto_sklearn = estimator(**constr_params, **training_params)
    with utils.Timer() as training:
        auto_sklearn.fit(X_train, y_train, feat_type=predictors_type, **fit_extra_params)

    log.info("Predicting on the test set.")
    X_test = dataset.test.X_enc
    y_test = dataset.test.y_enc
    with utils.Timer() as predict:
        predictions = auto_sklearn.predict(X_test)
    probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None

    save_artifacts(auto_sklearn, config)

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(auto_sklearn.get_models_with_weights()),
                  training_duration=training.duration,
                  predict_duration=predict.duration)
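# Worked example of the memory split above (illustrative numbers):
#   n_jobs = 8, config.max_mem_size_mb = 32768, total_memory_mb = 64000
#   ml_memory_limit = max(min(32768 / 8, ceil(64000 / 8)), 3072) = 4096 MB
#   For askl < 0.11:
#   ensemble_memory_limit = max(ceil(4096 - (64000 - 32768)),  # negative here
#                               ceil(4096 / 3),                # = 1366
#                               1024) = 1366 MB
#   Sanity check: (8 - 1) * 4096 + 1366 = 30038 MB <= 32768 MB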
def run(dataset, config):
    log.info(f"\n**** AutoGluon [v{__version__}] ****\n")
    save_metadata(config, version=__version__)

    metrics_mapping = dict(
        acc=metrics.accuracy,
        auc=metrics.roc_auc,
        f1=metrics.f1,
        logloss=metrics.log_loss,
        mae=metrics.mean_absolute_error,
        mse=metrics.mean_squared_error,
        r2=metrics.r2,
        rmse=metrics.root_mean_squared_error,
    )

    label = dataset.target.name
    problem_type = dataset.problem_type

    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    is_classification = config.type == 'classification'
    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}

    load_raw = config.framework_params.get('_load_raw', False)
    if load_raw:
        train, test = load_data_raw(dataset=dataset)
    else:
        column_names, _ = zip(*dataset.columns)
        column_types = dict(dataset.columns)
        train = pd.DataFrame(dataset.train.data, columns=column_names).astype(column_types, copy=False)
        print(f"Columns dtypes:\n{train.dtypes}")
        test = pd.DataFrame(dataset.test.data, columns=column_names).astype(column_types, copy=False)

    del dataset
    gc.collect()

    output_dir = output_subdir("models", config)
    with utils.Timer() as training:
        predictor = TabularPredictor(
            label=label,
            eval_metric=perf_metric.name,
            path=output_dir,
            problem_type=problem_type,
        ).fit(
            train_data=train,
            time_limit=config.max_runtime_seconds,
            **training_params
        )
    del train

    y_test = test[label]
    test = test.drop(columns=label)

    if is_classification:
        with utils.Timer() as predict:
            probabilities = predictor.predict_proba(test, as_multiclass=True)
            predictions = probabilities.idxmax(axis=1).to_numpy()
    else:
        with utils.Timer() as predict:
            predictions = predictor.predict(test, as_pandas=False)
        probabilities = None

    prob_labels = probabilities.columns.values.tolist() if probabilities is not None else None

    # Test data input removed to avoid a long-running computation; remove the 7200s timeout limitation to re-enable.
    leaderboard = predictor.leaderboard(silent=True)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
        print(leaderboard)

    save_artifacts(predictor, leaderboard, config)

    num_models_trained = len(leaderboard)
    if predictor._trainer.model_best is not None:
        num_models_ensemble = len(predictor._trainer.get_minimum_model_set(predictor._trainer.model_best))
    else:
        num_models_ensemble = 1

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  probabilities_labels=prob_labels,
                  target_is_encoded=False,
                  models_count=num_models_trained,
                  models_ensemble_count=num_models_ensemble,
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def run(dataset, config):
    jar_file = glob.glob("{here}/lib/mlplan/mlplan-cli*.jar".format(here=os.path.dirname(__file__)))[0]
    version = re.match(r".*/mlplan-cli-(.*).jar", jar_file)[1]
    log.info(f"\n**** ML-Plan [v{version}] ****\n")
    save_metadata(config, version=version)

    is_classification = config.type == 'classification'

    # Mapping of benchmark metrics to ML-Plan metrics
    metrics_mapping = dict(
        acc='ERRORRATE',
        auc='AUC',
        logloss='LOGLOSS',
        f1='F1',
        r2='R2',
        rmse='ROOT_MEAN_SQUARED_ERROR',
        mse='MEAN_SQUARED_ERROR',
        rmsle='ROOT_MEAN_SQUARED_LOGARITHM_ERROR',
        mae='MEAN_ABSOLUTE_ERROR'
    )
    metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if metric is None:
        raise ValueError('Performance metric {} is not supported.'.format(config.metric))

    train_file = dataset.train.path
    test_file = dataset.test.path

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    backend = config.framework_params.get('_backend', 'weka')

    if backend == "weka":
        mem_limit = str(max(config.max_mem_size_mb - 1024, 2048))
    else:
        mem_limit = str(max(round((config.max_mem_size_mb - 1024) / config.cores), 2048))

    mode = backend
    if config.type == 'regression':
        mode += '-regression'

    log.info("Running ML-Plan with backend %s in mode %s and a maximum time of %ss on %s cores "
             "with %sMB for the JVM, optimizing %s.",
             backend, mode, config.max_runtime_seconds, config.cores, config.max_mem_size_mb, metric)
    log.info("Environment: %s", os.environ)

    predictions_file = os.path.join(output_subdir('mlplan_out', config), 'predictions.csv')
    statistics_file = os.path.join(output_subdir('mlplan_out', config), 'statistics.json')
    # tmp_dir = output_subdir('mlplan_tmp', config)

    cmd_root = f"java -jar -Xmx{mem_limit}M {jar_file}"

    with tempfile.TemporaryDirectory() as tmp_dir:
        cmd_params = dict(
            f='"{}"'.format(train_file),
            p='"{}"'.format(test_file),
            t=config.max_runtime_seconds,
            ncpus=config.cores,
            l=metric,
            m=mode,
            s=config.seed,
            ooab=predictions_file,
            os=statistics_file,
            tmp=tmp_dir,
            **training_params
        )
        cmd = cmd_root + ''.join([" -{} {}".format(k, v) for k, v in cmd_params.items()])

        with utils.Timer() as training:
            utils.run_cmd(cmd, _live_output_=True)

    with open(statistics_file, 'r') as f:
        stats = json.load(f)

    predictions = stats["predictions"]
    truth = stats["truth"]
    num_evals = stats["num_evaluations"]
    # Probabilities are only available for classification tasks, so check whether
    # the json contains the respective fields.
    if "probabilities" in stats and "probabilities_labels" in stats:
        probabilities = stats["probabilities"]
        probabilities_labels = stats["probabilities_labels"]
    else:
        probabilities = []
        probabilities_labels = []

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=truth,
                  probabilities=probabilities,
                  probabilities_labels=probabilities_labels,
                  target_is_encoded=is_classification,
                  models_count=num_evals,
                  training_duration=training.duration)
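# The statistics.json consumed above is expected to look roughly like this
# (shape inferred from the reads in this function; values are illustrative,
# not real ML-Plan output):
#
#   {
#     "predictions": ["yes", "no", ...],
#     "truth": ["yes", "yes", ...],
#     "num_evaluations": 137,
#     "probabilities": [[0.2, 0.8], [0.65, 0.35], ...],   # classification only
#     "probabilities_labels": ["no", "yes"]               # classification only
#   }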
def run(dataset, config):
    log.info(f"\n**** mljar-supervised [v{supervised.__version__}] ****\n")
    save_metadata(config, version=supervised.__version__)

    # Mapping of benchmark metrics to MLJAR metrics
    metrics_mapping = dict(auc='auc', logloss='logloss', rmse='rmse')
    eval_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else "auto"

    # Mapping of benchmark task to MLJAR ML task
    problem_mapping = dict(
        binary="binary_classification",
        multiclass="multiclass_classification",
        regression="regression",
    )
    ml_task = problem_mapping.get(dataset.problem_type)  # if None, the AutoML will guess the ML task
    is_classification = config.type == "classification"
    results_path = output_subdir("results", config)
    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith("_")}

    column_names, _ = zip(*dataset.columns)
    column_types = dict(dataset.columns)
    label = dataset.target.name

    train = pd.DataFrame(dataset.train.data, columns=column_names).astype(column_types, copy=False)
    X_train = train.drop(columns=label)
    y_train = train[label]

    test = pd.DataFrame(dataset.test.data, columns=column_names).astype(column_types, copy=False)
    X_test = test.drop(columns=label)
    y_test = test[label]

    automl = AutoML(results_path=results_path,
                    total_time_limit=config.max_runtime_seconds,
                    random_state=config.seed,
                    ml_task=ml_task,
                    eval_metric=eval_metric,
                    **training_params)

    with utils.Timer() as training:
        automl.fit(X_train, y_train)

    with utils.Timer() as predict:
        preds = automl.predict_all(X_test)

    predictions, probabilities = None, None
    if is_classification:
        predictions = preds["label"].values
        cols = [f"prediction_{c}" for c in np.unique(y_train)]
        probabilities = preds[cols].values
    else:
        predictions = preds["prediction"].values

    # Clean up the results unless artifacts were requested.
    if not config.framework_params.get("_save_artifacts", False):
        shutil.rmtree(results_path, ignore_errors=True)

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  models_count=len(automl._models),
                  training_duration=training.duration,
                  predict_duration=predict.duration)
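# `predict_all` returns one row per test instance; for a binary task the frame
# roughly looks like this (column names inferred from the code above, values
# illustrative):
#
#      prediction_no  prediction_yes  label
#   0           0.81            0.19     no
#   1           0.40            0.60    yes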
def run(dataset, config):
    log.info(f"\n**** H2O AutoML [v{h2o.__version__}] ****\n")
    save_metadata(config, version=h2o.__version__)

    # Mapping of benchmark metrics to H2O metrics
    metrics_mapping = dict(
        acc='mean_per_class_error',
        auc='AUC',
        logloss='logloss',
        mae='mae',
        mse='mse',
        r2='r2',
        rmse='rmse',
        rmsle='rmsle'
    )
    sort_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if sort_metric is None:
        # TODO: Figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported, defaulting to AUTO.", config.metric)

    try:
        training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
        nthreads = config.framework_params.get('_nthreads', config.cores)
        jvm_memory = str(round(config.max_mem_size_mb * 2 / 3)) + "M"  # leaving 1/3rd of available memory for XGBoost

        log.info("Starting H2O cluster with %s cores, %s memory.", nthreads, jvm_memory)
        max_port_range = 49151
        min_port_range = 1024
        rnd_port = os.getpid() % (max_port_range - min_port_range) + min_port_range
        port = config.framework_params.get('_port', rnd_port)

        init_params = config.framework_params.get('_init', {})
        if "logs" in config.framework_params.get('_save_artifacts', []):
            init_params['ice_root'] = output_subdir("logs", config)

        h2o.init(nthreads=nthreads,
                 port=port,
                 min_mem_size=jvm_memory,
                 max_mem_size=jvm_memory,
                 **init_params)

        import_kwargs = {}
        # Load train as an H2O Frame, but test as a Pandas DataFrame
        log.debug("Loading train data from %s.", dataset.train.path)
        train = None
        if version.parse(h2o.__version__) >= version.parse("3.32.0.3"):
            # Previous versions may fail to parse correctly some rare arff files using
            # single quotes as enum/string delimiters (pandas also fails on the same datasets).
            import_kwargs['quotechar'] = '"'
            train = h2o.import_file(dataset.train.path, destination_frame=frame_name('train', config), **import_kwargs)
            if not verify_loaded_frame(train, dataset):
                h2o.remove(train)
                train = None
                import_kwargs['quotechar'] = "'"
        if not train:
            train = h2o.import_file(dataset.train.path, destination_frame=frame_name('train', config), **import_kwargs)
            # train.impute(method='mean')
        log.debug("Loading test data from %s.", dataset.test.path)
        test = h2o.import_file(dataset.test.path, destination_frame=frame_name('test', config), **import_kwargs)
        # test.impute(method='mean')

        log.info("Running model on task %s, fold %s.", config.name, config.fold)
        log.debug("Running H2O AutoML with a maximum time of %ss on %s core(s), optimizing %s.",
                  config.max_runtime_seconds, config.cores, sort_metric)

        aml = H2OAutoML(max_runtime_secs=config.max_runtime_seconds,
                        sort_metric=sort_metric,
                        seed=config.seed,
                        **training_params)

        monitor = (BackendMemoryMonitoring(frequency_seconds=config.ext.monitoring.frequency_seconds,
                                           check_on_exit=True,
                                           verbosity=config.ext.monitoring.verbosity)
                   if config.framework_params.get('_monitor_backend', False)
                   # else contextlib.nullcontext  # Py 3.7+ only
                   else contextlib.contextmanager(iter)([0]))
        with utils.Timer() as training:
            with monitor:
                aml.train(y=dataset.target.index, training_frame=train)

        if not aml.leader:
            raise FrameworkError("H2O could not produce any model in the requested time.")

        with utils.Timer() as predict:
            preds = aml.predict(test)

        preds = extract_preds(preds, test, dataset=dataset)
        save_artifacts(aml, dataset=dataset, config=config)

        return result(output_file=config.output_predictions_file,
                      predictions=preds.predictions,
                      truth=preds.truth,
                      probabilities=preds.probabilities,
                      probabilities_labels=preds.probabilities_labels,
                      models_count=len(aml.leaderboard),
                      training_duration=training.duration,
                      predict_duration=predict.duration)
    finally:
        if h2o.connection():
            # h2o.remove_all()
            h2o.connection().close()
        if h2o.connection().local_server:
            h2o.connection().local_server.shutdown()
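# Note on the monitoring fallback above: `contextlib.contextmanager(iter)([0])`
# treats `iter` as a one-shot generator function over [0], producing a no-op
# context manager (enter yields 0, exit exhausts the generator). On Python 3.7+
# the same intent is spelled more clearly as:
#
#   monitor = contextlib.nullcontext()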
def run(dataset, config):
    log.info(f"\n**** Oboe [{config.framework_version}] ****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'
    if not is_classification:
        # regression currently fails (as of 26.02.2019: still under development by the oboe team)
        raise ValueError('Regression is not yet supported (under development).')

    X_train = dataset.train.X_enc
    y_train = dataset.train.y_enc

    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}
    n_cores = config.framework_params.get('_n_cores', config.cores)

    log.info('Running oboe with a maximum time of {}s on {} cores.'.format(config.max_runtime_seconds, n_cores))
    log.warning('We completely ignore the advice to optimize towards metric: {}.'.format(config.metric))

    aml = AutoLearner(p_type='classification' if is_classification else 'regression',
                      n_cores=n_cores,
                      runtime_limit=config.max_runtime_seconds,
                      **training_params)

    aml_models = lambda: ([aml.ensemble, *aml.ensemble.base_learners]
                          if len(aml.ensemble.base_learners) > 0 else [])

    with utils.Timer() as training:
        try:
            aml.fit(X_train, y_train)
        except IndexError as e:
            if len(aml_models()) == 0:  # incorrect handling of some IndexError in oboe if the ensemble is empty
                raise ValueError("Oboe could not produce any model in the requested time.")
            raise e

    log.info('Predicting on the test set.')
    X_test = dataset.test.X_enc
    y_test = dataset.test.y_enc
    with utils.Timer() as predict:
        predictions = aml.predict(X_test)
    predictions = predictions.reshape(len(X_test))

    if is_classification:
        probabilities = "predictions"  # encoding is handled by caller in `__init__.py`
    else:
        probabilities = None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(aml_models()),
                  training_duration=training.duration,
                  predict_duration=predict.duration)