# Search for the best regression model with auto-sklearn and report its score.
print(f'[INFO] Test shape: {X_test.shape}')
print('[INFO] Finding best model...')

# For "vanilla" auto-sklearn (single model, no meta-learning warm start), add:
#   ensemble_size=1, initial_configurations_via_metalearning=0
# -----CLASSIFIER-----
# automl = AutoSklearnClassifier(per_run_time_limit=300, ml_memory_limit=1024 * 6,
#                                time_left_for_this_task=3600, resampling_strategy='cv',
#                                resampling_strategy_arguments={'folds': 5})
# -----REGRESSION-----
automl = AutoSklearnRegressor(per_run_time_limit=300,      # seconds per candidate
                              ml_memory_limit=1024 * 6,    # ~6 GB per run
                              time_left_for_this_task=3600,  # 1 h total budget
                              resampling_strategy='cv',
                              resampling_strategy_arguments={'folds': 5})

start = time.time()
# X_train = X_train.astype('float')
automl.fit(X_train, y_train, dataset_name='linnerud')  # change dataset name accordingly
# With resampling_strategy='cv' the models were fit per fold, so the final
# ensemble must be refit on the full training set before predicting.
automl.refit(X_train.copy(), y_train.copy())
print(f'[INFO] Elapsed time finding best model: {time.time() - start} seconds.')

predictions = automl.predict(X_test)

# print('--- CLASSIFICATION REPORT: ---')  # not for regression
# print(classification_report(y_test, predictions, digits=5))

print('\n\n--- MODELS: ---')
print(automl.show_models())
print('\n\n--- STATISTICS: ---')
print(automl.sprint_statistics())

# -----CLASSIFIER-----
# print('\n\n--- SCORE: ---')
# print("Balanced error score", 1 - balanced_accuracy_score(y_test, predictions))
# -----REGRESSION-----
print('\n\n--- SCORE: ---')
# BUG FIX: the original printed 1 - r2_score(...) under the label "R2 score",
# copying the classifier's error-rate pattern above. R^2 is a goodness-of-fit
# score (1.0 is perfect) and should be reported directly.
print("R2 score", r2_score(y_test, predictions))
if __name__ == '__main__':
    # Fetch the auto-insurance dataset (no header row in the CSV).
    url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
    frame = read_csv(url, header=None)

    # Separate predictors from the target: last column is the output.
    raw = frame.values.astype('float32')
    X, y = raw[:, :-1], raw[:, -1]

    # Hold out a third of the rows for evaluation; fixed seed for repeatability.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1)

    # Configure the auto-sklearn search: 20-minute overall budget,
    # 45 s per candidate model, 6 parallel workers, MAE as the target metric.
    model = AutoSklearnRegressor(time_left_for_this_task=20 * 60,
                                 per_run_time_limit=45,
                                 n_jobs=6,
                                 metric=auto_mean_absolute_error)

    # Run the search.
    model.fit(X_train, y_train)

    # Summarize the search, then score the best model on the held-out split.
    print(model.sprint_statistics())
    y_hat = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_hat)
    print("MAE: %.3f" % mae)

    print("Show models")
    models_def = model.show_models()
    print(models_def)
print("start searching")

# One tag identifies this run (task type, time budget, lead index) in both the
# auto-sklearn dataset name and the log file name; build it once.
run_name = ml_type + '_t' + str(time_left_for_this_task) + '_lead' + str(l)

# Perform the search.
model.fit(X_train, y_train, dataset_name=run_name)

# BUG FIX: the log file was opened with open()/close() and no try/finally,
# leaking the handle if sprint_statistics()/show_models() raised. A context
# manager guarantees the file is closed (and flushed) on every path.
with open('log_files/' + run_name + '.txt', 'w') as log_file:
    log_file.write(model.sprint_statistics())
    log_file.write('\n')
    log_file.write(model.show_models())

print(model.sprint_statistics())
print(model.show_models())

# Evaluate the best model on the validation split.
y_hat = model.predict(X_val)
metric = calc_metrics(y_val, y_hat, ml_type)
# Regression yields a scalar per lead; classification yields a metric vector.
if ml_type == 'regression':
    metrics[l] = metric
elif ml_type == 'classification':
    metrics[l, :] = metric

print("************************************")
print("lead:" + str(l) + ", metric: " + str(metric))
print("************************************")
# NOTE(review): this chunk begins mid-call — the AutoSklearn*(...) constructor
# opens before the visible region; the lines below are its trailing keyword
# arguments. Left byte-identical because the call cannot be seen whole.
exclude_preprocessors=None, ml_memory_limit=6156,  # ~6 GB per-run memory cap
resampling_strategy="cv",
resampling_strategy_arguments={"folds": 5})  # 5-fold CV during the search

# Train models
auto_sklearn.fit(X=X_train.copy(), y=y_train.copy(),
                 metric=mean_squared_error)
# With resampling_strategy="cv" a refit on the full training data is needed
# before predicting; presumably `it_fits` is the fitted estimator — TODO confirm.
it_fits = auto_sklearn.refit(X=X_train.copy(), y=y_train.copy())

# Predict
y_hat = auto_sklearn.predict(X_test)

# Show results
# NOTE(review): these are bare expressions — their values are discarded, so
# nothing is printed outside a REPL/notebook; wrap in print(...) if run as a script.
auto_sklearn.cv_results_
auto_sklearn.sprint_statistics()
auto_sklearn.show_models()
auto_sklearn.get_models_with_weights()

# TPOT
from tpot import TPOTRegressor

# NOTE(review): this custom search space mixes sklearn *Classifier* estimators
# into a TPOTRegressor configuration — looks unintended for a regression task;
# verify against the TPOT config_dict docs before relying on it.
tpot_config = {
    "sklearn.linear_model.Ridge": {},
    "sklearn.ensemble.RandomForestClassifier": {},
    "sklearn.ensemble.ExtraTreesClassifier": {},
    "sklearn.ensemble.GradientBoostingClassifier": {},
}

# NOTE(review): chunk ends mid-call — TPOTRegressor(...) continues past view.
auto_tpot = TPOTRegressor(generations=100, population_size=100,