def test_mixed_column_type():
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sts/train.parquet')
    dev_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                            'glue/sts/dev.parquet')
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    # Add more columns as features
    train_data = pd.DataFrame({'sentence1': train_data['sentence1'],
                               'sentence2': train_data['sentence2'],
                               'sentence3': train_data['sentence2'],
                               'categorical0': train_data['genre'],
                               'numerical0': train_data['score'],
                               'genre': train_data['genre'],
                               'score': train_data['score']})
    dev_data = pd.DataFrame({'sentence1': dev_data['sentence1'],
                             'sentence2': dev_data['sentence2'],
                             'sentence3': dev_data['sentence2'],
                             'categorical0': dev_data['genre'],
                             'numerical0': dev_data['score'],
                             'genre': dev_data['genre'],
                             'score': dev_data['score']})
    # Train regression
    predictor1 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='score', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    dev_rmse = predictor1.evaluate(dev_data, metrics=['rmse'])
    dev_prediction = predictor1.predict(dev_data)
    # Train classification
    predictor2 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          label='genre', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_genre',
                          plot_results=False)
    dev_acc = predictor2.evaluate(dev_data, metrics=['acc'])
    dev_prediction = predictor2.predict(dev_data)
    # Specify the feature columns explicitly
    predictor3 = task.fit(train_data,
                          hyperparameters=test_hyperparameters,
                          feature_columns=['sentence1', 'sentence3', 'categorical0'],
                          label='score', num_trials=1,
                          verbosity=4,
                          ngpus_per_trial=1,
                          output_directory='./sts_score',
                          plot_results=False)
    dev_rmse = predictor3.evaluate(dev_data, metrics=['rmse'])
    dev_prediction = predictor3.predict(dev_data)
def prepare_data(config, dataset):
    print('#################')
    print('Config:')
    print(config.__json__())
    print()
    print('Dataset:')
    print(dataset.__dict__)
    print('#################')
    metrics_mapping = dict(acc=metrics.accuracy,
                           auc=metrics.roc_auc,
                           f1=metrics.f1,
                           logloss=metrics.log_loss,
                           mae=metrics.mean_absolute_error,
                           mse=metrics.mean_squared_error,
                           r2=metrics.r2)
    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    X_train = dataset.train.X
    y_train = dataset.train.y
    X_test = dataset.test.X
    y_test = dataset.test.y
    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)

    # Save and load data to remove any pre-set dtypes; we want to observe performance
    # in the worst-case scenario: raw csv
    save_pd.save(path='tmp/tmp_file_train.csv', df=X_train)
    X_train = load_pd.load(path='tmp/tmp_file_train.csv')
    save_pd.save(path='tmp/tmp_file_test.csv', df=X_test)
    X_test = load_pd.load(path='tmp/tmp_file_test.csv')

    is_classification = config.type == 'classification'
    if is_classification:
        unique_vals = np.unique(y_train)
        if len(unique_vals) == 2:
            problem_type = BINARY
        else:
            problem_type = MULTICLASS
    else:
        problem_type = REGRESSION

    return X_train, y_train, X_test, y_test, problem_type, perf_metric
def test_no_job_finished_raise():
    train_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                              'glue/sst/train.parquet')
    dev_data = load_pd.load('https://autogluon-text.s3-accelerate.amazonaws.com/'
                            'glue/sst/dev.parquet')
    with pytest.raises(RuntimeError):
        # Set a very small time limit so that no trial can finish, which should raise RuntimeError
        predictor = task.fit(train_data,
                             hyperparameters=test_hyperparameters,
                             label='label', num_trials=1,
                             ngpus_per_trial=0,
                             verbosity=4,
                             time_limits=10,
                             output_directory='./sst_raise',
                             plot_results=False)
def preprocess_openml_input(path, framework_suffix=None, framework_rename_dict=None,
                            folds_to_keep=None):
    raw_input = load_pd.load(path)
    raw_input = _rename_openml_columns(raw_input)
    if framework_rename_dict is not None:
        for key in framework_rename_dict.keys():
            raw_input[FRAMEWORK] = [
                framework_rename_dict[key] if framework == key else framework
                for framework in raw_input[FRAMEWORK]
            ]
    if framework_suffix is not None:
        raw_input[FRAMEWORK] = [
            framework + framework_suffix for framework in raw_input[FRAMEWORK]
        ]
    with_prob_type_input = _infer_problem_type(raw_input)
    fixed_input = _fix_results(with_prob_type_input)
    # Binary scores are higher-is-better, so convert them to errors;
    # other problem types' scores are treated as errors directly.
    fixed_input[METRIC_ERROR] = [
        1 - result if ptype == BINARY else result
        for result, ptype in zip(fixed_input[METRIC_SCORE], fixed_input[PROBLEM_TYPE])
    ]
    cleaned_input = preprocess_utils.clean_result(fixed_input,
                                                  folds_to_keep=folds_to_keep,
                                                  remove_invalid=False)
    return cleaned_input
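# Usage sketch for preprocess_openml_input (hedged): the path and the rename
# mapping below are illustrative assumptions, not artifacts from this repo.
# It loads raw OpenML benchmark results, normalizes one framework name, tags
# every framework with a time-limit suffix, and keeps only fold 0.
if __name__ == '__main__':
    cleaned = preprocess_openml_input(
        path='data/results/input/raw/openml_results.csv',  # hypothetical path
        framework_suffix='_1h',
        framework_rename_dict={'AutoGluon': 'autogluon'},  # hypothetical rename
        folds_to_keep=[0],
    )
    print(cleaned.head())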
def test_sts():
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/sts/dev.parquet')
    rng_state = np.random.RandomState(123)
    train_perm = rng_state.permutation(len(train_data))
    valid_perm = rng_state.permutation(len(dev_data))
    train_data = train_data.iloc[train_perm[:100]]
    dev_data = dev_data.iloc[valid_perm[:10]]
    predictor = task.fit(train_data,
                         hyperparameters=test_hyperparameters,
                         label='score', num_trials=1,
                         verbosity=4,
                         ngpus_per_trial=1,
                         output_directory='./sts',
                         plot_results=False)
    dev_rmse = predictor.evaluate(dev_data, metrics=['rmse'])
    dev_prediction = predictor.predict(dev_data)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/ablation/'
    results_raw = load_pd.load(path=[
        results_dir_input + 'openml_core.csv',
        results_dir_input + 'openml_autogluon_ablation.csv'
    ])
    frameworks_1h = [
        'autogluon_1h',
        'autogluon_nostack_1h',
        'autogluon_nobag_1h',
        'autogluon_norepeatbag_1h',
        'autogluon_nonn_1h',
        # 'autogluon_noknn_1h',
    ]
    frameworks_4h = [
        'autogluon_4h',
        'autogluon_nostack_4h',
        'autogluon_nobag_4h',
        'autogluon_norepeatbag_4h',
        'autogluon_nonn_4h',
        # 'autogluon_noknn_4h',
    ]
    run_path_prefix_list = ['1h/', '4h/', 'combined/']
    frameworks_compare_vs_all_list = [['autogluon_1h'],
                                      ['autogluon_4h'],
                                      ['autogluon_1h', 'autogluon_4h']]
    frameworks_run_list = [frameworks_1h,
                           frameworks_4h,
                           frameworks_1h + frameworks_4h]
    folds_to_keep_list = [[0], [0], [0]]
    banned_datasets = []
    num_runs = len(run_path_prefix_list)
    for i in range(num_runs):
        run_path_prefix = run_path_prefix_list[i]
        frameworks_compare_vs_all = frameworks_compare_vs_all_list[i]
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_run,
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss'
            ],
            frameworks_compare_vs_all=frameworks_compare_vs_all,
            output_dir=results_dir_output + run_path_prefix,
        )
def run_single_vs(results_dir_input, filename, col_name_comparison_str,
                  framework_name_map=SYSTEM_NAMES):
    results_dir_output = results_dir_input + 'tex/'
    pairwise_vs_df = load_pd.load(results_dir_input + 'pairwise/' + filename + '.csv')
    textable_file = results_dir_output + 'pairwise/' + filename + '.tex'
    textab = generate_tex_pairwise_vs_table(pairwise_vs_df,
                                            col_name_comparison_str=col_name_comparison_str,
                                            framework_name_map=framework_name_map)
    os.makedirs(os.path.dirname(textable_file), exist_ok=True)
    with open(textable_file, 'w') as tf:
        tf.write(textab)
    print("saved tex table to: %s" % textable_file)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/core_1h_vs_4h/'
    results_raw = load_pd.load(path=results_dir_input + 'openml_core.csv')
    frameworks = [
        'autogluon',
        'GCPTables',
        'H2OAutoML',
        'autosklearn',
        'TPOT',
        'AutoWEKA',
    ]
    folds_to_keep = [0]
    banned_datasets = []
    full_results_pairs_merged_dict = {}
    for framework in frameworks:
        run_path_prefix = framework + '/'
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=[framework_1h, framework_4h],
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss'
            ],
            frameworks_compare_vs_all=[framework_4h],
            output_dir=results_dir_output + run_path_prefix,
        )
        full_results_pairs_merged_dict.update(results_pairs_merged_dict)
    dfs = []
    for framework in frameworks:
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'
        cur_df = full_results_pairs_merged_dict[framework_4h]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework_1h]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> 4h'
        cur_columns[2] = '< 4h'
        cur_columns[3] = '= 4h'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)
    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/1h_vs_4h.csv', df=df_final)
def run_single(results_dir_input, framework_compare_vs_all, drop_columns=None,
               framework_name_map=SYSTEM_NAMES, suffix=''):
    input_openml = results_dir_input + 'pairwise/' + framework_compare_vs_all + '.csv'
    results_dir_output = results_dir_input + 'tex/'
    pairwise_df = load_pd.load(input_openml)
    textab = generate_tex_pairwise_table(pairwise_df=pairwise_df,
                                         framework_compare_vs_all=framework_compare_vs_all,
                                         drop_columns=drop_columns,
                                         framework_name_map=framework_name_map)
    textable_file = results_dir_output + 'pairwise/' + framework_compare_vs_all + suffix + '.tex'
    os.makedirs(os.path.dirname(textable_file), exist_ok=True)
    with open(textable_file, 'w') as tf:
        tf.write(textab)
    print("saved tex table to: %s" % textable_file)
def test_mrpc():
    train_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/mrpc/train.parquet')
    dev_data = load_pd.load(
        'https://autogluon-text.s3-accelerate.amazonaws.com/glue/mrpc/dev.parquet')
    train_data = train_data.iloc[:100]
    dev_data = dev_data.iloc[:10]
    predictor = task.fit(train_data,
                         hyperparameters=test_hyperparameters,
                         label='label', num_trials=1,
                         verbosity=4,
                         ngpus_per_trial=1,
                         output_directory='./mrpc',
                         plot_results=False)
    dev_acc = predictor.evaluate(dev_data, metrics=['acc'])
    dev_prediction = predictor.predict(dev_data)
    dev_pred_prob = predictor.predict_proba(dev_data)
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/core/'
    results_raw = load_pd.load(path=results_dir_input + 'openml_core.csv')
    frameworks_1h = [
        'autogluon_1h',
        'GCPTables_1h',
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
    ]
    frameworks_4h = [
        'autogluon_4h',
        'GCPTables_4h',
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]
    run_path_prefix_list = ['1h/', '4h/']
    frameworks_compare_vs_all_list = [['autogluon_1h'], ['autogluon_4h']]
    frameworks_run_list = [frameworks_1h, frameworks_4h]
    folds_to_keep_list = [[0], [0]]
    banned_datasets = []
    num_runs = len(run_path_prefix_list)
    for i in range(num_runs):
        run_path_prefix = run_path_prefix_list[i]
        frameworks_compare_vs_all = frameworks_compare_vs_all_list[i]
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_run,
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss'
            ],
            frameworks_compare_vs_all=frameworks_compare_vs_all,
            output_dir=results_dir_output + run_path_prefix,
        )
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'output/'
    results_dir_output = results_dir + 'output/combined/4h/tables/'
    input_openml = results_dir_input + 'openml/core/4h/results_ranked_by_dataset_all.csv'
    input_kaggle = results_dir_input + 'kaggle/4h/results_ranked_by_dataset_all.csv'
    results_ranked_by_dataset_all = load_pd.load([input_openml, input_kaggle])
    print(results_ranked_by_dataset_all)
    result = generate_charts.compute_dataset_framework_df(results_ranked_by_dataset_all)
    print(result)
    save_pd.save(path=results_dir_output + 'dataset_x_framework.csv', df=result)
def aggregate(path_prefix: str, contains=None):
    bucket, prefix = s3_utils.s3_path_to_bucket_prefix(path_prefix)
    objects = list_bucket_prefix_suffix_contains_s3(bucket=bucket,
                                                    prefix=prefix,
                                                    suffix='scores/results.csv',
                                                    contains=contains)
    print(objects)
    paths_full = [
        s3_utils.s3_bucket_prefix_to_path(bucket=bucket, prefix=file, version='s3')
        for file in objects
    ]
    print(paths_full)
    df = load_pd.load(paths_full)
    print(df)
    return df
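# Usage sketch for aggregate (hedged): the bucket path and filter string are
# illustrative assumptions. This would concatenate every 'scores/results.csv'
# found under the prefix whose key contains 'autogluon' into one DataFrame.
if __name__ == '__main__':
    df = aggregate('s3://my-benchmark-bucket/benchmark-runs/',  # hypothetical S3 prefix
                   contains='autogluon')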
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/accuracy/'
    results_raw = load_pd.load(
        path=[
            results_dir_input + 'openml_core.csv',
            results_dir_input + 'openml_autopilot.csv'
        ],
        worker_count=1
    )
    valid_frameworks = [
        'autogluon_1h',
        'GCPTables_1h',
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
        'AutoPilot_1h',
    ]
    # Rank frameworks by accuracy instead of the default metric
    results_raw[METRIC_SCORE] = results_raw['acc']
    results_raw[METRIC_ERROR] = 1 - results_raw[METRIC_SCORE]
    run_path_prefix = '1h/'
    banned_datasets = []
    folds_to_keep = [0]
    results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
        results_raw=results_raw,
        frameworks=valid_frameworks,
        banned_datasets=banned_datasets,
        folds_to_keep=folds_to_keep,
        columns_to_agg_extra=[
            # TIME_INFER_S,
            'acc',
        ],
        frameworks_compare_vs_all=['autogluon_1h', 'AutoPilot_1h'],
        output_dir=results_dir_output + run_path_prefix,
    )
def preprocess_kaggle_input(path, framework_suffix=None, framework_rename_dict=None):
    raw_input = load_pd.load(path)
    raw_input = _rename_kaggle_input(raw_input)
    raw_input[FOLD] = 0
    if METRIC_SCORE not in raw_input.columns:
        raw_input[METRIC_SCORE] = -raw_input[METRIC_ERROR]
    if framework_rename_dict is not None:
        for key in framework_rename_dict.keys():
            raw_input[FRAMEWORK] = [
                framework_rename_dict[key] if framework == key else framework
                for framework in raw_input[FRAMEWORK]
            ]
    if framework_suffix is not None:
        raw_input[FRAMEWORK] = [
            framework + framework_suffix for framework in raw_input[FRAMEWORK]
        ]
    cleaned_input = preprocess_utils.clean_result(raw_input, folds_to_keep=[0])
    return cleaned_input
def generate_tex_datasetXframework_table(results_dir_input, time_limit, method_order=None):
    """Generate datasets x frameworks raw data dumps."""
    results_dir_output = results_dir_input + 'tex/'
    results_raw = load_pd.load(
        path=[
            results_dir_input + 'results_ranked_by_dataset_all.csv',
        ]
    )
    if method_order is None:
        method_order = ['AutoWEKA', 'autosklearn', 'TPOT', 'H2OAutoML', 'GCPTables', 'autogluon']
    metric_error_df = generate_charts.compute_dataset_framework_df(results_raw)
    print("metric_error_df:")
    print(metric_error_df.head())
    # Truncate long dataset names so they fit in the table
    metric_error_df[DATASET] = pd.Series([x[:17] for x in list(metric_error_df[DATASET])])
    df_ordered = metric_error_df.set_index(DATASET)
    df_ordered = df_ordered[[meth + "_" + time_limit for meth in method_order]].copy()
    df_ordered.rename(columns={'dataset': 'Dataset'}, inplace=True)
    df_ordered.rename(columns=NOTIME_NAMES, inplace=True)
    # save_pd.save(path=results_dir_output + "openml_datasetsXframeworks_" + time_limit + ".csv", df=df_ordered)
    textable_file = results_dir_output + "openml_alllosses_" + time_limit + ".tex"
    if not os.path.exists(results_dir_output):
        os.makedirs(results_dir_output)
    tex_table.tex_table(df_ordered, textable_file, bold='min', nan_char=" x ", max_digits=5)
def get_predictions(problem_type, weka_file, class_prefix, labels_are_int, eval_metric):
    # Load predictions:
    if not os.path.exists(weka_file):
        raise ValueError("AutoWEKA failed producing any prediction.")
    if problem_type in [BINARY, MULTICLASS]:
        # Load classification predictions:
        with open(weka_file, 'r') as weka_file_io:
            probabilities = []
            predictions = []
            truth = []
            # Skip the header line and the trailing blank line
            for line in weka_file_io.readlines()[1:-1]:
                inst, actual, predicted, error, *distribution = line.split(',')
                pred_probabilities = [
                    float(pred_probability.replace('*', '').replace('\n', ''))
                    for pred_probability in distribution
                ]
                _, pred = predicted.split(':')
                _, tru = actual.split(':')
                # Strip the class prefix from the predicted label, if present
                pred = pred[pred.startswith(class_prefix) and len(class_prefix):]
                if labels_are_int:
                    pred = int(pred)
                probabilities.append(pred_probabilities)
                predictions.append(pred)
                truth.append(tru)
        y_pred = pd.Series(predictions)
        y_prob = np.array(probabilities).astype('float')
        if eval_metric == 'log_loss':
            # Ensure there are no probabilities equal to 0, which may cause infinite loss.
            EPS = 1e-8
            for i in range(len(y_prob)):
                prob_i = y_prob[i]
                extra_prob = 0.0  # additional probability mass added by clipping
                for j in range(len(prob_i)):
                    if prob_i[j] == 0.0:
                        prob_i[j] = EPS
                        extra_prob += EPS
                # Remove the added mass from the most likely class(es)
                while extra_prob > 0:
                    ind = np.argmax(prob_i)
                    ind_prob = prob_i[ind]
                    if ind_prob > extra_prob:
                        prob_i[ind] = ind_prob - extra_prob
                        extra_prob = 0
                    else:
                        prob_i[ind] = ind_prob - EPS
                        extra_prob -= EPS
            y_probsums = np.sum(y_prob, axis=1)
            y_prob = y_prob / y_probsums[:, None]  # ensure all probs sum to 1
    elif problem_type == REGRESSION:
        # Load regression predictions:
        pred_df = load_pd.load(weka_file)
        y_pred = pred_df['predicted']
        y_prob = None
    else:
        raise ValueError("Unknown problem_type specified")
    return (y_pred, y_prob)
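# Usage sketch for get_predictions (hedged): the file path and class prefix are
# illustrative assumptions. For a binary task scored with log_loss, this parses
# AutoWEKA's prediction dump, clips zero probabilities, and renormalizes rows.
if __name__ == '__main__':
    y_pred, y_prob = get_predictions(problem_type=BINARY,
                                     weka_file='./weka_predictions.csv',  # hypothetical path
                                     class_prefix='class_',  # hypothetical prefix
                                     labels_are_int=False,
                                     eval_metric='log_loss')
    print(y_pred.head())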
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/kaggle/'
    output_prefix = 'output/kaggle/'
    raw_kaggle_file = 'results_kaggle_wpercentile.csv'
    results_raw = load_pd.load(path=[
        results_dir_input + 'kaggle_core.csv',
    ])

    # First generate datasets x frameworks raw data dumps:
    metrics = ['LEADER_PERCENTILE', METRIC_SCORE]
    dataset_order = [
        'house-prices-advanced-regression-techniques',
        'mercedes-benz-greener-manufacturing',
        'santander-value-prediction-challenge',
        'allstate-claims-severity',
        'bnp-paribas-cardif-claims-management',
        'santander-customer-transaction-prediction',
        'santander-customer-satisfaction',
        'porto-seguro-safe-driver-prediction',
        'ieee-fraud-detection',
        'walmart-recruiting-trip-type-classification',
        'otto-group-product-classification-challenge'
    ]
    dataset_order = [KAGGLE_ABBREVS[dat] for dat in dataset_order]
    method_order = [
        'AutoWEKA',
        'autosklearn',
        'TPOT',
        'H2OAutoML',
        'GCPTables',
        'autogluon'
    ]
    time_limits = ['4h', '8h']
    results_raw2 = results_raw.drop(METRIC_ERROR, axis=1).copy()
    results_raw2['LEADER_PERCENTILE'] = 1 - results_raw2['LEADER_PERCENTILE']  # convert to actual percentile
    results_raw2.rename(columns={'LEADER_PERCENTILE': METRIC_ERROR}, inplace=True)
    # loss_df = generate_charts.compute_dataset_framework_df(results_raw)  # values = losses
    percentile_df = generate_charts.compute_dataset_framework_df(results_raw2)
    for time_limit in time_limits:
        methods_t = [meth + "_" + time_limit for meth in method_order]
        df_time = percentile_df[[DATASET] + methods_t].copy()
        df_time[DATASET] = df_time[DATASET].map(KAGGLE_ABBREVS)
        df_ordered = df_time.set_index(DATASET)
        df_ordered = df_ordered.reindex(dataset_order)
        # df_ordered.reset_index(inplace=True)
        # df_ordered.rename(columns={'dataset': 'Dataset'}, inplace=True)
        df_ordered.rename(columns=NOTIME_NAMES, inplace=True)
        save_pd.save(path=results_dir + output_prefix + time_limit + "/datasetsXframeworks.csv",
                     df=df_ordered)
        textable_file = results_dir + output_prefix + time_limit + "/allpercentiles.tex"
        tex_table.tex_table(df_ordered, textable_file, bold='max', nan_char=" x ", max_digits=5)

    # Next do pairwise comparisons:
    num_frameworks = 6
    valid_frameworks = [
        'autogluon_4h',
        'GCPTables_4h',
        'autosklearn_4h',
        'H2OAutoML_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
        'autogluon_8h',
        'GCPTables_8h',
        'H2OAutoML_8h',
        'autosklearn_8h',
        'TPOT_8h',
        'AutoWEKA_8h',
    ]
    frameworks_compare_vs_all_list = [
        'autogluon_4h', 'autogluon_8h', 'autogluon_4h', 'autogluon_8h'
    ]
    results_dir_output_list = [
        '4h/', '8h/', 'allVautogluon_4h/', 'allVautogluon_8h/'
    ]
    results_dir_output_list = [
        results_dir + output_prefix + name for name in results_dir_output_list
    ]
    # List of lists, each corresponding to indices of valid_frameworks that
    # should be compared in a single table.
    framework_compare_ind_list = [
        list(range(num_frameworks)),
        list(range(num_frameworks, num_frameworks * 2)),
        list(range(num_frameworks * 2)),
        list(range(num_frameworks * 2)),
    ]
    for i in range(len(results_dir_output_list)):
        results_dir_output = results_dir_output_list[i]
        frameworks_to_compare = [
            valid_frameworks[j] for j in framework_compare_ind_list[i]
        ]
        framework_compare_vs_all = frameworks_compare_vs_all_list[i]
        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_to_compare,
            banned_datasets=[],
            folds_to_keep=None,
            frameworks_compare_vs_all=[framework_compare_vs_all],
            output_dir=results_dir_output,
            columns_to_agg_extra=['LEADER_PERCENTILE'],
        )
        textab = tex_pairwise_table(results_dir_output, framework_compare_vs_all)

    # Generate plots:
    producePlots(time_limits, results_dir, raw_kaggle_file)
# Set arguments:
output_directory = 'autosklearn_models/'  # where to save trained models
train_file = 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv'
test_file = 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv'
predict_proba = False
pred_class_and_proba = True
runtime_sec = 120
num_cores = None

# Specify prediction problem:
label_column = 'class'  # specifies which column we want to predict
problem_type = BINARY
eval_metric = 'roc_auc'

# Load data:
train_data = load_pd.load(train_file)  # can be a local CSV file as well; returns a pandas DataFrame
train_data = train_data.head(500)  # subsample for faster demo
print(train_data.head())
test_data = load_pd.load(test_file)  # can be a local CSV file as well; returns a pandas DataFrame
y_test = test_data[label_column]
# If you do not remove test-data labels, then predictAutoSklearn() may return fewer predictions
# than datapoints (preprocessing filters out rows with badly-formatted labels).
test_data.drop([label_column], axis=1, inplace=True)

# Run auto-sklearn:
autosk = AutoSklearnBaseline()
num_models_trained, num_models_ensemble, fit_time = autosk.fit(
    train_data=train_data,
    # The remaining arguments are assumed to mirror the settings defined above:
    label_column=label_column,
    problem_type=problem_type,
    eval_metric=eval_metric,
    runtime_sec=runtime_sec,
    num_cores=num_cores,
    output_directory=output_directory,
)
def gcptables_fit_predict(train_data, test_data, dataset_name, label_column,
                          problem_type, output_directory, gcp_info,
                          eval_metric=None, runtime_sec=3600, fit_model=True,
                          model_name=None, make_predictions=True):
    """Use GCP AutoML Tables for both fitting and prediction.

    Returns all outputs of AbstractBaseline.fit(), AbstractBaseline.predict()
    as one big tuple, with one final element: class_order.
    Also takes in the same arguments as these methods, except for num_cores.

    Other Args:
        dataset_name: str
            Name under which GCP data and outputs will be stored in the GCS
            Storage Bucket; should be unique for every GCP run on a new dataset.
        gcp_info: dict
            Critical information regarding GCP configuration, project, and access keys.
        fit_model: bool
            Whether or not to actually fit models using GCP AutoML Tables.
            If a previous run of this function crashed after the model had been
            trained, then you can just produce predictions via fit_model=False.
            Similarly, you can set this to False in order to get predictions in
            a separate process from the fit() call. When False, you must specify
            model_name as the string corresponding to the model.name entry from
            the previous fit(), but without the project/path prefix (this thus
            matches the display name of the model in the GCP console).
        make_predictions: bool
            Whether we should return after fit() without making predictions.

    Note: For classification, your class labels cannot end with the suffix '_score'.
    """
    train_data = train_data.copy()
    test_data = test_data.copy()
    # Reformat column names to only contain alphanumeric characters:
    label_column_index = train_data.columns.get_loc(label_column)
    train_data.columns = [re.sub(r'\W+', '_', col) for col in train_data.columns.tolist()]
    test_data.columns = [re.sub(r'\W+', '_', col) for col in test_data.columns.tolist()]
    label_column = train_data.columns[label_column_index]  # re-assign as it may have changed
    train_data[id_column] = list(train_data.index)
    test_data[id_column] = list(test_data.index)
    data_colnames = list(set(train_data.columns))
    # Drop test labels if they exist:
    if label_column in test_data.columns:
        test_data = test_data.drop([label_column], axis=1)
    og_dataset_name = dataset_name
    dataset_name = re.sub(r'\W+', '_', dataset_name)  # ensure GCP will not complain about names
    dataset_name = dataset_name[:(GCP_DISPLAY_NAME_MAXCHARS - len(GCP_MODEL_PREFIX))]
    if model_name is None:
        model_display_name = GCP_MODEL_PREFIX + dataset_name
    else:
        model_display_name = model_name
    if og_dataset_name != dataset_name:
        print("GCP will complain about provided dataset_name, renamed to: %s" % dataset_name)
    PROJECT_ID = gcp_info['PROJECT_ID']
    BUCKET_NAME = gcp_info['BUCKET_NAME']
    COMPUTE_REGION = gcp_info['COMPUTE_REGION']
    GOOGLE_APPLICATION_CREDENTIALS = gcp_info['GOOGLE_APPLICATION_CREDENTIALS']
    num_models_trained = None
    num_models_ensemble = None
    fit_time = None
    y_pred = None
    y_prob = None
    predict_time = None
    class_order = None
    if len(train_data) < 1000:
        raise ValueError("GCP AutoML Tables can only be trained on datasets with >= 1000 rows")

    # Create GCP clients:
    storage_client = storage.Client.from_service_account_json(GOOGLE_APPLICATION_CREDENTIALS)
    bucket = storage_client.get_bucket(BUCKET_NAME)
    credentials = service_account.Credentials.from_service_account_file(GOOGLE_APPLICATION_CREDENTIALS)
    automl_client = automl.AutoMlClient(credentials=credentials)
    tables_client = automl.TablesClient(project=PROJECT_ID,
                                        region=COMPUTE_REGION,
                                        credentials=credentials)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    # Upload training data to GCS:
    gcs_train_path = dataset_name + "/" + GCS_TRAIN_FILENAME  # target file-name
    train_file_exists = storage.Blob(bucket=bucket, name=gcs_train_path).exists(storage_client)
    if not train_file_exists:
        print('Uploading training data')
        train_file_path = output_directory + GCS_TRAIN_FILENAME
        train_data.to_csv(train_file_path, index=False)  # write reformatted train-data to CSV file
        # Upload to GCS:
        blob = bucket.blob(gcs_train_path)
        blob.upload_from_filename(train_file_path)
    else:  # need to rename columns anyway to process predictions
        print('Training data already uploaded')

    # Upload test data:
    gcs_test_path = dataset_name + "/" + GCS_TEST_FILENAME  # target file-name
    test_file_exists = storage.Blob(bucket=bucket, name=gcs_test_path).exists(storage_client)
    if not test_file_exists:
        print('Uploading test data')
        test_file_path = output_directory + GCS_TEST_FILENAME
        test_data.to_csv(test_file_path, index=False)  # write reformatted test-data to CSV file
        # Upload to GCS:
        blob = bucket.blob(gcs_test_path)
        blob.upload_from_filename(test_file_path)
    else:
        print('Test data already uploaded')
    if not train_file_exists:
        os.remove(train_file_path)
    if not test_file_exists:
        os.remove(test_file_path)

    # Use AutoML Tables to fit models with training data:
    dataset = tables_client.create_dataset(dataset_display_name=dataset_name)
    tables_dataset_name = dataset.name
    import_data_response = tables_client.import_data(
        dataset=dataset,
        gcs_input_uris=GCS_PREFIX + BUCKET_NAME + "/" + gcs_train_path)
    print('Dataset import operation: {}'.format(import_data_response.operation))
    # print ensures we block until the dataset has been uploaded:
    print('Dataset import response: {}'.format(import_data_response.result()))
    list_table_specs_response = tables_client.list_table_specs(dataset=dataset)
    table_specs = [s for s in list_table_specs_response]
    print(table_specs)

    # Set label column:
    if problem_type in [BINARY, MULTICLASS]:
        type_code = 'CATEGORY'
        update_column_response = tables_client.update_column_spec(
            dataset=dataset,
            column_spec_display_name=label_column,
            type_code=type_code,
            nullable=False,
        )  # ensure label_column is categorical
        print(update_column_response)
    update_dataset_response = tables_client.set_target_column(
        dataset=dataset,
        column_spec_display_name=label_column,
    )
    print(update_dataset_response)

    # Fit AutoML Tables:
    gcp_metric = None  # metric passed to GCP as the optimization_objective argument
    if fit_model:
        if eval_metric is not None:
            # Mapping of benchmark metrics to GCP AutoML Tables metrics:
            # https://googleapis.dev/python/automl/latest/gapic/v1beta1/types.html
            metrics_map = {
                'accuracy': 'MINIMIZE_LOG_LOSS',
                'f1': 'MAXIMIZE_AU_PRC',
                'log_loss': 'MINIMIZE_LOG_LOSS',
                'roc_auc': 'MAXIMIZE_AU_ROC',
                'balanced_accuracy': 'MAXIMIZE_AU_ROC',
                'precision': 'MAXIMIZE_PRECISION_AT_RECALL',
                'recall': 'MAXIMIZE_RECALL_AT_PRECISION',
                'mean_squared_error': 'MINIMIZE_RMSE',
                'median_absolute_error': 'MINIMIZE_MAE',
                'mean_absolute_error': 'MINIMIZE_MAE',
                'r2': 'MINIMIZE_RMSE',
            }
            if eval_metric in metrics_map:
                gcp_metric = metrics_map[eval_metric]
            else:
                warnings.warn("Unknown metric will not be used by GCP AutoML Tables: %s" % eval_metric)
        t0 = time.time()
        model_train_hours = math.ceil(runtime_sec / 3600.)
        print('Training model for %s hours' % model_train_hours)
        print('Training model with name: %s' % model_display_name)
        # FIXME: ADD AN ID COLUMN
        # exclude_column_spec_names (Optional[str]) - the list of the names of the
        # columns you want to exclude and not train your model on.
        create_model_response = tables_client.create_model(
            model_display_name=model_display_name,
            dataset=dataset,
            train_budget_milli_node_hours=model_train_hours * 1000,
            optimization_objective=gcp_metric,
            exclude_column_spec_names=[id_column, label_column],
        )
        operation_id = create_model_response.operation.name
        print('Create GCP model operation: {}'.format(create_model_response.operation))
        check_interval = 60  # check for model status updates every check_interval seconds
        keep_checking = True
        check_time = time.time()
        while keep_checking:  # and time.time() - t0 <= runtime_sec:
            # Check on current model status:
            if time.time() - check_time > check_interval:
                api = operations_v1.OperationsClient(channel=automl_client.transport.channel)
                status_update = api.get_operation(operation_id)
                print("Status update on GCP model: \n {}".format(status_update))
                print('Time Elapsed: %s of %s' % ((time.time() - t0), runtime_sec))
                check_time = time.time()
                if hasattr(status_update, 'done') and status_update.done:
                    keep_checking = False
        # Waits until model training is done:
        model = create_model_response.result()
        model_name = model.name
        print("GCP training completed, produced model object with name: %s" % model_name)
        print("You can use this trained model for batch prediction by specifying model_name=%s" % model_display_name)
        print(model)
        t1 = time.time()
        fit_time = t1 - t0
        print("GCP Tables Model fit complete, runtime: %s" % fit_time)
        print("GCP model name = %s" % model_name)
    else:  # skip model fitting:
        fit_time = None
        print("Skipping GCP Tables Model fit, just using trained model for prediction")
        if model_name is None:
            raise ValueError("When fit_model=False, model_name must be specified.")
        model = tables_client.get_model(model_display_name=model_name)

    # Automatically-generated held-out validation performance estimates:
    num_models_trained = -1
    num_models_ensemble = -1
    summary_list = tables_client.list_model_evaluations(model=model)
    model_eval_summaries = [s for s in summary_list]
    if problem_type in [BINARY, MULTICLASS]:
        log_losses = [
            model_eval_summaries[i + 1].classification_evaluation_metrics.log_loss
            for i in range(len(model_eval_summaries) - 1)
        ]
        log_loss = np.mean(np.array(log_losses))
        print("Validation log_loss = %s" % log_loss)
    if problem_type == BINARY:
        auc_rocs = [
            model_eval_summaries[i + 1].classification_evaluation_metrics.au_roc
            for i in range(len(model_eval_summaries) - 1)
        ]
        auc_roc = np.mean(np.array(auc_rocs))
        print("Validation AUC_ROC = %s" % auc_roc)
    if not make_predictions:
        print("Skipping predictions, set model_name = %s to use this trained model for prediction later on" % model_name)
        return num_models_trained, num_models_ensemble, fit_time, y_pred, y_prob, predict_time, class_order

    # Predict (using batch inference, so no need to deploy the model):
    t2 = time.time()
    preds_file_prefix = GCS_PREFIX + BUCKET_NAME + "/" + dataset_name + "/pred"
    batch_predict_response = tables_client.batch_predict(
        model=model,
        gcs_input_uris=GCS_PREFIX + BUCKET_NAME + "/" + gcs_test_path,
        gcs_output_uri_prefix=preds_file_prefix)
    print('Batch prediction operation: {}'.format(batch_predict_response.operation))
    # Wait until batch prediction is done:
    batch_predict_result = batch_predict_response.result()
    print(batch_predict_response.metadata)
    t3 = time.time()
    predict_time = t3 - t2

    # Fetch predictions from GCS bucket to local file:
    preds_gcs_folder = batch_predict_response.metadata.batch_predict_details.output_info.gcs_output_directory  # full path to GCS folder containing predictions
    preds_gcs_filename = 'tables_1.csv'  # default file name created by GCP Tables
    preds_gcs_file = preds_gcs_folder + "/" + preds_gcs_filename
    local_preds_file = output_directory + LOCAL_PREDS_FILENAME
    with open(local_preds_file, 'wb') as file_obj:
        storage_client.download_blob_to_file(preds_gcs_file, file_obj)

    # Load predictions into python and format:
    test_pred_df = load_pd.load(local_preds_file)
    same_cols = [col for col in test_pred_df.columns if col in data_colnames]
    keep_cols = [col for col in test_pred_df.columns if col not in data_colnames]
    original_gcp_length = len(test_pred_df)
    original_test_length = len(test_data)
    print('test orig:')
    print(test_data)
    print('before dedupe...')
    print(test_pred_df)
    # Drop any duplicate rows in predictions before the join:
    test_pred_df = test_pred_df.drop_duplicates(subset=[id_column])
    print('before merge...')
    print(test_pred_df)
    # Un-shuffle the predictions so their order matches the test data:
    test_pred_df = test_data.merge(test_pred_df, on=[id_column], how='left')
    print('after merge...')
    print(test_pred_df)
    test_pred_df = test_pred_df[keep_cols]
    if len(test_pred_df) != len(test_data):
        warnings.warn("GCP failed to produce predictions for some test data rows")
        print('diff: %s | %s' % (len(test_pred_df), len(test_data)))
        print('DIFF ORIGINAL:')
        print(original_test_length)
        print(original_gcp_length)
    if problem_type != REGRESSION:
        gcp_classes = list(test_pred_df.columns)
        og_classes = list(train_data[label_column].unique())
        print('Num Classes orig:', len(og_classes))
        print('Num Classes GCP: ', len(gcp_classes))
        print('GCP Class Names : ', gcp_classes)
        print('Original Class Names : ', og_classes)
        # GCP names prediction columns '<label_column>_<class>_score'; recover the original class names:
        orig_colnames = [column[(len(label_column) + 1):-len('_score')] for column in gcp_classes]
        print('Original Class Names (Reordered): ', orig_colnames)
        if len(gcp_classes) != len(og_classes):
            warnings.warn("GCP AutoML Tables predictions are missing classes")
            raise AssertionError('GCP AutoML did not predict with all classes! GCP returned %s of %s classes!'
                                 % (len(gcp_classes), len(og_classes)))
        test_pred_df.columns = orig_colnames
    else:
        test_pred_df.columns = [label_column]
    if test_pred_df.isnull().values.any():
        # Some missing predictions exist that need to be imputed:
        test_pred_df = impute_dummy_predictor(test_pred_df=test_pred_df,
                                              train_data=train_data,
                                              label_column=label_column,
                                              problem_type=problem_type)
    if problem_type == REGRESSION:
        if len(keep_cols) != 1:
            warnings.warn("GCP AutoML Tables regression predictions are incorrectly formatted")
            print('keep_cols:', keep_cols)
            raise AssertionError('GCP AutoML did not return a valid regression prediction! GCP returned %s of %s classes!'
                                 % (len(keep_cols), 1))
        y_pred = test_pred_df[label_column]
        y_prob = None
        return num_models_trained, num_models_ensemble, fit_time, y_pred, y_prob, predict_time, class_order
    else:
        y_pred = test_pred_df.idxmax(axis=1)
        class_order = list(test_pred_df.columns)
        y_prob = np.array(test_pred_df)
        return num_models_trained, num_models_ensemble, fit_time, y_pred, y_prob, predict_time, class_order
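# Usage sketch for gcptables_fit_predict (hedged): every value in gcp_info is a
# placeholder, not a real project setting; the data URLs reuse the Inc dataset
# from the auto-sklearn baseline script above.
if __name__ == '__main__':
    gcp_info = {
        'PROJECT_ID': 'my-gcp-project',                                  # hypothetical
        'BUCKET_NAME': 'my-gcs-bucket',                                  # hypothetical
        'COMPUTE_REGION': 'us-central1',                                 # hypothetical
        'GOOGLE_APPLICATION_CREDENTIALS': '/path/to/credentials.json',   # hypothetical
    }
    train_data = load_pd.load('https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv')
    test_data = load_pd.load('https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv')
    (num_models_trained, num_models_ensemble, fit_time,
     y_pred, y_prob, predict_time, class_order) = gcptables_fit_predict(
        train_data=train_data, test_data=test_data,
        dataset_name='inc_income', label_column='class',  # hypothetical names
        problem_type=BINARY, output_directory='gcp_models/',
        gcp_info=gcp_info, eval_metric='roc_auc', runtime_sec=3600)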
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/orig_vs_core10fold/'
    results_raw = load_pd.load(path=[
        results_dir_input + 'openml_core.csv',
        results_dir_input + 'openml_original.csv',
    ])
    frameworks_1h = [
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
    ]
    frameworks_4h = [
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]
    frameworks_run_list = [frameworks_1h, frameworks_4h]
    folds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    folds_to_keep_list = [folds, folds]
    banned_datasets_list = [DATASETS_LARGE, []]
    num_runs = len(frameworks_run_list)
    full_results_pairs_merged_dict = {}
    for i in range(num_runs):
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]
        banned_datasets = banned_datasets_list[i]
        for framework in frameworks_run:
            run_path_prefix = framework + '/'
            orig_framework = 'orig_' + framework
            results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
                results_raw=results_raw,
                frameworks=[framework, orig_framework],
                banned_datasets=banned_datasets,
                folds_to_keep=folds_to_keep,
                columns_to_agg_extra=[
                    # TIME_INFER_S,
                    'acc',
                    'auc',
                    'logloss'
                ],
                frameworks_compare_vs_all=[orig_framework],
                output_dir=results_dir_output + run_path_prefix,
            )
            full_results_pairs_merged_dict.update(results_pairs_merged_dict)
    dfs = []
    frameworks_full = frameworks_1h + frameworks_4h
    for framework in frameworks_full:
        orig_framework = 'orig_' + framework
        cur_df = full_results_pairs_merged_dict[orig_framework]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> Original'
        cur_columns[2] = '< Original'
        cur_columns[3] = '= Original'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)
    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/new_vs_old.csv', df=df_final)