# OpenML ablation study: evaluate AutoGluon variants (no stacking, no bagging, no repeated
# bagging, no neural network) against full AutoGluon at the 1h and 4h budgets.
# Assumed module-level imports (from the accompanying benchmarking utilities): load_pd / save_pd
# for CSV I/O, evaluate_results, and column-name constants such as DATASET, FRAMEWORK,
# METRIC_ERROR, METRIC_SCORE; plus `import pandas as pd` where pd is used below.
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/ablation/'
    results_raw = load_pd.load(path=[
        results_dir_input + 'openml_core.csv',
        results_dir_input + 'openml_autogluon_ablation.csv',
    ])

    frameworks_1h = [
        'autogluon_1h',
        'autogluon_nostack_1h',
        'autogluon_nobag_1h',
        'autogluon_norepeatbag_1h',
        'autogluon_nonn_1h',
        # 'autogluon_noknn_1h',
    ]
    frameworks_4h = [
        'autogluon_4h',
        'autogluon_nostack_4h',
        'autogluon_nobag_4h',
        'autogluon_norepeatbag_4h',
        'autogluon_nonn_4h',
        # 'autogluon_noknn_4h',
    ]

    run_path_prefix_list = ['1h/', '4h/', 'combined/']
    frameworks_compare_vs_all_list = [['autogluon_1h'], ['autogluon_4h'], ['autogluon_1h', 'autogluon_4h']]
    frameworks_run_list = [
        frameworks_1h,
        frameworks_4h,
        frameworks_1h + frameworks_4h,
    ]
    folds_to_keep_list = [[0], [0], [0]]
    banned_datasets = []

    num_runs = len(run_path_prefix_list)
    for i in range(num_runs):
        run_path_prefix = run_path_prefix_list[i]
        frameworks_compare_vs_all = frameworks_compare_vs_all_list[i]
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]

        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_run,
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss',
            ],
            frameworks_compare_vs_all=frameworks_compare_vs_all,
            output_dir=results_dir_output + run_path_prefix,
        )
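# Illustrative only: a minimal sketch (not the real schema) of the kind of table that
# load_pd.load() returns and evaluate_results.evaluate() consumes above. The literal column
# names and values below are assumptions for illustration; the actual names come from the
# benchmarking constants (DATASET, FRAMEWORK, METRIC_ERROR, ...) referenced in these scripts.
import pandas as pd

_example_results_raw = pd.DataFrame({
    'dataset': ['adult', 'adult'],                           # hypothetical dataset column
    'fold': [0, 0],                                          # fold index filtered via folds_to_keep
    'framework': ['autogluon_1h', 'autogluon_nostack_1h'],   # framework tags as listed above
    'metric_error': [0.12, 0.14],                            # primary error used for ranking
    'acc': [0.88, 0.86],                                     # extra aggregated columns
    'auc': [0.93, 0.91],
    'logloss': [0.31, 0.34],
})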
# OpenML core comparison: AutoGluon vs. the other AutoML frameworks at the 1h and 4h budgets
# (fold 0 only).
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/core/'
    results_raw = load_pd.load(path=results_dir_input + 'openml_core.csv')

    frameworks_1h = [
        'autogluon_1h',
        'GCPTables_1h',
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
    ]
    frameworks_4h = [
        'autogluon_4h',
        'GCPTables_4h',
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]

    run_path_prefix_list = ['1h/', '4h/']
    frameworks_compare_vs_all_list = [['autogluon_1h'], ['autogluon_4h']]
    frameworks_run_list = [frameworks_1h, frameworks_4h]
    folds_to_keep_list = [[0], [0]]
    banned_datasets = []

    num_runs = len(run_path_prefix_list)
    for i in range(num_runs):
        run_path_prefix = run_path_prefix_list[i]
        frameworks_compare_vs_all = frameworks_compare_vs_all_list[i]
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]

        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_run,
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss',
            ],
            frameworks_compare_vs_all=frameworks_compare_vs_all,
            output_dir=results_dir_output + run_path_prefix,
        )
# 1h vs. 4h comparison: for each framework, compare its 1h run against its own 4h run, then
# concatenate the per-framework pairwise win/loss/tie tables into a single CSV.
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/core_1h_vs_4h/'
    results_raw = load_pd.load(path=results_dir_input + 'openml_core.csv')

    frameworks = [
        'autogluon',
        'GCPTables',
        'H2OAutoML',
        'autosklearn',
        'TPOT',
        'AutoWEKA',
    ]

    folds_to_keep = [0]
    banned_datasets = []

    full_results_pairs_merged_dict = {}
    for framework in frameworks:
        run_path_prefix = framework + '/'
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'

        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=[framework_1h, framework_4h],
            banned_datasets=banned_datasets,
            folds_to_keep=folds_to_keep,
            columns_to_agg_extra=[
                # TIME_INFER_S,
                'acc',
                'auc',
                'logloss',
            ],
            frameworks_compare_vs_all=[framework_4h],
            output_dir=results_dir_output + run_path_prefix,
        )
        full_results_pairs_merged_dict.update(results_pairs_merged_dict)

    # Keep only each framework's 1h row from its pairwise table vs. the 4h run, and relabel the
    # win/loss/tie columns accordingly.
    dfs = []
    for framework in frameworks:
        framework_1h = framework + '_1h'
        framework_4h = framework + '_4h'
        cur_df = full_results_pairs_merged_dict[framework_4h]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework_1h]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> 4h'
        cur_columns[2] = '< 4h'
        cur_columns[3] = '= 4h'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)

    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/1h_vs_4h.csv', df=df_final)
# Original vs. re-run comparison (10 folds): compare the re-run frameworks against the original
# AutoML Benchmark results ('orig_' prefix). Large datasets are excluded from the 1h comparison.
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/openml/'
    results_dir_output = results_dir + 'output/openml/orig_vs_core10fold/'
    results_raw = load_pd.load(path=[
        results_dir_input + 'openml_core.csv',
        results_dir_input + 'openml_original.csv',
    ])

    frameworks_1h = [
        'H2OAutoML_1h',
        'autosklearn_1h',
        'TPOT_1h',
        'AutoWEKA_1h',
    ]
    frameworks_4h = [
        'H2OAutoML_4h',
        'autosklearn_4h',
        'TPOT_4h',
        'AutoWEKA_4h',
    ]

    frameworks_run_list = [frameworks_1h, frameworks_4h]
    folds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    folds_to_keep_list = [folds, folds]
    banned_datasets_list = [DATASETS_LARGE, []]

    num_runs = len(frameworks_run_list)
    full_results_pairs_merged_dict = {}
    for i in range(num_runs):
        frameworks_run = frameworks_run_list[i]
        folds_to_keep = folds_to_keep_list[i]
        banned_datasets = banned_datasets_list[i]
        for framework in frameworks_run:
            run_path_prefix = framework + '/'
            orig_framework = 'orig_' + framework

            results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
                results_raw=results_raw,
                frameworks=[framework, orig_framework],
                banned_datasets=banned_datasets,
                folds_to_keep=folds_to_keep,
                columns_to_agg_extra=[
                    # TIME_INFER_S,
                    'acc',
                    'auc',
                    'logloss',
                ],
                frameworks_compare_vs_all=[orig_framework],
                output_dir=results_dir_output + run_path_prefix,
            )
            full_results_pairs_merged_dict.update(results_pairs_merged_dict)

    # Keep each re-run framework's row from its pairwise table vs. the original results, and
    # relabel the win/loss/tie columns accordingly.
    dfs = []
    frameworks_full = frameworks_1h + frameworks_4h
    for framework in frameworks_full:
        orig_framework = 'orig_' + framework
        cur_df = full_results_pairs_merged_dict[orig_framework]
        cur_df = cur_df[cur_df[FRAMEWORK] == framework]
        cur_columns = list(cur_df.columns)
        cur_columns[1] = '> Original'
        cur_columns[2] = '< Original'
        cur_columns[3] = '= Original'
        cur_df.columns = cur_columns
        dfs.append(cur_df)
    df_final = pd.concat(dfs, ignore_index=True)

    print(df_final)
    save_pd.save(path=results_dir_output + 'pairwise/new_vs_old.csv', df=df_final)
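# Hypothetical refactor, not part of the original scripts: the two scripts above repeat the same
# post-processing on each pairwise table (keep the row for one framework, then rename the
# win/loss/tie columns, which are assumed to sit at positions 1-3). A small helper sketching that
# shared pattern, e.g. dfs.append(_relabel_pairwise(full_results_pairs_merged_dict[framework_4h],
# framework_1h, '4h')):
def _relabel_pairwise(cur_df, keep_framework, vs_label):
    """Filter a pairwise-comparison table to `keep_framework` and relabel its win/loss/tie columns."""
    cur_df = cur_df[cur_df[FRAMEWORK] == keep_framework].copy()
    cur_columns = list(cur_df.columns)
    cur_columns[1] = '> ' + vs_label  # wins vs. the reference run
    cur_columns[2] = '< ' + vs_label  # losses vs. the reference run
    cur_columns[3] = '= ' + vs_label  # ties vs. the reference run
    cur_df.columns = cur_columns
    return cur_df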
# Kaggle benchmark evaluation: dump dataset x framework percentile tables (CSV + LaTeX), run
# pairwise comparisons vs. AutoGluon at the 4h and 8h budgets, and generate plots.
def run():
    results_dir = 'data/results/'
    results_dir_input = results_dir + 'input/prepared/kaggle/'
    output_prefix = 'output/kaggle/'
    raw_kaggle_file = 'results_kaggle_wpercentile.csv'
    results_raw = load_pd.load(path=[
        results_dir_input + 'kaggle_core.csv',
    ])

    # First generate datasets x frameworks raw data dumps:
    metrics = ['LEADER_PERCENTILE', METRIC_SCORE]
    dataset_order = [
        'house-prices-advanced-regression-techniques',
        'mercedes-benz-greener-manufacturing',
        'santander-value-prediction-challenge',
        'allstate-claims-severity',
        'bnp-paribas-cardif-claims-management',
        'santander-customer-transaction-prediction',
        'santander-customer-satisfaction',
        'porto-seguro-safe-driver-prediction',
        'ieee-fraud-detection',
        'walmart-recruiting-trip-type-classification',
        'otto-group-product-classification-challenge',
    ]
    dataset_order = [KAGGLE_ABBREVS[dat] for dat in dataset_order]
    method_order = [
        'AutoWEKA',
        'autosklearn',
        'TPOT',
        'H2OAutoML',
        'GCPTables',
        'autogluon',
    ]
    time_limits = ['4h', '8h']

    results_raw2 = results_raw.drop(METRIC_ERROR, axis=1).copy()
    results_raw2['LEADER_PERCENTILE'] = 1 - results_raw2['LEADER_PERCENTILE']  # convert to actual percentile
    results_raw2.rename(columns={'LEADER_PERCENTILE': METRIC_ERROR}, inplace=True)
    # loss_df = generate_charts.compute_dataset_framework_df(results_raw)  # values = losses
    percentile_df = generate_charts.compute_dataset_framework_df(results_raw2)

    for time_limit in time_limits:
        methods_t = [meth + "_" + time_limit for meth in method_order]
        df_time = percentile_df[[DATASET] + methods_t].copy()
        df_time[DATASET] = df_time[DATASET].map(KAGGLE_ABBREVS)
        df_ordered = df_time.set_index(DATASET)
        df_ordered = df_ordered.reindex(dataset_order)
        # df_ordered.reset_index(inplace=True)
        # df_ordered.rename(columns={'dataset': 'Dataset'}, inplace=True)
        df_ordered.rename(columns=NOTIME_NAMES, inplace=True)
        save_pd.save(path=results_dir + output_prefix + time_limit + "/datasetsXframeworks.csv", df=df_ordered)
        textable_file = results_dir + output_prefix + time_limit + "/allpercentiles.tex"
        tex_table.tex_table(df_ordered, textable_file, bold='max', nan_char=" x ", max_digits=5)

    # Next do pairwise comparisons:
    num_frameworks = 6
    valid_frameworks = [
        'autogluon_4h', 'GCPTables_4h', 'autosklearn_4h', 'H2OAutoML_4h', 'TPOT_4h', 'AutoWEKA_4h',
        'autogluon_8h', 'GCPTables_8h', 'H2OAutoML_8h', 'autosklearn_8h', 'TPOT_8h', 'AutoWEKA_8h',
    ]
    frameworks_compare_vs_all_list = ['autogluon_4h', 'autogluon_8h', 'autogluon_4h', 'autogluon_8h']
    results_dir_output_list = ['4h/', '8h/', 'allVautogluon_4h/', 'allVautogluon_8h/']
    results_dir_output_list = [results_dir + output_prefix + name for name in results_dir_output_list]
    # List of lists, each corresponding to indices of valid_frameworks that should be compared in a single table.
    framework_compare_ind_list = [
        list(range(num_frameworks)),
        list(range(num_frameworks, num_frameworks * 2)),
        range(num_frameworks * 2),
        range(num_frameworks * 2),
    ]

    for i in range(len(results_dir_output_list)):
        results_dir_output = results_dir_output_list[i]
        frameworks_to_compare = [valid_frameworks[j] for j in framework_compare_ind_list[i]]
        framework_compare_vs_all = frameworks_compare_vs_all_list[i]

        results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate(
            results_raw=results_raw,
            frameworks=frameworks_to_compare,
            banned_datasets=[],
            folds_to_keep=None,
            frameworks_compare_vs_all=[framework_compare_vs_all],
            output_dir=results_dir_output,
            columns_to_agg_extra=['LEADER_PERCENTILE'],
        )
        textab = tex_pairwise_table(results_dir_output, framework_compare_vs_all)

    # Generate plots:
    producePlots(time_limits, results_dir, raw_kaggle_file)
def run(): results_dir = 'data/results/' results_dir_input = results_dir + 'input/prepared/openml/' results_dir_output = results_dir + 'output/openml/accuracy/' results_raw = load_pd.load( path=[ results_dir_input + 'openml_core.csv', results_dir_input + 'openml_autopilot.csv' ], worker_count=1 ) valid_frameworks = [ 'autogluon_1h', 'GCPTables_1h', 'H2OAutoML_1h', 'autosklearn_1h', 'TPOT_1h', 'AutoWEKA_1h', 'AutoPilot_1h', ] results_raw[METRIC_SCORE] = results_raw['acc'] results_raw[METRIC_ERROR] = 1 - results_raw[METRIC_SCORE] run_path_prefix = '1h/' banned_datasets = [] folds_to_keep = [0] results_ranked, results_ranked_by_dataset, results_ranked_all, results_ranked_by_dataset_all, results_pairs_merged_dict = evaluate_results.evaluate( results_raw=results_raw, frameworks=valid_frameworks, banned_datasets=banned_datasets, folds_to_keep=folds_to_keep, columns_to_agg_extra=[ # TIME_INFER_S, 'acc', ], frameworks_compare_vs_all=['autogluon_1h', 'AutoPilot_1h'], output_dir=results_dir_output + run_path_prefix, )