def to_data_frame(file_names, result_objs, varying_params, constant_params,
                  shorten_headers=True):
    """Combine per-experiment parameters and final results into a
    MetaDataFrame indexed by the numeric part of each result file name."""
    all_params = [merge_dicts(var, constant_params) for var in varying_params]
    file_numbers = [int(f.split('/')[-1].split('.')[0]) for f in file_names]
    # remove dollars from templated parameter values
    for param_dict in all_params:
        for key, val in param_dict.iteritems():
            param_dict[key] = remove_dollar(val)
    param_keys = all_params[0].keys()
    param_vals = [[v[key] for key in param_keys] for v in all_params]
    # transform lists to tuples to make them hashable
    param_vals = [[to_tuple_if_list(v) for v in var_list]
                  for var_list in param_vals]
    param_vals = np.array(param_vals, dtype=object)
    test_accs = (1 - get_final_misclasses(result_objs, 'test')) * 100
    train_accs = (1 - get_final_misclasses(result_objs, 'train')) * 100
    training_times = get_training_times(result_objs)
    # sample accuracies may or may not have been monitored
    sample_accs_exist = (
        hasattr(result_objs[0], 'monitor_channels') and
        'test_sample_misclass' in result_objs[0].monitor_channels)
    if sample_accs_exist:
        test_sample_accs = (
            1 - get_final_misclasses(result_objs, 'test_sample')) * 100
        train_sample_accs = (
            1 - get_final_misclasses(result_objs, 'train_sample')) * 100
        vals_and_misclasses = np.append(
            param_vals,
            np.array([training_times, test_accs, test_sample_accs,
                      train_accs, train_sample_accs]).T,
            axis=1)
    else:
        vals_and_misclasses = np.append(
            param_vals,
            np.array([training_times, test_accs, train_accs]).T,
            axis=1)
    if shorten_headers:
        param_keys = [prettify_word(key) for key in param_keys]
    if sample_accs_exist:
        all_keys = param_keys + ['time', 'test', 'test_sample',
                                 'train', 'train_sample']
    else:
        all_keys = param_keys + ['time', 'test', 'train']
    data_frame = MetaDataFrame(vals_and_misclasses, index=file_numbers,
                               columns=all_keys)
    data_frame = to_numeric_where_possible(data_frame)
    data_frame.time = pd.to_timedelta(np.round(data_frame.time), unit='s')
    return data_frame
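# merge_dicts, remove_dollar, to_tuple_if_list and to_numeric_where_possible
# are small helpers defined elsewhere in the repository. The definitions below
# are only a plausible sketch of what they do, inferred from how they are used
# above -- assumptions, not the repository's actual implementations.
# (prettify_word, MetaDataFrame, get_final_misclasses and get_training_times
# are likewise defined elsewhere and are not reconstructed here.)
import pandas as pd


def merge_dicts(*dicts):
    """Merge dicts left to right; later values overwrite earlier ones."""
    merged = {}
    for d in dicts:
        merged.update(d)
    return merged


def remove_dollar(value):
    """Strip a leading '$' from template-style parameter values."""
    if isinstance(value, str) and value.startswith('$'):
        return value[1:]
    return value


def to_tuple_if_list(value):
    """Convert lists to tuples so parameter values are hashable."""
    return tuple(value) if isinstance(value, list) else value


def to_numeric_where_possible(df):
    """Convert each dataframe column to a numeric dtype where possible."""
    df = df.copy()
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass
    return df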
def print_stats(results, csp_results, n_diffs=None):
    """Print paired accuracy, significance and runtime statistics for the
    deep network results vs. the CSP results."""
    res_misclasses = get_final_misclasses(results)
    csp_misclasses = get_final_misclasses(csp_results)
    res_times = get_training_times(results)
    csp_times = get_training_times(csp_results)
    # make a the smaller misclass, b the larger misclass
    if np.mean(res_misclasses) < np.mean(csp_misclasses):
        a = res_misclasses
        b = csp_misclasses
    else:
        a = csp_misclasses
        b = res_misclasses
    actual_diff = np.mean(a - b)
    # exhaustive permutation test if n_diffs is None, otherwise sampled
    if n_diffs is None:
        diffs = perm_mean_diffs(a, b)
    else:
        diffs = perm_mean_diffs_sampled(a, b, n_diffs=n_diffs)
    res_to_csp_diff = np.mean(res_misclasses - csp_misclasses)
    print("deep accuracy: {:.1f}".format(100 * (1 - np.mean(res_misclasses))))
    print("csp accuracy: {:.1f}".format(100 * (1 - np.mean(csp_misclasses))))
    print("diff accuracy: {:.1f}".format(100 * -res_to_csp_diff))
    print("std : {:.1f}".format(
        100 * np.std(res_misclasses - csp_misclasses)))
    print("one sided perm {:.5f}".format(
        np.sum(diffs <= actual_diff) / float(len(diffs))))
    print("one sided wilcoxon {:.5f}".format(
        scipy.stats.wilcoxon(res_misclasses, csp_misclasses)[1] / 2))
    #print("two sided perm {:.5f}".format(np.sum(
    #    abs(diffs) >= abs(actual_diff)) / float(len(diffs))))
    #print("two sided wilcoxon {:.5f}".format(scipy.stats.wilcoxon(
    #    res_misclasses, csp_misclasses)[1]))
    print("deep time: {:s}".format(
        str(datetime.timedelta(seconds=round(np.mean(res_times))))))
    print("csp time: {:s}".format(
        str(datetime.timedelta(seconds=round(np.mean(csp_times))))))
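# perm_mean_diffs and perm_mean_diffs_sampled are assumed to implement a
# paired sign-flip permutation test on the per-subject misclassification
# differences: under the null hypothesis the sign of each paired difference
# is arbitrary, so the observed mean difference is compared against the
# distribution of mean differences over all (or many random) sign flips.
# Because `a` is chosen as the group with the smaller mean misclassification,
# actual_diff is non-positive and the one-sided p-value above is the fraction
# of permuted mean differences that are at least as small. The sketch below
# follows that reading; it is an assumption, not the original implementation.
import itertools

import numpy as np


def perm_mean_diffs(a, b):
    """Exhaustive null distribution of mean(a - b) under sign flips.

    Enumerates all 2**n sign assignments, so it is only feasible for a
    small number of paired samples."""
    diffs = np.asarray(a) - np.asarray(b)
    signs = np.array(list(itertools.product([1, -1], repeat=len(diffs))))
    return np.mean(signs * diffs, axis=1)


def perm_mean_diffs_sampled(a, b, n_diffs):
    """Monte Carlo approximation using n_diffs random sign assignments."""
    diffs = np.asarray(a) - np.asarray(b)
    signs = np.random.choice([1, -1], size=(n_diffs, len(diffs)))
    return np.mean(signs * diffs, axis=1)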