import numpy as np
from scipy.stats import wilcoxon


def evaluation(RMSEP, RMSEP_other, y, y_predict, y_other_predict):
    """Compare two models: percentage RMSEP improvement plus a Wilcoxon
    signed-rank p-value on the paired residuals."""
    h = (1 - RMSEP / RMSEP_other) * 100
    y_new = np.subtract(y_predict, y).ravel()
    y_old = np.subtract(y_other_predict, y).ravel()
    # Paired test on the two models' residuals.
    p = wilcoxon(y_new, y_old)
    return p[1], h
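# A minimal usage sketch for `evaluation` above on synthetic data; the
# array sizes, noise scales and the `rmse` helper are illustrative
# assumptions, not part of the original code.
import numpy as np

rng = np.random.default_rng(0)
y_true = rng.normal(size=100)
pred_a = y_true + rng.normal(scale=0.1, size=100)   # hypothetical model A
pred_b = y_true + rng.normal(scale=0.2, size=100)   # hypothetical model B


def rmse(y, y_hat):
    return float(np.sqrt(np.mean((y_hat - y) ** 2)))


p_value, improvement = evaluation(rmse(y_true, pred_a), rmse(y_true, pred_b),
                                  y_true, pred_a, pred_b)
print('improvement: %.1f%%, Wilcoxon p = %.4f' % (improvement, p_value))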
from os.path import join

import matplotlib.pyplot as plt
from pandas import concat, read_pickle
from scipy.stats import wilcoxon


def codon_bias(outdir):
    # Data source: https://elifesciences.org/articles/41043
    df = read_pickle('./resource/Proc_strains_codons.df')
    thr = df.loc[['ACT', 'ACA', 'ACC', 'ACG']]
    thr = thr.truediv(thr.sum()).T
    ile = df.loc[['ATT', 'ATA', 'ATC']]
    ile = ile.truediv(ile.sum()).T

    # Paired tests between codon-usage fractions across strains.
    print(wilcoxon(thr['ACT'], thr['ACA']))
    print(wilcoxon(thr['ACC'], thr['ACG']))
    print(wilcoxon(ile['ATT'], ile['ATA']))

    _, ax = plt.subplots(1, figsize=(8.2, 5.2), dpi=144)
    bp = ax.violinplot(concat([thr, ile], axis=1).T,
                       positions=[1, 2, 3, 4, 6, 7, 8])
    for partname in ('cbars', 'cmins', 'cmaxes'):
        vp = bp[partname]
        vp.set_edgecolor('k')
        vp.set_linewidth(1)
    for body in bp['bodies'][:4]:
        body.set_color('#0d4c7c')   # threonine codons
    for body in bp['bodies'][-3:]:
        body.set_color('#891919')   # isoleucine codons
    ax.set_ylim(0, 1.)
    ax.set_xticks(range(1, 9))
    plt.savefig(join(outdir, 'Codon_usage.png'), dpi=144)
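# Self-contained sketch of the violin-part styling used above, on random
# data; only the colours and positions are carried over, the data and the
# output filename are illustrative.
import matplotlib.pyplot as plt
import numpy as np

rng = np.random.default_rng(1)
samples = [rng.beta(2, 5, size=50) for _ in range(7)]

_, ax = plt.subplots(1, figsize=(8.2, 5.2), dpi=144)
bp = ax.violinplot(samples, positions=[1, 2, 3, 4, 6, 7, 8])
# 'cbars'/'cmins'/'cmaxes' are the extrema lines drawn when
# showextrema=True (the default); 'bodies' are the filled violins.
for partname in ('cbars', 'cmins', 'cmaxes'):
    bp[partname].set_edgecolor('k')
    bp[partname].set_linewidth(1)
for body in bp['bodies'][:4]:
    body.set_color('#0d4c7c')
for body in bp['bodies'][-3:]:
    body.set_color('#891919')
ax.set_ylim(0, 1.0)
ax.set_xticks(range(1, 9))
plt.savefig('violin_styling_sketch.png', dpi=144)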
import numpy as np
import pandas as pd
from scipy.stats import wilcoxon


def pairwise_compare_frame(df, with_p_vals=False):
    # `perm_mean_diff_test`, `to_numeric_where_possible` and
    # `round_numeric_columns` are helpers defined elsewhere in this project.
    table_vals = []
    table_indices = []
    param_keys = set(df.keys()) - set(
        ['test', 'time', 'train', 'test_sample', 'train_sample'])
    for key in param_keys:
        if key in ('dataset_filename', 'test_filename', 'subject_id'):
            continue
        possible_vals = df[key].unique()
        for i_value_a in range(0, len(possible_vals) - 1):
            for i_value_b in range(i_value_a + 1, len(possible_vals)):
                val_a = possible_vals[i_value_a]
                val_b = possible_vals[i_value_b]
                frame_1 = df[df[key] == val_a]
                frame_2 = df[df[key] == val_b]
                other_param_keys = list(param_keys - set([key]))
                joined_frame = frame_1.merge(frame_2, on=other_param_keys)
                if joined_frame.size == 0:
                    continue
                accuracies_a = np.array(joined_frame.test_x, dtype=np.float64)
                accuracies_b = np.array(joined_frame.test_y, dtype=np.float64)
                mean_a = np.mean(accuracies_a)
                mean_b = np.mean(accuracies_b)
                # Always put the better value first in the table
                if mean_a >= mean_b:
                    accuracies_1, accuracies_2 = accuracies_a, accuracies_b
                    mean_1, mean_2 = mean_a, mean_b
                    val_1, val_2 = val_a, val_b
                else:
                    accuracies_1, accuracies_2 = accuracies_b, accuracies_a
                    mean_1, mean_2 = mean_b, mean_a
                    val_1, val_2 = val_b, val_a
                if with_p_vals:
                    # Exact permutation test for small samples, Wilcoxon
                    # signed-rank otherwise.
                    if len(accuracies_1) <= 18:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2) * 100
                    elif len(accuracies_1) <= 62:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2, n_diffs=2**17) * 100
                    else:
                        _, diff_perm = wilcoxon(accuracies_1, accuracies_2)
                        diff_perm *= 100
                diffs = accuracies_2 - accuracies_1
                diff_std = np.std(diffs)
                diff_mean = np.mean(diffs)
                this_vals = [len(accuracies_1), str(val_1), str(val_2),
                             mean_1, mean_2, diff_mean, diff_std]
                if with_p_vals:
                    this_vals.append(diff_perm)
                table_vals.append(this_vals)
                table_indices.append(key)
    if len(table_vals) == 0:
        return None
    table_vals = np.array(table_vals)
    compare_headers = ['n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2',
                       'diff', 'std']
    if with_p_vals:
        compare_headers.append('p_val')
    compare_frame = pd.DataFrame(table_vals, columns=compare_headers,
                                 index=table_indices)
    compare_frame = to_numeric_where_possible(compare_frame)
    compare_frame = round_numeric_columns(compare_frame, 1)
    return compare_frame
import numpy as np
from scipy.stats import shapiro, wilcoxon

# Fragment of a per-approach evaluation loop: `b_rec`/`appro_rec` are sparse
# baseline/approach recommendation score columns, `elementwise_me` is their
# elementwise difference, and the `approachi_*` lists, `me_improvement`,
# `results_dataframe` and the labels are all defined earlier in the loop.
elementwise_sum = b_rec + appro_rec
nz_ids = elementwise_sum.nonzero()
elementwise_rpd = np.zeros(b_rec.shape)
# Relative difference, guarded against division by zero.
elementwise_rpd[nz_ids] = np.divide(elementwise_me[nz_ids],
                                    elementwise_sum[nz_ids])
approachi_rpd.append(elementwise_rpd)
approachi_shapiro.append(shapiro(appro_rec.T.toarray()[0])[1])
me_improvement.append(
    len(np.nonzero(elementwise_rpd <= 0)[0]) / elementwise_rpd.shape[0])
if approachi_label != baseline_label:
    h_test = b_rec - appro_rec
    approachi_wilcoxon.append(
        wilcoxon(h_test.T.toarray()[0])[1])
    approachi_wilcoxon_pratt.append(
        wilcoxon(h_test.T.toarray()[0], zero_method="pratt")[1])
    del h_test
else:
    # The baseline compared against itself: all differences are zero.
    approachi_wilcoxon.append(1)
    approachi_wilcoxon_pratt.append(1)
del b_rec, appro_rec
approachi_rpd = np.hstack(approachi_rpd)
results_dataframe.loc[:, "%sMe_std" % (approachi_label)] = \
    (approachi_rpd / 2).std(axis=0)
# The original snippet breaks off here, mid-assignment:
results_dataframe.loc[:, "%sRPD_mean" %
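# Dense-array illustration of the masked-divide pattern above (relative
# difference of two score vectors); `a` and `b` are synthetic stand-ins
# for the sparse `b_rec`/`appro_rec` columns.
import numpy as np

a = np.array([0.0, 1.0, 2.0, 0.0, 4.0])
b = np.array([0.0, 3.0, 2.0, 1.0, 0.0])
diff, total = a - b, a + b
rpd = np.zeros_like(a)
nz = total.nonzero()
rpd[nz] = diff[nz] / total[nz]   # stays zero wherever both scores are zero
print(rpd)                       # [ 0.  -0.5  0.  -1.   1. ]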
import datetime
import os
import pickle
import time

import numpy as np
from scipy.stats import kendalltau, pearsonr, spearmanr, wilcoxon
from sklearn.metrics import accuracy_score, f1_score, log_loss, roc_auc_score

# `load_train_test_data`, `load_results_data`, `get_fold_data`,
# `load_ling_features`, `collate_AL_results`, `data_root_dir` and
# `resultsfile_template` are defined elsewhere in this project.


def compute_metrics(expt_settings, methods, datasets, feature_types,
                    embeddings_types, accuracy=1.0, di=0, npairs=0, tag='',
                    remove_seen_from_mean=False, max_no_folds=32,
                    min_folds_desired=0, compute_tr_performance=False,
                    flip_labels=[]):
    expt_settings['acc'] = accuracy
    expt_settings['di'] = di

    row_index = np.zeros(len(methods) * len(datasets), dtype=object)
    columns = np.zeros(len(feature_types) * len(embeddings_types),
                       dtype=object)
    row = 0

    if expt_settings['di'] == 0 or np.ceil(
            float(npairs) / float(expt_settings['di'])) == 0:
        AL_rounds = np.array([0]).astype(int)
    else:
        AL_rounds = np.arange(expt_settings['di'],
                              npairs + expt_settings['di'],
                              expt_settings['di'], dtype=int)

    if tag == '':
        ts = time.time()
        tag = datetime.datetime.fromtimestamp(ts).strftime(
            '%Y-%m-%d-%H-%M-%S')

    for d, dataset_next in enumerate(datasets):
        docids = None

        if expt_settings['dataset'] != dataset_next or \
                expt_settings['folds'] is None:
            expt_settings['dataset'] = dataset_next
            expt_settings['folds'], expt_settings['folds_regression'], \
                _, _, _ = load_train_test_data(expt_settings['dataset'])

        for m, expt_settings['method'] in enumerate(methods):
            if d == 0 and m == 0:
                if expt_settings['di'] == 0:
                    results_shape = (len(methods) * len(datasets),
                                     len(feature_types) * len(embeddings_types),
                                     len(expt_settings['folds']) + 1, 1)
                else:
                    results_shape = (len(methods) * len(datasets),
                                     len(feature_types) * len(embeddings_types),
                                     len(expt_settings['folds']) + 1,
                                     int(npairs / expt_settings['di']))

                results_f1 = np.zeros(results_shape)
                results_acc = np.zeros(results_shape)
                results_logloss = np.zeros(results_shape)
                results_auc = np.zeros(results_shape)

                results_pearson = np.zeros(results_shape)
                results_spearman = np.zeros(results_shape)
                results_kendall = np.zeros(results_shape)

                tr_results_f1 = np.zeros(results_shape)
                tr_results_acc = np.zeros(results_shape)
                tr_results_logloss = np.zeros(results_shape)
                tr_results_auc = np.zeros(results_shape)

            row_index[row] = expt_settings['method'] + ', ' + \
                expt_settings['dataset']
            col = 0

            for expt_settings['feature_type'] in feature_types:
                if expt_settings['feature_type'] == 'ling':
                    embeddings_to_use = ['']
                else:
                    embeddings_to_use = embeddings_types

                for expt_settings['embeddings_type'] in embeddings_to_use:
                    data, nFolds, resultsdir, resultsfile = load_results_data(
                        data_root_dir, resultsfile_template, expt_settings,
                        max_no_folds)

                    min_folds = min_folds_desired

                    for f in range(nFolds):
                        print("Processing fold %i" % f)

                        if expt_settings['fold_order'] is None:
                            # fall back to the fold order on this machine
                            fold = list(expt_settings['folds'].keys())[f]
                        else:
                            fold = expt_settings['fold_order'][f]
                            if fold[-2] == "'" and fold[0] == "'":
                                fold = fold[1:-2]
                            elif fold[-1] == "'" and fold[0] == "'":
                                fold = fold[1:-1]
                            expt_settings['fold_order'][f] = fold

                        # Look for new-style data in separate files for each
                        # fold. Prefer new-style data if both are found.
                        foldfile = resultsdir + '/fold%i.pkl' % f
                        if os.path.isfile(foldfile):
                            with open(foldfile, 'rb') as fh:
                                data_f = pickle.load(fh, encoding='latin1')
                        else:  # convert the old-style data to the new style
                            if data is None:
                                min_folds = f + 1
                                print('Skipping fold with no data %i' % f)
                                print("Skipping results for %s, %s, %s, %s" % (
                                    expt_settings['method'],
                                    expt_settings['dataset'],
                                    expt_settings['feature_type'],
                                    expt_settings['embeddings_type']))
                                print("Skipped filename was: %s, old-style "
                                      "results file would be %s" % (
                                          foldfile, resultsfile))
                                continue

                            if not os.path.isdir(resultsdir):
                                os.mkdir(resultsdir)

                            data_f = []
                            for thing in data:
                                if f in thing:
                                    data_f.append(thing[f])
                                else:
                                    data_f.append(thing)
                            with open(foldfile, 'wb') as fh:
                                pickle.dump(data_f, fh)

                        gold_disc, pred_disc, gold_prob, pred_prob, \
                            gold_rank, pred_rank, pred_tr_disc, \
                            pred_tr_prob, postprocced = get_fold_data(
                                data_f, f, expt_settings,
                                flip_labels=m in flip_labels)
                        if postprocced:
                            # data was postprocessed and needs saving
                            with open(foldfile, 'wb') as fh:
                                pickle.dump(data_f, fh)

                        if pred_tr_disc is not None:
                            print(str(pred_tr_disc.shape) + ', ' +
                                  str(pred_prob.shape) + ', ' +
                                  str(pred_tr_disc.shape[0] +
                                      pred_prob.shape[0]))

                        for AL_round, _ in enumerate(AL_rounds):
                            if AL_round >= pred_disc.shape[1]:
                                continue

                            # Skip the "don't know" items (gold label == 1).
                            results_f1[row, col, f, AL_round] = f1_score(
                                gold_disc[gold_disc != 1],
                                pred_disc[gold_disc != 1, AL_round],
                                average='macro')

                            results_acc[row, col, f, AL_round] = accuracy_score(
                                gold_disc[gold_disc != 1],
                                pred_disc[gold_disc != 1, AL_round])

                            results_logloss[row, col, f, AL_round] = log_loss(
                                gold_prob[gold_disc != 1],
                                pred_prob[gold_disc != 1, AL_round])

                            results_auc[row, col, f, AL_round] = roc_auc_score(
                                gold_prob[gold_disc != 1],
                                pred_prob[gold_disc != 1, AL_round])  # macro

                            if gold_rank is None and expt_settings[
                                    'folds_regression'] is not None:
                                if docids is None:
                                    _, docids = load_ling_features(
                                        expt_settings['dataset'])
                                # Ranking data was not saved in the original
                                # file; get it from folds_regression here.
                                _, rankscores_test, _, _ = expt_settings[
                                    'folds_regression'].get(fold)["test"]
                                gold_rank = np.array(rankscores_test)

                            if gold_rank is not None and pred_rank is not None:
                                results_pearson[row, col, f, AL_round] = \
                                    pearsonr(gold_rank,
                                             pred_rank[:, AL_round])[0]
                                results_spearman[row, col, f, AL_round] = \
                                    spearmanr(gold_rank,
                                              pred_rank[:, AL_round])[0]
                                results_kendall[row, col, f, AL_round] = \
                                    kendalltau(gold_rank,
                                               pred_rank[:, AL_round])[0]

                            def mean_unseen(result, remove_seen_from_mean):
                                if not remove_seen_from_mean:
                                    return result
                                N = len(gold_tr)
                                Nseen = (AL_round + 1) * expt_settings['di']
                                Nunseen = N - Nseen
                                return (result * N - Nseen) / Nunseen

                            if pred_tr_prob is not None and \
                                    AL_round < pred_tr_disc.shape[1] and \
                                    compute_tr_performance:
                                _, _, gold_tr, _, _, _, _ = expt_settings[
                                    'folds'].get(fold)["training"]
                                gold_tr = np.array(gold_tr)
                                if (gold_tr != 1).shape[0] != \
                                        pred_tr_disc.shape[0]:
                                    print("Mismatch in fold %s! %i, %i" % (
                                        fold, (gold_tr != 1).shape[0],
                                        pred_tr_disc.shape[0]))
%i, %i" % (fold, (gold_tr != 1).shape[0], pred_tr_disc.shape[0])) gold_tr_prob = gold_tr / 2.0 tr_results_f1[ row, col, f, AL_round] = mean_unseen( f1_score(gold_tr[gold_tr != 1], pred_tr_disc[gold_tr != 1, AL_round], average='macro'), remove_seen_from_mean) #skip the don't knows tr_results_acc[ row, col, f, AL_round] = mean_unseen( accuracy_score( gold_tr[gold_tr != 1], pred_tr_disc[gold_tr != 1, AL_round]), remove_seen_from_mean) tr_results_logloss[ row, col, f, AL_round] = mean_unseen( log_loss( gold_tr_prob[gold_tr != 1], pred_tr_prob[gold_tr != 1, AL_round]), remove_seen_from_mean) tr_results_auc[ row, col, f, AL_round] = mean_unseen( roc_auc_score( gold_tr_prob[gold_tr != 1], pred_tr_prob[gold_tr != 1, AL_round]), remove_seen_from_mean) elif pred_tr_prob is not None and AL_round >= pred_tr_disc.shape[ 1]: tr_results_f1[row, col, f, AL_round] = 1 tr_results_acc[row, col, f, AL_round] = 1 tr_results_auc[row, col, f, AL_round] = 1 tr_results_logloss[row, col, f, AL_round] = 0 for AL_round in range(results_f1.shape[3]): foldrange = np.arange( min_folds, max_no_folds ) # skip any rounds that did not complete when taking the mean foldrange = foldrange[results_f1[row, col, foldrange, AL_round] != 0] results_f1[row, col, -1, AL_round] = np.mean( results_f1[row, col, foldrange, AL_round], axis=0) results_acc[row, col, -1, AL_round] = np.mean( results_acc[row, col, foldrange, AL_round], axis=0) results_logloss[row, col, -1, AL_round] = np.mean( results_logloss[row, col, foldrange, AL_round], axis=0) results_auc[row, col, -1, AL_round] = np.mean( results_auc[row, col, foldrange, AL_round], axis=0) results_pearson[row, col, -1, AL_round] = np.mean( results_pearson[row, col, foldrange, AL_round], axis=0) results_spearman[row, col, -1, AL_round] = np.mean( results_spearman[row, col, foldrange, AL_round], axis=0) results_kendall[row, col, -1, AL_round] = np.mean( results_kendall[row, col, foldrange, AL_round], axis=0) tr_results_f1[row, col, -1, AL_round] = np.mean( tr_results_f1[row, col, foldrange, AL_round], axis=0) tr_results_acc[row, col, -1, AL_round] = np.mean( tr_results_acc[row, col, foldrange, AL_round], axis=0) tr_results_logloss[ row, col, -1, AL_round] = np.mean( tr_results_logloss[row, col, foldrange, AL_round], axis=0) tr_results_auc[row, col, -1, AL_round] = np.mean( tr_results_auc[row, col, foldrange, AL_round], axis=0) print('p-values for %s, %s, %s, %s:' % (expt_settings['dataset'], expt_settings['method'], expt_settings['feature_type'], expt_settings['embeddings_type'])) print( wilcoxon(results_f1[0, 0, foldrange, AL_round], results_f1[row, col, foldrange, AL_round])[1]) print( wilcoxon(results_acc[0, 0, foldrange, AL_round], results_acc[row, col, foldrange, AL_round])[1]) print( wilcoxon( results_logloss[0, 0, foldrange, AL_round], results_logloss[row, col, foldrange, AL_round])[1]) print( wilcoxon(results_auc[0, 0, foldrange, AL_round], results_auc[row, col, foldrange, AL_round])[1]) print( wilcoxon( results_pearson[0, 0, foldrange, AL_round], results_pearson[row, col, foldrange, AL_round])[1]) print( wilcoxon( results_spearman[0, 0, foldrange, AL_round], results_spearman[row, col, foldrange, AL_round])[1]) print( wilcoxon( results_kendall[0, 0, foldrange, AL_round], results_kendall[row, col, foldrange, AL_round])[1]) if row == 0: # set the column headers columns[col] = expt_settings[ 'feature_type'] + ', ' + expt_settings[ 'embeddings_type'] col += 1 row += 1 combined_labels = [] for row in row_index: for col in columns: combined_labels.append(str(row) + '_' + str(col)) 
    mean_results = []
    mean_results.append(collate_AL_results(
        AL_rounds, results_f1, combined_labels,
        "Macro-F1 scores for round %i: "))
    # For UKPConvArgStrict the don't knows are already omitted.
    mean_results.append(collate_AL_results(
        AL_rounds, results_acc, combined_labels,
        "Accuracy (excl. don't knows), round %i:"))
    # If AUC is higher than accuracy and F1 score, it suggests that the
    # decision boundary is not calibrated, or that accuracy may improve if
    # we exclude data points close to the decision boundary.
    mean_results.append(collate_AL_results(
        AL_rounds, results_auc, combined_labels, "AUC ROC, round %i:"))
    # (quality of the probability labels is taken into account)
    mean_results.append(collate_AL_results(
        AL_rounds, results_logloss, combined_labels,
        "Cross Entropy classification error, round %i: "))
    mean_results.append(collate_AL_results(
        AL_rounds, results_pearson, combined_labels,
        "Pearson's r for round %i: "))
    mean_results.append(collate_AL_results(
        AL_rounds, results_spearman, combined_labels,
        "Spearman's rho for round %i: "))
    mean_results.append(collate_AL_results(
        AL_rounds, results_kendall, combined_labels,
        "Kendall's tau for round %i: "))

    if np.any(tr_results_acc):
        mean_results.append(collate_AL_results(
            AL_rounds, tr_results_f1, combined_labels,
            "(TR) Macro-F1 scores for round %i: "))
        mean_results.append(collate_AL_results(
            AL_rounds, tr_results_acc, combined_labels,
            "(TR) Accuracy for round %i: "))
        mean_results.append(collate_AL_results(
            AL_rounds, tr_results_auc, combined_labels,
            "(TR) AUC ROC for round %i: "))
        mean_results.append(collate_AL_results(
            AL_rounds, tr_results_logloss, combined_labels,
            "(TR) Cross Entropy Error for round %i: "))

    # metricsfile = data_root_dir + 'outputdata/expt_root_dir' + \
    #     'metrics_%s.pkl' % (tag)
    # with open(metricsfile, 'w') as fh:
    #     pickle.dump((results_f1, results_acc, results_auc, results_logloss,
    #                  results_pearson, results_spearman, results_kendall), fh)

    # TODO: Correlations between reasons and features?
    # TODO: Correlations between reasons and latent argument features found
    # using preference components?

    return results_f1, results_acc, results_auc, results_logloss, \
        results_pearson, results_spearman, results_kendall, \
        tr_results_f1, tr_results_acc, tr_results_auc, tr_results_logloss, \
        mean_results, combined_labels
import os

import pandas as pd
from scipy.stats import wilcoxon

os.chdir('C:/Users/ikdem/PycharmProjects/Thesis_Analysis/Social_Media_Files')

amt_lags = 12
tweet_polarity = 'polarity'
companies = [
    'aNike Stock lagged correlations',
    'bSteven Madden Stock lagged correlations',
    'cSketchers Stock lagged correlations',
    'dWolverine World Wide Stock lagged correlations'
]

# For every lag, look at all the files and collect the correlation for that
# lag from each file, then run a one-sample Wilcoxon test on the list
# (equivalent to testing the correlations against a list of zeros).
for c in companies:
    for lag in range(0, amt_lags + 1):
        list_correlations = []
        for f in range(0, 30):
            try:
                df = pd.read_excel('file' + str(f) + '_laggedcorrelations' +
                                   tweet_polarity + '.xlsx')
                list_correlations.append(df[c][lag])
            except Exception:  # skip files that are missing or malformed
                continue
        print(str(f), c, tweet_polarity, lag * 5,
              wilcoxon(list_correlations))
import os
import pickle
import re
from collections import defaultdict

import numpy
from scipy.stats import wilcoxon

# `tests` is a list of test-name substrings defined earlier in the script.
for test in tests:
    for individual_pickle in os.listdir('pickles'):
        if test in individual_pickle:
            print('\n{}\n'.format(re.sub('.pickle', '', individual_pickle)))
            final_results = defaultdict(list)
            with open('pickles/{}'.format(individual_pickle),
                      'rb') as input_file:
                evaluations = pickle.load(input_file)[0]
            for test_type, test_results in evaluations.items():
                for single_novel in test_results:
                    median_within_novel = numpy.median(single_novel)
                    # var_within_novel = numpy.var(single_novel)
                    final_results[test_type].append(median_within_novel)
                    # final_results[test_type].append(var_within_novel)
            print('Median for common nouns: {}'.format(
                numpy.median(final_results['common_nouns'])))
            print('Median for proper names: {}'.format(
                numpy.median(final_results['proper_names'])))
            # Note: scipy's wilcoxon returns the signed-rank statistic W and
            # the p-value; the first value is treated as a z-score here when
            # computing the effect size r = z / sqrt(N).
            z_value, p_value = wilcoxon(final_results['common_nouns'][:59],
                                        final_results['proper_names'][:59])
            print('\nP-value: {}\nEffect size: {}'.format(
                p_value,
                abs(z_value /
                    numpy.sqrt(len(final_results['common_nouns'][:59])))))
            # Variance-based variant:
            # print('Variance for common nouns: {}'.format(
            #     numpy.var(final_results['common_nouns'])))
            # print('Variance for proper names: {}'.format(
            #     numpy.var(final_results['proper_names'])))
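# Aside on the effect size above: scipy's `wilcoxon` returns (W, p), not a
# z-score, so r = z / sqrt(N) is usually computed from an approximate z
# recovered from the two-sided p-value. A minimal sketch on synthetic data
# (the recovery step is an assumption about the intended analysis):
import numpy
from scipy.stats import norm, wilcoxon

rng = numpy.random.default_rng(2)
x = rng.normal(0.2, 1.0, size=59)
y = rng.normal(0.0, 1.0, size=59)
w_stat, p_value = wilcoxon(x, y)
z_approx = norm.isf(p_value / 2)                 # |z| for a two-sided p
effect_size_r = z_approx / numpy.sqrt(len(x))
print(w_stat, p_value, effect_size_r)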
from numpy import average, median
from scipy.stats import wilcoxon

# Results from an earlier run:
# tree = [96.19047619047619, 96.28571428571429, 95.61904761904762, 96.0, 96.57142857142857, 96.57142857142857, 95.14285714285714, 96.0, 96.19047619047619, 95.9047619047619, 96.28571428571429, 95.61904761904762, 97.33333333333333, 94.85714285714286, 95.14285714285714, 94.28571428571429, 94.19047619047619, 96.38095238095238, 95.04761904761905, 94.85714285714286, 96.19047619047619, 96.38095238095238, 96.38095238095238, 96.47619047619048, 96.19047619047619, 94.57142857142857, 96.38095238095238, 96.47619047619048, 96.47619047619048, 94.95238095238095, 96.19047619047619, 95.14285714285714, 95.71428571428571, 94.85714285714286, 95.9047619047619, 96.66666666666667, 94.95238095238095, 94.57142857142857, 94.19047619047619, 94.19047619047619, 95.9047619047619, 95.9047619047619, 94.66666666666667, 96.0952380952381, 96.0, 95.9047619047619, 97.14285714285714, 96.19047619047619, 96.28571428571429, 96.19047619047619]
# C457NN = [96.0, 94.95238095238095, 94.28571428571429, 95.61904761904762, 95.23809523809524, 96.38095238095238, 95.23809523809524, 96.0, 95.04761904761905, 96.0952380952381, 96.19047619047619, 94.57142857142857, 96.47619047619048, 96.38095238095238, 95.42857142857143, 94.85714285714286, 94.57142857142857, 96.19047619047619, 94.76190476190476, 94.47619047619048, 94.95238095238095, 95.61904761904762, 96.19047619047619, 96.0, 95.04761904761905, 95.33333333333333, 96.28571428571429, 95.52380952380952, 96.47619047619048, 95.61904761904762, 95.80952380952381, 95.14285714285714, 96.0952380952381, 95.04761904761905, 95.33333333333333, 96.85714285714286, 95.23809523809524, 96.0952380952381, 93.52380952380952, 95.52380952380952, 95.71428571428571, 96.0, 94.57142857142857, 96.66666666666667, 96.0, 95.61904761904762, 96.38095238095238, 95.61904761904762, 96.0, 95.61904761904762]

tree = [96.18604651162791, 96.18604651162791, 96.37209302325581, 96.46511627906976, 96.74418604651163, 96.09302325581395, 95.90697674418605, 96.18604651162791, 96.27906976744185, 96.27906976744185, 96.09302325581395, 96.18604651162791, 96.65116279069767, 96.18604651162791, 95.81395348837209, 96.0, 96.55813953488372, 96.18604651162791, 95.72093023255815, 95.72093023255815, 96.0, 95.90697674418605, 96.09302325581395, 96.0, 96.46511627906976, 96.18604651162791, 96.09302325581395, 95.90697674418605, 95.81395348837209, 96.55813953488372, 96.0, 96.46511627906976, 96.0, 96.09302325581395, 96.18604651162791, 95.72093023255815, 96.27906976744185, 95.62790697674419, 94.79069767441861, 95.81395348837209, 96.09302325581395, 96.18604651162791, 96.37209302325581, 96.37209302325581, 96.27906976744185, 96.09302325581395, 96.46511627906976, 96.74418604651163, 96.0, 96.18604651162791]

C457NN = [94.13953488372093, 95.53488372093024, 96.46511627906976, 95.44186046511628, 95.72093023255815, 95.53488372093024, 95.06976744186046, 96.0, 94.88372093023256, 95.81395348837209, 94.97674418604652, 94.69767441860465, 96.37209302325581, 95.25581395348837, 94.79069767441861, 95.53488372093024, 96.46511627906976, 96.0, 95.90697674418605, 95.62790697674419, 95.81395348837209, 94.32558139534883, 95.16279069767442, 94.4186046511628, 94.97674418604652, 96.0, 95.81395348837209, 95.34883720930233, 95.72093023255815, 95.90697674418605, 95.53488372093024, 95.72093023255815, 95.25581395348837, 95.62790697674419, 96.55813953488372, 96.37209302325581, 96.09302325581395, 94.51162790697674, 95.16279069767442, 94.79069767441861, 95.25581395348837, 94.69767441860465,
    96.46511627906976, 95.44186046511628, 95.81395348837209, 96.55813953488372, 95.25581395348837, 96.46511627906976, 94.97674418604652, 94.97674418604652]

print('average tree =', average(tree))
print('average C4.5(7NN) =', average(C457NN))
print('median tree =', median(tree))
print('median C4.5(7NN) =', median(C457NN))
print('wilcoxon test for J48 or C4.5(7NN):', wilcoxon(tree, C457NN))
import pandas as pd
from scipy.stats import wilcoxon

amt_lags = 48
file = 'Own_Classifier'
tweet_stock = 'polarity'

# For every lag, look at all the files and collect the correlation for that
# lag from each file, then run a one-sample Wilcoxon test on the list
# (equivalent to testing the correlations against a list of zeros).
for lag in range(0, amt_lags + 1):
    list_correlations = []
    for f in range(2, 55):
        try:
            df = pd.read_excel('../thesis_files/Correlations_4_hour_lag/' +
                               file + str(f) + '_laggedcorrelations_' +
                               tweet_stock + '.xlsx')
            list_correlations.append(df['SP500 lagged correlations'][lag])
        except Exception:  # skip files that are missing or malformed
            continue
    print(file, tweet_stock, lag * 5, wilcoxon(list_correlations))
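# The comment above describes a one-sample test. With scipy, passing a
# single array to `wilcoxon(x)` already tests the median of x against zero,
# which is the same as the paired call against a list of zeros mentioned in
# the comment. A minimal sketch on synthetic correlations:
import numpy as np
from scipy.stats import wilcoxon

rng = np.random.default_rng(3)
correlations = rng.normal(0.1, 0.2, size=30)   # stand-in for one lag
print(wilcoxon(correlations))                  # one-sample form
print(wilcoxon(correlations, np.zeros(len(correlations))))  # same test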
def pairwise_compare_frame(df, with_p_vals=False,
                           result_cols=('test', 'time', 'train',
                                        'test_sample', 'train_sample'),
                           compare_col='test'):
    # Variant of the function above with configurable result columns;
    # `perm_mean_diff_test`, `to_numeric_where_possible` and
    # `round_numeric_columns` are again helpers defined elsewhere.
    table_vals = []
    table_indices = []
    param_keys = set(df.keys()) - set(list(result_cols))
    for key in param_keys:
        if key in ('dataset_filename', 'test_filename', 'subject_id'):
            continue
        possible_vals = df[key].unique()
        for i_value_a in range(0, len(possible_vals) - 1):
            for i_value_b in range(i_value_a + 1, len(possible_vals)):
                val_a = possible_vals[i_value_a]
                val_b = possible_vals[i_value_b]
                frame_1 = df[df[key] == val_a]
                frame_2 = df[df[key] == val_b]
                other_param_keys = list(param_keys - set([key]))
                joined_frame = frame_1.merge(frame_2, on=other_param_keys)
                if joined_frame.size == 0:
                    continue
                accuracies_a = np.array(joined_frame[compare_col + '_x'],
                                        dtype=np.float64)
                accuracies_b = np.array(joined_frame[compare_col + '_y'],
                                        dtype=np.float64)
                mean_a = np.mean(accuracies_a)
                mean_b = np.mean(accuracies_b)
                # Always put the better value first in the table
                if mean_a >= mean_b:
                    accuracies_1, accuracies_2 = accuracies_a, accuracies_b
                    mean_1, mean_2 = mean_a, mean_b
                    val_1, val_2 = val_a, val_b
                else:
                    accuracies_1, accuracies_2 = accuracies_b, accuracies_a
                    mean_1, mean_2 = mean_b, mean_a
                    val_1, val_2 = val_b, val_a
                if with_p_vals:
                    # Exact permutation test for small samples, Wilcoxon
                    # signed-rank otherwise.
                    if len(accuracies_1) <= 18:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2) * 100
                    elif len(accuracies_1) <= 62:
                        diff_perm = perm_mean_diff_test(
                            accuracies_1, accuracies_2, n_diffs=2**17) * 100
                    else:
                        _, diff_perm = wilcoxon(accuracies_1, accuracies_2)
                        diff_perm *= 100
                diffs = accuracies_2 - accuracies_1
                diff_std = np.std(diffs)
                diff_mean = np.mean(diffs)
                this_vals = [len(accuracies_1), str(val_1), str(val_2),
                             mean_1, mean_2, diff_mean, diff_std]
                if with_p_vals:
                    this_vals.append(diff_perm)
                table_vals.append(this_vals)
                table_indices.append(key)
    if len(table_vals) == 0:
        return None
    table_vals = np.array(table_vals)
    compare_headers = ['n_exp', 'val_1', 'val_2', 'acc_1', 'acc_2',
                       'diff', 'std']
    if with_p_vals:
        compare_headers.append('p_val')
    compare_frame = pd.DataFrame(table_vals, columns=compare_headers,
                                 index=table_indices)
    compare_frame = to_numeric_where_possible(compare_frame)
    compare_frame = round_numeric_columns(compare_frame, 1)
    return compare_frame
def signif(self, files, bucktype='none'):
    """Compute significance over 3 input sets: test, system_output1,
    system_output2."""
    if len(files) != 3:
        raise ValueError(
            "You must supply 3 input files for `signif` command")
    if bucktype not in ['none', 'dialog']:
        raise ValueError("Unknown `bucktype`: %r" % bucktype)

    self.logger.debug("Importing scipy")
    from numpy import mean, median, sqrt
    from scipy.stats import t, tstd, tvar, wilcoxon

    forest1, forest2, forest3 = self.loadForestFiles(files)

    self.logger.info("Processing forests 1 and 2")
    diff1 = {}
    for fn, tree1, tree2, dist, script in self.forestProcessor(
            forest1, forest2):
        H, D, I, S = script.HDIS
        n_errors = D + I + S
        fn = self.filenameKey(fn, bucktype)
        diff1.setdefault(fn, 0.)
        diff1[fn] += n_errors

    self.logger.info("Processing forests 1 and 3")
    diff2 = {}
    for fn, tree1, tree2, dist, script in self.forestProcessor(
            forest1, forest3):
        H, D, I, S = script.HDIS
        n_errors = D + I + S
        fn = self.filenameKey(fn, bucktype)
        diff2.setdefault(fn, 0.)
        diff2[fn] += n_errors

    def mapsswe(x, y):
        # Matched-pairs test on the per-bucket error counts.
        xm = mean(x)
        ym = mean(y)
        s = 0.
        n = 0.
        for xi, yi in zip(x, y):
            s += ((xi - yi) - (xm - ym)) ** 2
            n += 1
        t_stat = sqrt(n) * abs(xm - ym) / sqrt(s / (n - 1.))
        p_value = t.sf(t_stat, n - 1) * 2
        return t_stat, p_value

    Z_values = []
    w1 = []
    w2 = []
    for key in sorted(diff1.keys()):
        if key not in diff2:
            self.logger.error("Unmatched utterance: %r", key)
            continue
        Na = diff1.pop(key)
        Nb = diff2.pop(key)
        w1.append(Na)
        w2.append(Nb)
        Z_values.append(Na - Nb)

    Z_mean = mean(Z_values)
    Z_median = median(Z_values)
    Z_tvar = tvar(Z_values)
    Z_tstd = tstd(Z_values)

    wilcoxon_t_stat, wilcoxon_p_value = wilcoxon(w1, w2)
    mapsswe_w_stat, mapsswe_p_value = mapsswe(w1, w2)

    fw = sys.stdout
    fw.write("Z stats:\n")
    fw.write("========\n")
    fw.write(" - mean:   %9.3f\n" % Z_mean)
    fw.write(" - median: %9.3f\n" % Z_median)
    fw.write(" - tvar:   %9.3f\n" % Z_tvar)
    fw.write(" - tstd:   %9.3f\n\n" % Z_tstd)
    fw.write("Wilcoxon test:\n")
    fw.write("==============\n")
    fw.write(" - p-value: %9.3f (two-tailed) [significant if <= 0.05]\n"
             % wilcoxon_p_value)
    fw.write(" - t-stat:  %9.3f\n\n" % wilcoxon_t_stat)
    fw.write("MAPSSWE test:\n")
    fw.write("=============\n")
    fw.write(" - p-value: %9.3f (two-tailed) [significant if <= 0.05]\n"
             % mapsswe_p_value)
    fw.write(" - t-stat:  %9.3f\n\n" % mapsswe_w_stat)
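# The `mapsswe` statistic above is algebraically the absolute value of the
# paired t statistic, so it can be sanity-checked against
# scipy.stats.ttest_rel on synthetic per-bucket error counts (a standalone
# sketch, not part of the original tool):
import numpy as np
from scipy.stats import ttest_rel

rng = np.random.default_rng(4)
errs_a = rng.poisson(3.0, size=40).astype(float)
errs_b = rng.poisson(3.5, size=40).astype(float)

d = errs_a - errs_b
t_manual = np.sqrt(len(d)) * abs(d.mean()) / d.std(ddof=1)
t_scipy = abs(ttest_rel(errs_a, errs_b).statistic)
print(t_manual, t_scipy)   # should agree to floating-point precision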
from numpy import average
from scipy.stats import shapiro, wilcoxon

# Fragment: the ignoreAttributes*/useAllAttributes* accuracy lists, the
# ignoreAverages/useAllAverages lists and the plotting helpers
# (createXYSpreadGraph, createHistogram) are defined earlier in the script.
for use_all_acc in (useAllAttributes1NN, useAllAttributes3NN,
                    useAllAttributes5NN, useAllAttributes7NN,
                    useAllAttributes9NN, useAllAttributes11NN,
                    useAllAttributes13NN, useAllAttributes15NN,
                    useAllAttributes17NN, useAllAttributes19NN,
                    useAllAttributes21NN, useAllAttributes23NN,
                    useAllAttributes25NN):
    useAllAverages.append(average(use_all_acc))

for ignore_acc, use_all_acc in ((ignoreAttributes3NN, useAllAttributes3NN),
                                (ignoreAttributes5NN, useAllAttributes5NN),
                                (ignoreAttributes7NN, useAllAttributes7NN),
                                (ignoreAttributes9NN, useAllAttributes9NN),
                                (ignoreAttributes11NN, useAllAttributes11NN),
                                (ignoreAttributes13NN, useAllAttributes13NN),
                                (ignoreAttributes15NN, useAllAttributes15NN),
                                (ignoreAttributes17NN, useAllAttributes17NN)):
    print('wilcoxon test for use all attributes or ignore all:',
          wilcoxon(ignore_acc, use_all_acc))

print('shapiro normality test', shapiro(ignoreAttributes7NN))

createXYSpreadGraph(ignoreAverages, useAllAverages,
                    'ignore used attributes in KNN',
                    'use all attributes in KNN')
# The histogram is not very informative: the results are not normally
# distributed.
createHistogram(ignoreAttributes7NN, useAllAttributes7NN,
                'histogram: % correctness for required data set',
                '7NN ignore attributes used by tree',
                '7NN use all attributes')
from numpy import average
from scipy.stats import wilcoxon

# new range:
new_range_ig_naive_bayse = [0.7169642857142857, 0.6879464285714286, 0.7044642857142858, 0.6910714285714286, 0.6941964285714286, 0.6955357142857143, 0.6959821428571429, 0.6915178571428572, 0.6901785714285714, 0.6745535714285714, 0.6821428571428572, 0.6790178571428571, 0.6683035714285714, 0.6669642857142857, 0.6486607142857143, 0.646875, 0.6464285714285715, 0.6455357142857143, 0.6450892857142857, 0.6495535714285714, 0.6508928571428572, 0.6508928571428572, 0.6513392857142857, 0.6504464285714285, 0.646875, 0.646875, 0.6477678571428571, 0.6464285714285715, 0.6464285714285715]

new_range_ig_linear_svm = [0.7098214285714286, 0.6861607142857142, 0.6991071428571428, 0.6959821428571429, 0.6825892857142857, 0.6915178571428572, 0.6785714285714286, 0.6959821428571429, 0.6799107142857143, 0.6745535714285714, 0.6763392857142857, 0.6785714285714286, 0.6772321428571428, 0.6638392857142857, 0.6651785714285714, 0.6638392857142857, 0.6642857142857143, 0.6575892857142858, 0.6580357142857143, 0.6598214285714286, 0.6540178571428571, 0.6544642857142857, 0.6571428571428571, 0.6665178571428572, 0.6647321428571429, 0.6616071428571428, 0.6638392857142857, 0.6651785714285714, 0.6584821428571429]

new_range_ig_hyperbolic_svm = [0.6964285714285714, 0.6870535714285714, 0.6977678571428572, 0.6941964285714286, 0.6866071428571429, 0.6986607142857143, 0.6848214285714286, 0.6834821428571428, 0.6901785714285714, 0.671875, 0.6700892857142857, 0.6790178571428571, 0.6566964285714286, 0.6611607142857143, 0.6709821428571429, 0.6598214285714286, 0.6678571428571428, 0.6696428571428571, 0.6709821428571429, 0.6602678571428572, 0.6629464285714286, 0.6669642857142857, 0.6714285714285714, 0.6665178571428572, 0.6700892857142857, 0.6584821428571429, 0.6633928571428571, 0.6709821428571429, 0.665625]

stochastic_naive_bayse = [0.5205357142857143, 0.565625, 0.6017857142857143, 0.6200892857142857, 0.6352678571428572, 0.6142857142857143, 0.6348214285714285, 0.6433035714285714, 0.6303571428571428, 0.6334821428571429, 0.640625, 0.6410714285714286, 0.6508928571428572, 0.6495535714285714, 0.6486607142857143, 0.659375, 0.6575892857142858, 0.6607142857142857, 0.6714285714285714, 0.6741071428571429, 0.6683035714285714, 0.6776785714285715, 0.6651785714285714, 0.6830357142857143, 0.6852678571428571, 0.6700892857142857, 0.6732142857142858, 0.6803571428571429, 0.6741071428571429]

stochastic_linear_svm = [0.5294642857142857, 0.5428571428571428, 0.6013392857142857, 0.6334821428571429, 0.6272321428571429, 0.603125, 0.6129464285714286, 0.640625, 0.6419642857142858, 0.6508928571428572, 0.6544642857142857, 0.6580357142857143, 0.6633928571428571, 0.66875, 0.6714285714285714, 0.6464285714285715, 0.6723214285714286, 0.6785714285714286, 0.6772321428571428, 0.6803571428571429, 0.6803571428571429, 0.6879464285714286, 0.6830357142857143, 0.6839285714285714, 0.7008928571428571, 0.6982142857142857, 0.69375, 0.6745535714285714, 0.6910714285714286]

stochastic_hyperbolic_svm = [0.5482142857142858, 0.5732142857142857, 0.6022321428571429, 0.628125, 0.6196428571428572, 0.6366071428571428, 0.6142857142857143, 0.634375, 0.65, 0.6540178571428571, 0.6522321428571428, 0.6763392857142857, 0.6736607142857143, 0.66875, 0.6736607142857143, 0.6660714285714285, 0.6736607142857143, 0.6696428571428571, 0.6607142857142857, 0.6741071428571429, 0.7008928571428571, 0.6857142857142857, 0.6799107142857143, 0.6852678571428571, 0.6830357142857143, 0.6794642857142857, 0.690625, 0.6799107142857143, 0.6897321428571429]

pca_hyperbolic_svm = [0.6571428571428571, 0.6352678571428572,
    0.6160714285714286, 0.5513392857142857, 0.6169642857142857, 0.6223214285714286, 0.5508928571428572, 0.6352678571428572, 0.6321428571428571, 0.634375, 0.6450892857142857, 0.6267857142857143, 0.6410714285714286, 0.646875, 0.6361607142857143, 0.6459821428571428, 0.6348214285714285, 0.6375, 0.6433035714285714, 0.6424107142857143, 0.6388392857142857, 0.6303571428571428, 0.6419642857142858, 0.6334821428571429, 0.6285714285714286, 0.6375, 0.6330357142857143, 0.6379464285714286, 0.6375]

pca_linear_svm = [0.6316964285714286, 0.6196428571428572, 0.5370535714285715, 0.35401785714285716, 0.603125, 0.6017857142857143, 0.60625, 0.6160714285714286, 0.6013392857142857, 0.6053571428571428, 0.621875, 0.6330357142857143, 0.6410714285714286, 0.63125, 0.6241071428571429, 0.6366071428571428, 0.6339285714285714, 0.6370535714285714, 0.6223214285714286, 0.6339285714285714, 0.6348214285714285, 0.6392857142857142, 0.6263392857142858, 0.6223214285714286, 0.6415178571428571, 0.6339285714285714, 0.6357142857142857, 0.6392857142857142, 0.6223214285714286]

pca_naive_bayse = [0.3665178571428571, 0.3879464285714286, 0.590625, 0.5785714285714286, 0.378125, 0.5794642857142858, 0.6040178571428572, 0.35625, 0.5785714285714286, 0.5785714285714286, 0.5803571428571429, 0.6236607142857142, 0.5790178571428571, 0.5763392857142857, 0.5816964285714286, 0.5763392857142857, 0.5741071428571428, 0.625, 0.5665178571428572, 0.5696428571428571, 0.5919642857142857, 0.63125, 0.5852678571428571, 0.5816964285714286, 0.5803571428571429, 0.5910714285714286, 0.5848214285714286, 0.5803571428571429, 0.5803571428571429]

list1 = new_range_ig_naive_bayse + new_range_ig_linear_svm + new_range_ig_hyperbolic_svm
list2 = stochastic_naive_bayse + stochastic_linear_svm + stochastic_hyperbolic_svm
list3 = pca_hyperbolic_svm + pca_linear_svm + pca_naive_bayse

print('ig VS stochastic')
print(wilcoxon(list1, list2))
print('avg ig=', average(list1), 'avg stochastic=', average(list2))
print('ig VS PCA')
print(wilcoxon(list1, list3))
print('avg ig=', average(list1), 'avg PCA=', average(list3))
print('stochastic VS PCA')
print(wilcoxon(list2, list3))
print('avg stochastic=', average(list2), 'avg PCA=', average(list3))