def calc_ttest(id1, id2):
    # reading sim files
    logger.info('reading '+id1+' ...')
    fr = open('../../result/wordsim/simcos/'+id1+'.txt')
    wordsim1 = json.load(fr)
    words = wordsim1.keys()
    fr.close()
    logger.info('reading '+id2+' ...')
    fr = open('../../result/wordsim/simcos/'+id2+'.txt')
    wordsim2 = json.load(fr)
    fr.close()
    keywordsim2 = {}
    for w in words:
        keywordsim2[w] = {}
        for item in wordsim2[w]:
            keywordsim2[w][item[0]] = item[1]

    # ttest all
    logger.info('ttest all of '+id1+'_'+id2)
    fw_all = open('../../result/wordsim/ttest/all/'+id1+'_'+id2+'.txt', 'w')
    for i, w in enumerate(words):
        logger.debug('ttest all of '+id1+'_'+id2+' of '+w+' ('+str(i)+'/'+str(len(words))+')')
        values1 = [x[1] for x in wordsim1[w]]
        values2 = [keywordsim2[w].get(x[0], 0) for x in wordsim1[w]]
        #pdb.set_trace()
        m = sum(abs(numpy.array(values1)-numpy.array(values2)))/len(values1)
        ttest = ttest_rel(values1, values2)
        fw_all.write(w+',ttest'+','+str(ttest[0])+','+str(ttest[1])+','+str(m)+'\n')
    fw_all.close()

    # ttest slice
    logger.info('ttest slice of '+id1+'_'+id2)
    fw_slice = open('../../result/wordsim/ttest/slice/'+id1+'_'+id2+'.txt', 'w')
    steps = 20
    for i, w in enumerate(words):
        logger.info('ttest slice of '+id1+'_'+id2+' of '+w)
        for step in range(0, steps):
            top = 1-(step*(2.0/steps))
            bottom = 1-((step+1)*(2.0/steps))
            keys_subset = [x[0] for x in wordsim1[w] if x[1] <= top and x[1] > bottom]
            logger.debug('ttest slice of '+id1+'_'+id2+' of '+w+' ('+str(i)+'/'+str(len(words))+') len: '+str(len(keys_subset))+' slice: '+str(bottom)+'-'+str(top))
            if len(keys_subset) > 0:
                values1_subset = [x[1] for x in wordsim1[w] if x[1] <= top and x[1] > bottom]
                values2_subset = [keywordsim2[w].get(x, 0) for x in keys_subset]
                m = sum(abs(numpy.array(values1_subset)-numpy.array(values2_subset)))/len(values1_subset)
                ttest = ttest_rel(values1_subset, values2_subset)
                fw_slice.write(w+','+str(top)+','+str(bottom)+','+str(len(keys_subset))+',ttest,'+str(ttest[0])+','+str(ttest[1])+','+str(m)+'\n')
            else:
                fw_slice.write(w+','+str(top)+','+str(bottom)+','+str(len(keys_subset))+'-'+'\n')
    fw_slice.close()  # close the slice output file, mirroring fw_all above
def loocvFileCompare(csvpath, kvals):
    "Calls loocvFile() for an FFT and a CH run, then decides whether the AUC difference is significant."
    aucs = [loocvFile(csvpath % ('fft'), kvals),
            loocvFile(csvpath % ('ch'), kvals)]
    (t, prob) = stats.ttest_rel(aucs[0], aucs[1])
    print "---------------------------"
    print "Paired T-test: t=%g, p=%g" % (t, prob)
def _ttest(orig_score, rep_score, rpd=True, pbar=False):
    """
    @param orig_score: The original scores.
    @param rep_score: The reproduced/replicated scores.
    @param rpd: Boolean indicating if the evaluated runs are reproduced.
    @param pbar: Boolean value indicating if progress bar should be printed.
    @return: Generator with p-values.
    """
    if rpd:  # paired two-tailed t-test
        topic_scores_orig = topic_scores(orig_score)
        topic_scores_rep = topic_scores(rep_score)

        generator = tqdm(topic_scores_orig.items()) if pbar else topic_scores_orig.items()

        for measure, scores in generator:
            yield measure, ttest_rel(scores, topic_scores_rep.get(measure)).pvalue
    else:  # else unpaired two-tailed t-test
        topic_scores_orig = topic_scores(orig_score)
        topic_scores_rep = topic_scores(rep_score)

        generator = tqdm(topic_scores_orig.items()) if pbar else topic_scores_orig.items()

        for measure, scores in generator:
            yield measure, ttest_ind(scores, topic_scores_rep.get(measure)).pvalue
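# Illustrative-only sketch (not part of the original module): the rpd flag above switches
# between a paired test (ttest_rel, same topics scored by both runs) and an unpaired test
# (ttest_ind, the two score lists treated as independent samples). The per-topic scores
# below are made up purely to show the two calls side by side.
from scipy.stats import ttest_rel, ttest_ind

orig = [0.31, 0.42, 0.55, 0.27, 0.63]   # assumed per-topic scores of the original run
rep = [0.30, 0.40, 0.57, 0.25, 0.60]    # assumed scores of the reproduced run
print(ttest_rel(orig, rep).pvalue)      # paired: tests the per-topic differences
print(ttest_ind(orig, rep).pvalue)      # unpaired: compares the two samples as independent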
def main():
    judgment_path = sys.argv[1]
    metric = sys.argv[2]
    ranked_list_path1 = sys.argv[3]
    ranked_list_path2 = sys.argv[4]
    # print
    qrels = load_qrels_flat(judgment_path)
    ranked_list_1: Dict[str, List[TrecRankedListEntry]] = load_ranked_list_grouped(ranked_list_path1)
    ranked_list_2: Dict[str, List[TrecRankedListEntry]] = load_ranked_list_grouped(ranked_list_path2)
    metric_fn = get_metric_fn(metric)
    score_d1 = get_score_per_query(qrels, metric_fn, ranked_list_1)
    score_d2 = get_score_per_query(qrels, metric_fn, ranked_list_2)
    pairs = []
    for key in score_d1:
        try:
            e = (score_d1[key], score_d2[key])
            pairs.append(e)
        except KeyError as e:
            pass

    if len(pairs) < len(score_d1) or len(pairs) < len(score_d2):
        print("{} matched from {} and {} scores".format(len(pairs), len(score_d1), len(score_d2)))

    l1, l2 = zip(*pairs)
    d, p_value = stats.ttest_rel(l1, l2)
    print("baseline:", average(l1))
    print("treatment:", average(l2))
    print(d, p_value)
def processFoldResult(baselineResult, result):
    baselineScore = baselineResult['auc-mean']
    baselineFold = baselineResult['auc-fold']
    foldAuc = baselineFold - result['auc-fold']
    tstat = stats.ttest_rel(baselineFold, result['auc-fold'])
    return (result['auc-mean'], result['auc-fold'].mean(), result['auc-fold'].std(),
            baselineScore - result['auc-mean'], foldAuc.mean(), foldAuc.std(), tstat[1])
def check_hypothesis(all_voca, cdf_cont, cdf_ncont, clueweb_cdf, clueweb_ctf, clueweb_df, clueweb_tf,
                     ctf_cont, ctf_ncont, df_cont, df_ncont, tf_cont, tf_ncont, unigrams):
    hypo1 = []
    hypo1_1 = []
    hypo2_1 = []
    hypo2_2 = []
    not_observed_in_relevant_docs = set()
    for t in unigrams:
        if t not in all_voca:
            not_observed_in_relevant_docs.add(t)
            continue

        # Hypothesis 1 : P(t|controversy,R) > P(t| !controversy,R)
        # Hypothesis 2 : P(t|R) > P(t|BG)
        p1 = tf_cont[t] / ctf_cont
        p2 = tf_ncont[t] / ctf_ncont
        hypo1.append((t, (p1, p2)))

        p1 = df_cont[t] / cdf_cont
        p2 = df_ncont[t] / cdf_ncont
        hypo1_1.append((t, (p1, p2)))

        p1 = (tf_cont[t] + tf_ncont[t]) / (ctf_cont + ctf_ncont)
        if t not in clueweb_df:
            print("warning {} not in clueweb voca".format(t))
            continue
        p2 = clueweb_tf[t] / clueweb_ctf
        hypo2_1.append((t, (p1, p2)))

        p1 = (df_cont[t] + df_ncont[t]) / (cdf_cont + cdf_ncont)
        p2 = clueweb_df[t] / clueweb_cdf
        hypo2_2.append((t, (p1, p2)))

    todo = [
        (hypo1, "Hypothesis 1 : P(t|controversy,R) > P(t| !controversy,R)"),
        (hypo1_1, "Hypothesis 1 : P(t|controversy,R) > P(t| !controversy,R) by binary model"),
        (hypo2_1, "Hypothesis 2 : P(t|R) > P(t|BG)"),
        (hypo2_2, "Hypothesis 2 : P(t|R) > P(t|BG) by binary model"),
    ]
    print("not_observed_in_relevant_docs : {} ".format(not_observed_in_relevant_docs))
    for hypo, desc in todo:
        print(desc)
        terms, pairs = zip(*hypo)
        p1_list, p2_list = zip(*pairs)
        diff, p = stats.ttest_rel(p1_list, p2_list)
        print(diff, p)
        for term, pair in hypo:
            p1, p2 = pair
            print(term,
                  "tf_cont:{} tf_ncont:{} df_cont:{}".format(tf_cont[term], tf_ncont[term], df_cont[term]),
                  "{0:.4f} {1:.4f}".format(p1, p2))
def roi_pair_ttest():
    """
    compare rsfc difference between ROIs
    scheme: hemi-separately network-wise
    """
    import numpy as np
    import pickle as pkl
    import pandas as pd
    from scipy.stats.stats import ttest_rel
    from cxy_hcp_ffa.lib.predefine import net2label_cole
    from commontool.stats import EffectSize

    # inputs
    hemis = ('lh', 'rh')
    roi_pair = ('pFus-face', 'mFus-face')
    data_file = pjoin(work_dir, 'rsfc_individual2Cole_{}.pkl')
    compare_name = f"{roi_pair[0].split('-')[0]}_vs_" \
                   f"{roi_pair[1].split('-')[0]}"

    # outputs
    out_file = pjoin(work_dir, f"rsfc_individual2Cole_{compare_name}_ttest_paired.csv")

    # start
    trg_names = list(net2label_cole.keys())
    trg_labels = list(net2label_cole.values())
    out_data = {'network': trg_names}
    es = EffectSize()
    for hemi in hemis:
        data = pkl.load(open(data_file.format(hemi), 'rb'))
        assert data['trg_label'] == trg_labels
        out_data[f'CohenD_{hemi}'] = []
        out_data[f't_{hemi}'] = []
        out_data[f'P_{hemi}'] = []
        for trg_idx, trg_name in enumerate(trg_names):
            sample1 = data[roi_pair[0]][:, trg_idx]
            sample2 = data[roi_pair[1]][:, trg_idx]
            nan_vec1 = np.isnan(sample1)
            nan_vec2 = np.isnan(sample2)
            nan_vec = np.logical_or(nan_vec1, nan_vec2)
            print(f'#NAN in sample1 or sample2:', np.sum(nan_vec))
            sample1 = sample1[~nan_vec]
            sample2 = sample2[~nan_vec]
            d = es.cohen_d(sample1, sample2)
            t, p = ttest_rel(sample1, sample2)
            out_data[f'CohenD_{hemi}'].append(d)
            out_data[f't_{hemi}'].append(t)
            out_data[f'P_{hemi}'].append(p)

    # save out
    out_data = pd.DataFrame(out_data)
    out_data.to_csv(out_file, index=False)
def main():
    score_path1 = sys.argv[1]
    score_path2 = sys.argv[2]
    # print
    l1 = get_score_per_query(score_path1)
    l2 = get_score_per_query(score_path2)
    assert len(l1) == len(l2)
    d, p_value = stats.ttest_rel(l1, l2)
    print("baseline:", average(l1))
    print("treatment:", average(l2))
    print(d, p_value)
def t_test(arr):
    """
    Moving window t-test function.
    To get two columns into the function, one must be set as the index.

    Example:
        input_table = input_table.set_index(treatment_column, drop=False)
        input_table['p_value'] = input_table['controls_mean'].rolling(
            window=size, center=True).apply(t_test, raw=False)
    """
    tstat, pvalue = stats.ttest_rel(arr.index, arr)
    return pvalue
def highlight_min(s):
    best_measure_idx = s[:-1].idxmax()
    best_mask = [
        stats.ttest_rel(
            experiments[s.name][best_measure_idx]['best_model_test_ll'].flatten(),
            experiments[s.name][name]['best_model_test_ll'].flatten()).pvalue > 0.05
        if name in experiments[s.name] else False
        for name, val in s.iteritems()
    ]
    best_mask = np.logical_or(best_mask, s.keys() == best_measure_idx)
    return ['font-weight: bold' if val else '' for val in best_mask]
def ttest(victim_run, allTheOther_runs, qrels, metric):
    """
    Computes ttest between victim_run and all runs contained in allTheOther_runs
    using relevance judgements contained in qrels to compute the specified metric.
    Returns a dictionary d[otherRunName] = p-value.
    The ttest used is a two-tail student ttest on 2 related samples.
    """
    victimAvg, victimDetails = evaluate(victim_run, qrels, metric, True)
    # to read the scores always in the same order
    keyList = list(victimDetails.keys())
    victimScores = [victimDetails[k] for k in keyList]
    result = {}
    for othertrun in allTheOther_runs:
        otherAvg, otherDetails = evaluate(othertrun, qrels, metric, True)
        otherScores = [otherDetails[k] for k in keyList]
        _, p = stats.ttest_rel(victimScores, otherScores)
        result[othertrun.name] = p
    return result
def export_to_latext(experiments, df, col_names, row_names, file):
    with open(file, 'w') as file:
        file.write('\\begin{tabular}{l%s}\n' % "".join((['r'] * len(df.columns))))
        file.write('\\toprule\n')
        file.write('{}')
        for col in df.columns.values:
            file.write(' & \makecell{%s}' % col_names[col])
        file.write('\\\\\n')
        file.write('%s ' % df.index.name.replace('_', ' '))
        file.write("".join(['& '] * len(df.columns)))
        file.write("\\\\\n")
        for i in range(len(df)):
            row = df.iloc[i]
            best_measure_idx = row[:-1].idxmax()
            best_mask = [
                stats.ttest_rel(
                    experiments[row.name][best_measure_idx]['best_model_test_ll'].flatten(),
                    experiments[row.name][name]['best_model_test_ll'].flatten()).pvalue > 0.05
                if name in experiments[row.name] else False
                for name, val in row.iteritems()
            ]
            best_mask = np.logical_or(best_mask, row.keys() == best_measure_idx)
            file.write('\makecell[l]{%s}' % row_names[row.name])
            for col_i, col in enumerate(df.columns.values):
                if np.isnan(row[col]):
                    val = '& '
                else:
                    val = '%.3f ' % row[col]
                    if best_mask[col_i]:
                        val = "\\textbf{%s}" % val
                    val = "& %s" % val
                file.write(val)
            file.write('\\\\\n')
        file.write('\\bottomrule\n')
        file.write('\\end{tabular}\n')
def compare_gdist():
    import numpy as np
    import pandas as pd
    from scipy.stats.stats import ttest_rel

    items = ('pFus-mFus',)
    data_file = pjoin(work_dir, 'gdist_peak.csv')

    df = pd.read_csv(data_file)
    for item in items:
        col1 = 'lh_' + item
        col2 = 'rh_' + item
        data1 = np.array(df[col1])
        data2 = np.array(df[col2])
        nan_vec1 = np.isnan(data1)
        nan_vec2 = np.isnan(data2)
        not_nan_vec = ~np.logical_or(nan_vec1, nan_vec2)
        data1 = data1[not_nan_vec]
        data2 = data2[not_nan_vec]
        print(f'#{item}: {len(data1)}')
        print(f'{col1} vs {col2}:', ttest_rel(data1, data2))
def main(prefix1, prefix2):
    topic = "abortion"
    tfrecord_path = "./data/ukp_tfrecord/dev_" + topic
    tfrecord = list(load_tfrecord(tfrecord_path))
    get_correctness_arr_fn = partial(get_correctness_arr, tfrecord)
    prediction_list_1 = list(get_existing_predictions(prefix1, topic))
    prediction_list_2 = list(get_existing_predictions(prefix2, topic))
    num_runs = min(len(prediction_list_1), len(prediction_list_2))
    prediction_list_1 = prediction_list_1[:num_runs]
    prediction_list_2 = prediction_list_2[:num_runs]
    c1 = flatten(lmap(get_correctness_arr_fn, prediction_list_1))
    c2 = flatten(lmap(get_correctness_arr_fn, prediction_list_2))
    print(len(c1))
    print(len(c2))
    _, p_value = stats.ttest_rel(c1, c2)
    print(p_value)
def student_t_test_rel(approaches, accuracy_values, save_path):
    # calculate the two-sided paired Student's t-test from scipy;
    # it compares all approaches with each other
    # returns nan when an approach is compared with itself, because the standard deviation
    # of the differences between all pairs is the divisor
    student_t_test_rel_frame = pd.DataFrame()
    for i in range(len(approaches)):
        for j in range(i, len(approaches), 1):
            # iterate through approaches
            approach_i = approaches[i]
            approach_j = approaches[j]
            values_i = accuracy_values.loc[:, approach_i]
            values_j = accuracy_values.loc[:, approach_j]
            t_statistic, two_tailed_p_test = stats.ttest_rel(values_i, values_j)
            student_t_test_rel_frame.at[approach_i, approach_j] = two_tailed_p_test

    save_path.mkdir(parents=True, exist_ok=True)
    fig = plt.figure(figsize=(4, 2))
    ax = fig.subplots()
    ax = sns.heatmap(student_t_test_rel_frame, ax=ax, annot=True, fmt="0.3f",
                     cmap="autumn", vmin=0, vmax=0.05)
    plt.xticks(rotation=45)
    fig.canvas.start_event_loop(sys.float_info.min)
    path = save_path / 'students-test_scipy_rel.png'
    fig.savefig(path, bbox_inches='tight', dpi=100)
    plt.close(fig)
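# Minimal illustration (an assumption, not from the original script) of the nan noted in the
# comment above: comparing an approach with itself gives all-zero differences, so the divisor
# of the paired t statistic is zero and scipy returns nan for statistic and p-value.
from scipy import stats

same = [0.81, 0.77, 0.90, 0.68]
print(stats.ttest_rel(same, same))  # statistic and pvalue are both nan (exact repr depends on the scipy version)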
from scipy.stats import stats
import math
import pandas as pd  # needed for pd.read_csv below

# equations from
# https://www.statisticshowto.datasciencecentral.com/probability-and-statistics/t-test/

path = 'ResamplesT.txt'
data = pd.read_csv(path, sep="\t")
data_top = data.head()
print(data_top)

hive = data['HIVE-COTE']
boss = data['BOSS']

# T-test paired
results = stats.ttest_rel(hive, boss)
print(results)

### "Manual" step-wise procedure
# Getting squared and summed differences
# sum of squared differences (Σ2) and sum of differences (Σ)
Σ2 = sum(((hive) - (boss))**2)
Σ = sum(hive - boss)
Σ2_2 = Σ**2
# Number of samples
n = len(hive)

print("sumDiff")
print(Σ)
print("sumSquared")
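# A hedged sketch of how the "manual" computation above could be finished, following the
# formula from the source cited at the top: t = ΣD / sqrt((n·ΣD² − (ΣD)²) / (n − 1)).
# This continuation is not part of the original snippet; it reuses Σ, Σ2, Σ2_2, n and the
# math import defined above, so the result can be checked against scipy's ttest_rel output.
print(Σ2)
t_manual = Σ / math.sqrt((n * Σ2 - Σ2_2) / (n - 1))
print("manual paired t statistic")
print(t_manual)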
        elif sit == 1:
            coordinate_x_phase2.append(x_pos)
            coordinate_y_phase2.append(y_pos)
        elif sit == 2:
            coordinate_x_phase3.append(x_pos)
            coordinate_y_phase3.append(y_pos)

    # arena data input
    data2 = open("%d.cfg" % (number), 'rU')
    diameter, center_x, center_y, radius, lstripex, lstripey, rstripex, rstripey = [
        int(l.split('=')[1]) for l in data2 if len(l.split('=')) > 1
    ]

    # ------------------Calculate over all fixation index-----------------
    fixation_index_phase1.append(
        distribution_method(coordinate_x_phase1, coordinate_y_phase1, center_x, center_y))
    fixation_index_phase2.append(
        distribution_method(coordinate_x_phase2, coordinate_y_phase2, center_x, center_y))
    fixation_index_phase3.append(
        distribution_method(coordinate_x_phase3, coordinate_y_phase3, center_x, center_y))

# ------------------Student T test-----------------
t_static1, p_value1 = stats.ttest_rel(fixation_index_phase1, fixation_index_phase2)
t_static2, p_value2 = stats.ttest_rel(fixation_index_phase1, fixation_index_phase3)
print "p-value of phase 1 & 2 is %s" % p_value1
print "p-value of phase 1 & 3 is %s" % p_value2