def sign_barplot(df, val_col, group_col, test="HSD"):
    """Show a bar plot of the data beside a significance plot of a post-hoc test.

    Args:
        df: Data handed to the post-hoc test and to ``sns.barplot``.
        val_col: Name of the column holding the values.
        group_col: Name of the column holding the group labels.
        test: Post-hoc test to run. ``"HSD"`` calls ``tukey_hsd``;
            ``"tukey"``, ``"ttest"``, ``"scheffe"``, ``"dscf"`` and
            ``"conover"`` call the matching ``sp.posthoc_*`` function.

    Raises:
        ValueError: If ``test`` is not one of the supported names.
    """
    # Attribute names on ``sp`` for each supported test.  They are resolved
    # only after validation so that an unsupported name is rejected before
    # any test is run or any figure is created.
    posthoc_names = {
        "tukey": "posthoc_tukey",
        "ttest": "posthoc_ttest",
        "scheffe": "posthoc_scheffe",
        "dscf": "posthoc_dscf",
        "conover": "posthoc_conover",
    }
    if test == "HSD":
        result_df = tukey_hsd(df, val_col, group_col)
    elif test in posthoc_names:
        result_df = getattr(sp, posthoc_names[test])(df, val_col, group_col)
    else:
        raise ValueError(
            "Unsupported test {!r}; expected one of {}".format(
                test, ["HSD"] + sorted(posthoc_names)))

    # Heatmap settings
    fig, ax = plt.subplots(1, 2, figsize=(10, 6))
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True
    }
    # Draw the test results on the right-hand axes
    sp.sign_plot(result_df, ax=ax[1], **heatmap_args)
    # Draw the data that was tested on the left-hand axes
    sns.barplot(data=df, x=group_col, y=val_col, capsize=0.1, ax=ax[0])
    plt.show()
def sign_plot(self, df, x, y):
    """Run ``self.posthoc(df, x, y)`` and draw the result with ``sp.sign_plot``."""
    style = dict(
        linewidths=0.25,
        linecolor='0.5',
        clip_on=False,
        square=True,
        cbar_ax_bbox=[0.80, 0.35, 0.04, 0.3],
    )
    sp.sign_plot(self.posthoc(df, x, y), **style)
def heatmap_plot(self, p_values, axes=None, symmetric=False):
    """Draw a significance heatmap for a table of p-values.

    When ``symmetric`` is true, the cells selected by
    ``np.triu_indices_from(p_values)`` (the upper triangle) are masked out,
    since they duplicate the lower triangle. The plot is drawn on ``axes``.
    """
    mask = None
    if symmetric:
        mask = np.zeros_like(p_values)
        mask[np.triu_indices_from(p_values)] = True
    sp.sign_plot(
        p_values,
        ax=axes,
        linewidths=0.5,
        linecolor='0.5',
        clip_on=False,
        square=True,
        cbar_ax_bbox=[0.85, 0.35, 0.04, 0.3],
        mask=mask,
    )
def SignificancePlot(self, methods=None, metric='MAE'):
    """Plot and save a heatmap of Nemenyi-Friedman post-hoc p-values.

    Args:
        methods: Methods to compare. ``None`` keeps ``self.methods``;
            otherwise every entry must already be in ``self.methods`` and
            the given list replaces ``self.methods``.
        metric: Key into ``self.metricSort``; also used in the output file
            name ``SP_<metric>.pdf``.

    Raises:
        ValueError: If ``methods`` contains an entry not in ``self.methods``.
    """
    # -- Method(s)
    if methods is None:
        methods = self.methods
    else:
        # Reject the request unless it is a subset of the known methods.
        if not set(methods) <= set(self.methods):
            raise ValueError("Some method is wrong!")
        self.methods = methods

    # -- set metric
    self.metric = metric
    self.mag = self.metricSort[metric]

    # -- get data from dataset(s)
    if self.multidataset:
        Y = self.__getData()
    else:
        Y = self.__getDataMono()

    # -- Significance plot, a heatmap of p values
    methodNames = [x.upper() for x in self.methods]
    Ypd = pd.DataFrame(Y, columns=methodNames)
    ph = sp.posthoc_nemenyi_friedman(Ypd)
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True,
        'cbar_ax_bbox': [0.85, 0.35, 0.04, 0.3]
    }
    plt.figure(figsize=(5, 4))
    sp.sign_plot(ph, cbar=True, **heatmap_args)
    plt.title('p-vals')
    fname = 'SP_' + self.metric + '.pdf'
    plt.savefig(fname)
    plt.show()
def heatmap_plot(p_values, axes=None, symmetric=False, **kwargs):
    """Draw a significance heatmap for ``p_values`` and return the result.

    With ``symmetric`` set, the cells selected by
    ``np.triu_indices_from(p_values)`` are masked. Extra keyword arguments
    override the default styling passed to ``sp.sign_plot``.
    """
    mask = None
    if symmetric:
        mask = np.zeros_like(p_values)
        mask[np.triu_indices_from(p_values)] = True
    defaults = dict(
        linewidths=0.5,
        linecolor='0.5',
        clip_on=False,
        square=True,
        cbar_ax_bbox=[0.85, 0.35, 0.04, 0.3],
        mask=mask,
    )
    # Caller-supplied options take precedence over the defaults.
    return sp.sign_plot(p_values, ax=axes, **dict(defaults, **kwargs))
def field_longitudinal(fn, thres):
    """Summarise per-year strain counts of field samples as text, PDF and tests.

    For each survey year the samples returned by
    ``read_field_data_year(fn, [year])`` are processed (misfits for every k in
    ``krange`` plus ``s.compute`` results). A strain count ``best`` is derived
    per sample from the number of misfit values above ``thres``. The function
    then:

    * writes per-sample strain lines to ``figures/strains.out``;
    * writes per-year counts, averages, medians and test results to
      ``figures/field_longitudinal_gamma=<gamma>_thres=<thres>.txt``;
    * saves a stacked percentage histogram to the matching ``.pdf``;
    * draws a Conover post-hoc significance plot with ``sp.sign_plot``.

    ``gamma``, ``krange``, ``s``, ``memory``, ``num_cores``,
    ``read_field_data_year``, ``sts`` and ``sp`` are not defined here and are
    read from the enclosing scope.

    Args:
        fn: Passed through to ``read_field_data_year``.
        thres: Misfit threshold; also embedded in the output file names.
    """
    pp = PdfPages('figures/field_longitudinal_gamma={}_thres={}.pdf'.format(
        gamma, thres))
    years = ["1996", "2001", "2007", "2012"]

    def ind(yr):
        # Position of the year string within ``years``.
        return years.index(yr)

    fig, ax = plt.subplots(1, 1)
    ax.grid(False)
    # color-blind spectrum: http://personal.sron.nl/~pault/colourschemes.pdf
    colors = [
        "#88ccee", "#44aa99", "#999933", "#DDCC77", "#CC6677", "#882255",
        "#AA4499"
    ]
    # One colour per entry of krange.
    colors = colors[0:len(krange)]
    # NOTE(review): bars, rows and sm are not used later in this function.
    bars = []
    rows = []
    # Crude
    # hm: strain count -> list of year indices (one entry per sample).
    hm = {v: [] for v in krange}
    sm = 0
    # total: year index -> number of samples counted for that year.
    total = {ind(y): 0 for y in years}
    # byyear: year index -> list of strain counts of that year's samples.
    byyear = {ind(y): [] for y in years}

    def process_vc(tup):
        x, yr, sample = tup
        # Run the misfits calculation
        mf = s.misfits(x, krange, gamma=gamma)
        sr = {k: s.compute(x, k, gamma=gamma) for k in krange}
        return x, mf, yr, sample, sr

    # Wrapped with memory.cache (presumably so results are reused across
    # runs; confirm against how ``memory`` is configured).
    cached_process = memory.cache(process_vc)
    # NOTE(review): opened without a context manager; closed explicitly below,
    # so an exception inside the loop leaves the handle open.
    stf = open('figures/strains.out', 'w')
    for year in tqdm(years):
        fdata = Parallel(n_jobs=num_cores)(
            delayed(cached_process)(tup)
            for tup in read_field_data_year(fn, [year]))
        for x, mf, yr, sample, sr in fdata:
            mf = np.array(mf)
            # Find first drop below threshold
            tqdm.write("Length:{}. Interesting bases:{}. Misfits: {}".format(
                len(x), len([xx for xx in x if 1.0 - 1e-6 > xx > 1e-6]), mf))
            # Number of misfit values above the threshold.
            best = sum(mf > thres)
            # Every misfit exceeds the threshold: skip this sample.
            if best >= len(mf):
                continue
            best += 1
            if best not in hm:
                hm[best] = []
            byyear[ind(year)] += [best]
            hm[best] += [ind(year)]
            total[ind(year)] += 1
            # Print out the strain sequences of at least 5% proportion in a sample
            assert best in sr
            print(sr[best][1], max(sr[best][1]))
            # NOTE(review): unlike the STRAIN line below, this value is not
            # multiplied by 100 before the '%' sign; confirm that is intended.
            stf.write("DOMSTR\t{}\t{:.2f}%\n".format(yr, max(sr[best][1])))
            for i, f in enumerate(sr[best][1]):
                if f >= 0.05:
                    stf.write("STRAIN\t{}\t{}\t{}\t{:.2f}%\t{}\n".format(
                        sample, yr, i, 100.0 * sr[best][1][i], "".join([
                            "{}".format(int(z)) if not np.isnan(z) else "N"
                            for z in sr[best][0][i]
                        ])))
    stf.close()
    of = open(
        'figures/field_longitudinal_gamma={}_thres={}.txt'.format(
            gamma, thres), 'w')
    # One summary line per year: count per strain number, averages, median.
    for year in years:
        of.write("Year %s\t" % year + "\t".join([
            "%d=%d" % (v, len(list(filter(lambda y: y == ind(year), hm[v]))))
            for v in krange
        ]))
        of.write("\tAverage (including 5+):\t" + "%2.4f" %
                 (np.mean(byyear[ind(year)])) +
                 "\tAverage (excluding 5):\t" + "%2.4f" %
                 (np.mean([sc for sc in byyear[ind(year)] if sc < 5])))
        of.write("\tMedian:\t" + "%2.4f" % (np.median(byyear[ind(year)])) +
                 "\n")
    m = [hm[v] for v in krange]
    plt.ylabel('% samples')
    plt.xlabel('Survey year')
    plt.xticks(np.arange(len(years)), years)
    plt.ylim([0, 100])
    # Each sample contributes 100 / (samples in its year), so stacked bar
    # heights are percentages of that year's samples.
    weights = np.array([[100.0 / float(total[int(y)]) for y in hm[v]]
                        for v in krange])
    # Bin edges centred on the integer year indices.
    bins = np.arange(len(years) + 1) - 0.5
    # NOTE(review): hatch is only referenced in the commented-out argument below.
    hatch = '/'
    # NOTE(review): "\g" in "$\geq$" is not a recognised escape sequence and
    # newer Python versions warn about it; a raw string would avoid that.
    _, _, patches = plt.hist(
        m,
        bins=bins,
        histtype='bar',
        stacked=True,
        weights=weights,
        rwidth=0.5,
        color=colors,
        label=[
            "%s%d strain%s" % ("=" if v != krange[-1] else "$\geq$", v,
                               "s" if v != krange[0] else "") for v in krange
        ])  #, hatch=hatch)
    plt.legend(
        bbox_to_anchor=(1.04, 0.5),
        loc="center left",
        borderaxespad=0,
        prop={'size': 10},
    )
    # NOTE(review): mm is not used later in this function.
    mm = np.array(m)
    # lk: year -> strain count -> number of samples.
    lk = {
        year: {
            v: len(list(filter(lambda y: y == ind(year), hm[v])))
            for v in krange
        }
        for year in years
    }
    # Label every non-empty bar segment with its integer percentage.
    for j, bc in enumerate(patches):
        for i, p in enumerate(bc):
            #l = np.sum(np.array(byyear[i]) == len(patches)-j-1)
            l = lk[years[i]][krange[j]]
            if l == 0:
                continue
            h1 = p.get_height()
            print("{} {}".format(p, l))
            z = 100.0 * l / float(sum(lk[years[i]].values()))
            ax.text(p.get_x() + p.get_width() / 2.,
                    p.get_y() + h1 / 2.,
                    "%d%%" % int(z),
                    ha="center",
                    va="center",
                    color="black",
                    fontsize=12,
                    fontweight="bold")
    pp.savefig(bbox_inches="tight")
    pp.close()
    for y in years:
        of.write("%s: length %d\n" % (y, len(byyear[ind(y)])))
    of.write("{}\n".format(byyear[ind("1996")]))
    # Pairwise Mann-Whitney U tests between consecutive survey pairs.
    of.write("H1\t{}\t1996 vs 2001:\t{}\n".format(
        thres, sts.mannwhitneyu(byyear[ind("1996")], byyear[ind("2001")])))
    of.write("H2\t{}\t2007 vs 2012:\t{}\n".format(
        thres, sts.mannwhitneyu(byyear[ind("2007")], byyear[ind("2012")])))
    x = [byyear[ind(y)] for y in years]
    #pc = sp.posthoc_conover(x, val_col='values', group_col='groups', p_adjust='holm')
    kr = sts.kruskal(*x)
    # NOTE(review): the label below reads "Kruskal-Willis"; the call above is
    # sts.kruskal, so this looks like a typo in the emitted text.
    of.write("Kruskal-Willis:\n{}\n".format(kr))
    pc = sp.posthoc_conover(x,
                            val_col='values',
                            group_col='groups',
                            p_adjust='fdr_tsbky')
    of.write("Conover:\n{}\n".format(pc))
    # Format: diagonal, non-significant, p<0.001, p<0.01, p<0.05
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True,
        'cbar_ax_bbox': [0.80, 0.35, 0.04, 0.3]
    }
    sp.sign_plot(pc, **heatmap_args)
    of.close()
# Report the statistic and p-value currently held in ``t`` and ``p``.
print('statistic: ' + str(t))
print('pvalue: ' + str(p))
print(' ')

# Nemenyi post-hoc test on the MAE table, drawn as a significance heatmap.
pc = sp.posthoc_nemenyi_friedman(data_MAE_df)
cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
heatmap_args = dict(
    cmap=cmap,
    linewidths=0.25,
    linecolor='0.5',
    clip_on=False,
    square=True,
    cbar_ax_bbox=[0.80, 0.35, 0.04, 0.3],
)
plt.figure()
sp.sign_plot(pc, **heatmap_args)
plt.title('Nemenyi Test MAE')

# Same procedure for the CC scores: one column of data_CC per method.
data_CC_df = pd.DataFrame(data_CC, columns=all_methods)
print('\nFriedman Test CC:')
# The first eight columns are passed to ``ft`` as separate samples.
t, p, ranks_cc, piv_cc = ft(*(data_CC[:, col] for col in range(8)))
avranksCC = list(np.divide(ranks_cc, n_datasets))
print('statistic: ' + str(t))
print('pvalue: ' + str(p))
print(' ')
pc = sp.posthoc_nemenyi_friedman(data_CC_df)
from denn import *
from scipy.stats import kruskal
import scikit_posthocs as sp
import pylustrator

pylustrator.start()

path = Path('../../data/results/experiment4')

# fitness plots
no_nn = pd.read_csv(path/'no_nn_mof.csv')
nn_normal_rand = pd.read_csv(path/'nn-normal-random_mof.csv')
nn_dist_rand = pd.read_csv(path/'nn-distribution-random_mof.csv')
nn_dropout_rand = pd.read_csv(path/'nn-dropout-random_mof.csv')
labels = ['no_nn', 'nn_normal_rand', 'nn_dist_rand', 'nn_drop_rand']

# One sample of ``mof`` values per configuration, in the order of ``labels``.
x = np.array([no_nn.mof, nn_normal_rand.mof, nn_dist_rand.mof, nn_dropout_rand.mof])

# Run the omnibus test on the same ``mof`` columns that the post-hoc test
# uses.  Passing the whole DataFrames would test every CSV column instead.
stat, p = kruskal(no_nn.mof, nn_normal_rand.mof, nn_dist_rand.mof, nn_dropout_rand.mof)
pc = sp.posthoc_conover(x, p_adjust='holm', val_col='values', group_col='groups')
print('Statistics=%.3f, p=%.3f' % (stat, p))
print(pc)

heatmap_args = {'linewidths': 0.25, 'linecolor': '0.5', 'clip_on': False,
                'square': True, 'cbar_ax_bbox': [0.80, 0.35, 0.04, 0.3]}
ax, cbar = sp.sign_plot(pc, **heatmap_args)
# Replace the default tick labels with the configuration names.
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
plt.show()
def plot_test_results(results):
    """Draw ``results`` with ``sp.sign_plot`` and return whatever it returns."""
    return sp.sign_plot(
        results,
        linewidths=0.25,
        linecolor='0.5',
        clip_on=False,
        square=True,
        cbar_ax_bbox=[0.80, 0.35, 0.04, 0.3],
    )
def compareBases(mainFolders, folders, tp, rep, lastGen, plotType, saveFile):
    """Compare a logged quantity across base folders and lengths.

    Reads ``./<mainFolder>/<folder>xL/<run>/log/<file>.txt`` for every base
    folder, every length folder and runs ``1..rep``. It prints a
    Kruskal-Wallis test, optionally draws a Conover post-hoc significance
    plot, and saves a seaborn plot to ``<saveFile><tp><plotType>.eps``.

    Args:
        mainFolders: Base folder names; used in the path and as the ``Base``
            label.
        folders: Length folder names (``xL`` is appended in the path); used
            as the ``Length`` label.
        tp: Quantity to read: ``evol``, ``nModules``, ``brokenConn`` or
            ``nConn``. A ``Base`` suffix on any of them groups by base only.
        rep: Number of runs per folder.
        lastGen: If true, use only the last row of each log; otherwise use
            every row.
        plotType: ``box``, ``swarm``, ``strip`` or ``violin``. Any other
            value draws nothing, and the figure is still saved.
        saveFile: Prefix of the output file name.

    Raises:
        ValueError: If ``tp`` is not one of the supported values.
    """
    # Validate ``tp`` first, so an unsupported value fails before any file
    # is read.
    if tp in ('evol', 'evolBase'):
        logCol, file, variable = 1, 'evolution', 'Fitness'
    elif tp in ('nModules', 'nModulesBase'):
        logCol, file, variable = 2, 'bestFeatures', 'Number of Modules'
    elif tp in ('brokenConn', 'brokenConnBase'):
        logCol, file, variable = (11, 'meanFeatures',
                                  'Number of Broken Connections')
    elif tp in ('nConn', 'nConnBase'):
        logCol, file, variable = (19, 'bestFeatures',
                                  'Average Connections per Module')
    else:
        raise ValueError("Unknown tp: {!r}".format(tp))

    # Collect the frames and concatenate once; DataFrame.append no longer
    # exists in current pandas.
    allFrames = []
    for mainFolder in mainFolders:
        baseFrames = []
        for folder in folders:
            data = []
            for i in range(rep):
                logPath = ('./' + mainFolder + '/' + folder + 'xL/' +
                           str(i + 1) + '/log/' + file + '.txt')
                # The context manager closes the log even if parsing fails.
                with open(logPath) as csv_file:
                    rows = [
                        row[0].split(" - ") for row in csv.reader(csv_file)
                    ]
                if lastGen:
                    data.append(float(rows[-1][logCol]))
                else:
                    data.extend(float(row[logCol]) for row in rows)
            dfPartial = pd.DataFrame(data, columns=[variable])
            dfPartial['Length'] = folder
            baseFrames.append(dfPartial)
        if baseFrames:
            dfBase = pd.concat(baseFrames, ignore_index=True)
        else:
            dfBase = pd.DataFrame()
        dfBase['Base'] = mainFolder
        allFrames.append(dfBase)
    if allFrames:
        dfAll = pd.concat(allFrames, ignore_index=True)
    else:
        dfAll = pd.DataFrame()

    baseOnly = tp in ('nModulesBase', 'brokenConnBase', 'evolBase',
                      'nConnBase')
    groupKeys = ['Base'] if baseOnly else ['Length', 'Base']
    samples = [
        group[variable].values for name, group in dfAll.groupby(groupKeys)
    ]
    print(scp_stats.kruskal(*samples))

    if not baseOnly and tp != 'brokenConn':
        # Conover post-hoc test on the (Length, Base) groups.
        postHoc = sp.posthoc_conover(samples)
        heatmap_args = {
            'linewidths': 0.25,
            'linecolor': '0.5',
            'clip_on': False,
            'square': True,
            'cbar_ax_bbox': [0.80, 0.35, 0.04, 0.3]
        }
        sp.sign_plot(postHoc, **heatmap_args)

    # New figure so the data plot is not drawn onto the significance plot.
    plt.figure(figsize=(15, 10))
    plotArgs = {'data': dfAll, 'y': variable}
    if baseOnly:
        plotArgs.update(x='Base', order=mainFolders)
    else:
        plotArgs.update(x='Length', order=folders, hue='Base')
    plotters = {
        'box': sns.boxplot,
        'swarm': sns.swarmplot,
        'strip': sns.stripplot,
        'violin': sns.violinplot,
    }
    plotter = plotters.get(plotType)
    if plotter is not None:
        plotter(**plotArgs)
    plt.savefig(saveFile + tp + plotType + '.eps', bbox_inches="tight")
    plt.show()
def boxplotResults(mainFolder, folders, tp, rep, indiv, lastGen, plotType,
                   saveFile, annotatePairs):
    """Plot a logged quantity per length folder and test the differences.

    Reads ``./<mainFolder>/<folder>xL/<run>/log/<file>.txt`` for every length
    folder and runs ``1..rep``, then draws a seaborn plot. Unless ``tp`` is
    ``brokenConn`` it adds Mann-Whitney annotations. The figure is saved to
    ``<saveFile><tp><plotType>.eps``. Finally a Kruskal-Wallis test is
    printed and, unless ``tp`` is ``brokenConn``, a Mann-Whitney post-hoc
    significance plot is drawn.

    Args:
        mainFolder: Base folder name used in the path.
        folders: Length folder names (``xL`` is appended in the path); used
            as the ``Length`` label and as the plot order.
        tp: Quantity to read: ``evol``, ``nModules``, ``brokenConn`` or
            ``nConn``.
        rep: Number of runs per folder.
        indiv: If true, a new figure is created for every folder; otherwise
            one 15x10 figure is created up front.
        lastGen: If true, use only the last row of each log; otherwise use
            every row.
        plotType: ``box``, ``swarm``, ``strip`` or ``violin``. Any other
            value draws nothing and skips the annotations.
        saveFile: Prefix of the output file name.
        annotatePairs: Passed to ``add_stat_annotation`` as ``box_pairs``.

    Raises:
        ValueError: If ``tp`` is not one of the supported values.
    """
    # tp -> (log column, log file stem, value column label)
    settings = {
        'evol': (1, 'evolution', 'Fitness'),
        'nModules': (2, 'bestFeatures', 'Number of Modules'),
        'brokenConn': (11, 'meanFeatures', 'Number of Broken Connections'),
        'nConn': (19, 'bestFeatures', 'Average Connections per Module'),
    }
    # Validate ``tp`` first, so an unsupported value fails before any figure
    # is created or any file is read.
    if tp not in settings:
        raise ValueError("Unknown tp: {!r}".format(tp))
    logCol, file, variable = settings[tp]

    if not indiv:
        plt.figure(figsize=(15, 10)).gca()

    # Collect the frames and concatenate once; DataFrame.append no longer
    # exists in current pandas.
    frames = []
    for folder in folders:
        if indiv:
            plt.figure().gca()
        data = []
        for i in range(rep):
            logPath = ('./' + mainFolder + '/' + folder + 'xL/' +
                       str(i + 1) + '/log/' + file + '.txt')
            # The context manager closes the log even if parsing fails.
            with open(logPath) as csv_file:
                rows = [row[0].split(" - ") for row in csv.reader(csv_file)]
            if lastGen:
                data.append(float(rows[-1][logCol]))
            else:
                data.extend(float(row[logCol]) for row in rows)
        dfPartial = pd.DataFrame(data, columns=[variable])
        dfPartial['Length'] = folder
        frames.append(dfPartial)
    if frames:
        dfAll = pd.concat(frames, ignore_index=True)
    else:
        dfAll = pd.DataFrame()

    x = "Length"
    y = variable
    order = folders
    plotters = {
        'box': sns.boxplot,
        'swarm': sns.swarmplot,
        'strip': sns.stripplot,
        'violin': sns.violinplot,
    }
    ax = None
    plotter = plotters.get(plotType)
    if plotter is not None:
        ax = plotter(data=dfAll, x=x, y=y, order=order)

    # Annotate only when something was drawn, since there are no axes to
    # annotate otherwise.
    if tp != 'brokenConn' and ax is not None:
        add_stat_annotation(ax,
                            data=dfAll,
                            x=x,
                            y=y,
                            order=order,
                            box_pairs=annotatePairs,
                            test='Mann-Whitney',
                            text_format='star',
                            loc='outside',
                            verbose=2)
    plt.savefig(saveFile + tp + plotType + '.eps', bbox_inches="tight")
    plt.show()

    print(
        scp_stats.kruskal(*[
            group[variable].values for name, group in dfAll.groupby('Length')
        ]))

    if tp != 'brokenConn':
        # Mann-Whitney post-hoc test between the length groups.
        postHoc = sp.posthoc_mannwhitney(dfAll,
                                         val_col=variable,
                                         group_col='Length')
        heatmap_args = {
            'linewidths': 0.25,
            'linecolor': '0.5',
            'clip_on': False,
            'square': True,
            'cbar_ax_bbox': [0.80, 0.35, 0.04, 0.3]
        }
        sp.sign_plot(postHoc, **heatmap_args)
# For both system orders and every evaluation budget of interest, gather the
# test costs of each metaheuristic, then run Friedman and Nemenyi tests.
for system_order in ('finite', 'infinite'):
    print(system_order.upper())
    for eval in range(len(function_evals_of_interest)):
        algorithms_at_fes = []
        for algorithm in ('sa', 'acfsa', 'pso', 'aiwpso', 'acor', 'baacor'):
            print(algorithm)
            # Load this algorithm's test costs and keep the column for the
            # current evaluation budget.
            base_filename = './results/{}_{}'.format(algorithm, system_order)
            test_costs_mat = np.load(
                '{}_test_costs.npy'.format(base_filename))
            test_costs_of_interest = test_costs_mat[:, evals_mask]
            costs_fe = test_costs_of_interest[:, eval]
            algorithms_at_fes.append(list(costs_fe))
            print(
                str(function_evals_of_interest[eval]) + ': \t' +
                str(np.mean(costs_fe)))

        # One row per algorithm.
        algorithms_at_fes = np.array(algorithms_at_fes)
        print('\n Statistical significance')
        print(np.shape(algorithms_at_fes))
        print('Friedman p-val = ' +
              str(scipy.stats.friedmanchisquare(*algorithms_at_fes)[1]) +
              '\n\n')

        # The post-hoc test receives the transposed table.
        nm_posthoc = sp.posthoc_nemenyi_friedman(algorithms_at_fes.T)
        plt.figure()
        sp.sign_plot(nm_posthoc, **heatmap_args)
        plt.show()
        print('\n')
# prepare the pairwise plots pc_voc = sp.posthoc_conover(df_voc, val_col='score', group_col='method') pc_acc = sp.posthoc_conover(df_acc, val_col='score', group_col='method') f = plt.figure(figsize=(10, 10)) # Format: diagonal, non-significant, p<0.001, p<0.01, p<0.05 cmap = ['1', '#ff2626', '#ffffff', '#fcbdbd', '#ff7272'] heatmap_args = { 'cmap': cmap, 'linewidths': 0.25, 'linecolor': '0.5', 'clip_on': False, 'square': True, 'cbar_ax_bbox': [0.90, 0.35, 0.04, 0.3] } sp.sign_plot(pc_voc, **heatmap_args) f.tight_layout() f.savefig("pairwise_vocals.pdf", bbox_inches='tight', dpi=300) f = plt.figure(figsize=(8.709677419, 8.709677419)) # Format: diagonal, non-significant, p<0.001, p<0.01, p<0.05 cmap = ['1', '#ff2626', '#ffffff', '#fcbdbd', '#ff7272'] heatmap_args = { 'cmap': cmap, 'linewidths': 0.25, 'linecolor': '0.5', 'clip_on': False, 'square': True, 'cbar_ax_bbox': [0.90, 0.35, 0.04, 0.3]
def plot(scores, ax):
    """Draw the significance plot for ``scores`` on the axes ``ax``."""
    sp.sign_plot(
        scores,
        ax=ax,
        linewidths=1,
        linecolor='0.5',
        square=True,
        cbar_ax_bbox=[0.82, 0.35, 0.04, 0.3],
    )
# Project locations (data_path is built but not used in this excerpt).
project_path = os.path.join('Research', 'BCBL', 'SOCIALCON ')
data_path = os.path.join(
    project_path,
    '2_empirical',
    '1_fMRI',
    '1_decoding',
    '1_binomial',
    '0_data',
)

# Post-hoc table, read as a header-less, semicolon-separated CSV.
df_likable_Co = pd.read_csv(
    'postHocTests_likableness_conceptCV.csv',
    sep=';',
    header=None,
)
ROIs = ['LTL', 'IFG', 'Prec', 'ATL', 'aPFC', 'Ins', 'ACC', 'PCC', 'V1']

# Styling presets
sns.set_context("poster", font_scale=1, rc={"lines.linewidth": 3})
sns.set_style('white')

# Figure and grey-scale palette for the significance levels
fig, ax = plt.subplots(figsize=(15, 8))
cmap = ['1', '#FFFFFF', '#43494F', '#666F79', '#8D959F']
heatmap_args = dict(
    cmap=cmap,
    linewidths=3,
    linecolor='0.05',
    clip_on=False,
    square=True,
    cbar_ax_bbox=[0.80, 0.35, 0.04, 0.3],
)
sp.sign_plot(df_likable_Co, **heatmap_args)
plt.show()