def venn(inp, names, title="venn", folder=''):
    """Plot a Venn diagram of 2 to 6 sets using the pyvenn package.

    Args:
    -----
      inp: list[set] of the value sets to compare
        (e.g. [{1, 2, 3, 4}, {2, 3}, {1, 3, 4, 5}])
      names: list[str] with the name of each set
      title: str, the plot title; it is also the stem of the saved file name
      folder: str, location where the plot is saved. It is concatenated
        verbatim with ``title + '_venn.pdf'``, so it has to end with a path
        separator to designate a directory. Nothing is saved if it is empty.

    Raises:
    ------
      ValueError: if `inp` does not hold between 2 and 6 sets
    """
    nb_sets = len(inp)
    # Fail fast: reject unsupported sizes before computing the region labels.
    if not 2 <= nb_sets <= 6:
        raise ValueError('need to be between 2 to 6')

    labels = pyvenn.get_labels(inp, fill=['number', 'logic'])
    # One drawing function exists per supported set count: venn2 ... venn6.
    draw = getattr(pyvenn, 'venn%d' % nb_sets)
    fig, ax = draw(labels, names=names)
    ax.set_title(title)
    if folder:
        fig.savefig(folder + title + '_venn.pdf')
    fig.show()
    # Short pause after show(), kept from the original implementation.
    plt.pause(0.1)
def generate_venn_diagram(nb_dim, files, names, sep, label):
    """Build and display a Venn diagram comparing the results of several models.

    This function calls into venn.py, which must be present in the same
    folder as generate_venn.py.

    :param nb_dim: number of dimensions (sets) in the Venn diagram
        (1 < nb_dim <= 6); the first ``nb_dim`` entries of ``files`` are used
    :param files: list of paths to the files holding the models' results
    :param names: list of model names (must line up with ``files``)
    :param sep: separator used in each file between instance ids and labels
    :param label: label to extract

    Nothing is returned. When ``nb_dim`` is out of range a message is printed
    and no diagram is drawn.
    """
    if nb_dim < 2 or nb_dim > 6:
        print(
            'Le diagramme de Venn ne peut être généré que pour un nombre de dimensions supérieur à un et inférieur ou égal à 6.'
        )
        return

    # Index explicitly so that a too-short `files` list still raises
    # IndexError instead of silently drawing fewer sets.
    models = [
        get_id_instances_title(files[i], sep=sep, label=label)
        for i in range(nb_dim)
    ]
    labels = venn.get_labels(models)
    # One drawing function exists per supported set count: venn2 ... venn6.
    draw = getattr(venn, 'venn%d' % nb_dim)
    fig, ax = draw(labels, names=names)
    plt.show()
def uniq_share_cal(self, infile):
    """Tally which combination of tracked ids each input line covers and plot it.

    Each line of `infile` is split on tabs; every field contributes the text
    before its first ``|``. The prefixes that appear in ``self.ids`` form the
    line's id combination, and every key of ``self.data`` whose members equal
    that combination has its counter incremented.

    The counters are then re-keyed as strings of ``'1'``/``'0'`` flags (one
    flag per entry of ``self.ids``, in order) and passed to the matching
    ``venn`` drawing function. A PDF named ``gene_family_<n>taxa.pdf`` is
    written when ``self.ids`` holds 3 to 6 entries; for any other count the
    counters are still updated but nothing is drawn.

    Args:
        infile: iterable of text lines (e.g. an open file).
    """
    for line in infile:
        fields = line.rstrip().split('\t')
        # Direct iteration replaces the Python-2-only xrange index loop.
        target = {
            prefix
            for prefix in (field.split('|')[0] for field in fields)
            if prefix in self.ids
        }
        for key in self.data:
            if set(key) == target:
                self.data[key] += 1

    # Encode each id combination as a membership string such as '101'.
    labels = {}
    for key in self.data:
        flags = ''.join('1' if taxon in key else '0' for taxon in self.ids)
        labels[flags] = self.data[key]

    nb_taxa = len(self.ids)
    if 3 <= nb_taxa <= 6:
        draw = getattr(venn, 'venn%d' % nb_taxa)
        fig, ax = draw(labels, names=self.ids)
        fig.savefig('gene_family_%dtaxa.pdf' % nb_taxa)
# Remaining pairwise overlap sizes (`intersection` is a helper defined
# elsewhere in this file).
n34 = len(intersection(tp[ids[2]], tp[ids[3]]))
n35 = len(intersection(tp[ids[2]], tp[ids[4]]))
n45 = len(intersection(tp[ids[3]], tp[ids[4]]))

# Size of each of the five compared sets.
area1, area2, area3, area4, area5 = (len(tp[ids[k]]) for k in range(5))

# Flat list: set sizes, then pairwise, triple, quadruple and five-way overlaps.
venn_diag_list = [
    area1, area2, area3, area4, area5,
    n12, n13, n14, n15, n23, n24, n25, n34, n35, n45,
    n123, n124, n125, n134, n135, n145, n234, n235, n245, n345,
    n1234, n1235, n1245, n1345, n2345,
    n12345,
]
print('Overlap list needed for VennDiagram in R')
print(venn_diag_list)
out1.write('\n\n## Overlap list needed for VennDiagram in R:\n\n%s' % venn_diag_list)

if plot.lower() in ('t', 'true'):
    if n_comparing in (2, 3, 4, 5):
        # Draw the first `n_comparing` sets with the matching vennN function.
        compared = [tp[ids[k]] for k in range(n_comparing)]
        labels = venn.get_labels(compared, fill=['number'])
        fig, ax = getattr(venn, 'venn%d' % n_comparing)(labels, names=ids)
    filename = save + '_pred_compared.pdf'
    fig.savefig(filename)
# Remaining pairwise overlap sizes (`intersection` is a helper defined
# elsewhere in this file).
n25 = len(intersection(comp[ids[1]], comp[ids[4]]))
n34 = len(intersection(comp[ids[2]], comp[ids[3]]))
n35 = len(intersection(comp[ids[2]], comp[ids[4]]))
n45 = len(intersection(comp[ids[3]], comp[ids[4]]))

# Size of each of the five compared sets.
area1, area2, area3, area4, area5 = (len(comp[ids[k]]) for k in range(5))

# Flat list: set sizes, then pairwise, triple, quadruple and five-way overlaps.
venn_diag_list = [
    area1, area2, area3, area4, area5,
    n12, n13, n14, n15, n23, n24, n25, n34, n35, n45,
    n123, n124, n125, n134, n135, n145, n234, n235, n245, n345,
    n1234, n1235, n1245, n1345, n2345,
    n12345,
]
print('Overlap list needed for VennDiagram in R')
print(venn_diag_list)
#out1.write('\n\n## Overlap list needed for VennDiagram in R:\n\n%s' % venn_diag_list)

if n_comparing in (2, 3, 4, 5):
    # Draw the first `n_comparing` sets with the matching vennN function.
    compared = [comp[ids[k]] for k in range(n_comparing)]
    labels = venn.get_labels(compared, fill=['number'])
    fig, ax = getattr(venn, 'venn%d' % n_comparing)(labels, names=ids)
filename = save + '_pred_compared.pdf'
fig.savefig(filename)
# ipython notebook requires this
# %matplotlib inline

# python console requires this
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import sys
sys.path.append('lib')
import venn


def _read_line_set(path):
    """Return the set of whitespace-stripped lines of the file at `path`."""
    # The context manager closes the handle as soon as the lines are read,
    # instead of leaving it to the garbage collector.
    with open(path) as handle:
        return {line.strip() for line in handle}


# One set of lines per query language, each read from its own CSV file.
sparqldata = _read_line_set('out/dataset-full_name-sparql-total.csv')
cypherdata = _read_line_set('out/dataset-full_name-cypher-total.csv')
graphqldata = _read_line_set('out/dataset-full_name-graphql-total.csv')
gremlindata = _read_line_set('out/dataset-full_name-gremlin-total.csv')
data = [sparqldata, cypherdata, graphqldata, gremlindata]

# Four-set Venn diagram labelled with the size of every region.
labels = venn.get_labels(data, fill=['number'])
fig, ax = venn.venn4(labels, names=['SPARQL', 'Cypher', 'GraphQL', 'Gremlin'])
fig.savefig('vennrepos.pdf', bbox_inches='tight')
plt.close()
plt.ylabel('Number of kinase')
plt.legend()
plt.show()

# Identifier lists per method. NOTE(review): the `_sig` suffix suggests these
# are the significant hits of each method -- confirm against the analysis.
infl = pd.read_excel('results/Staturosporine_TPP_data_Savitski/Inflect.xlsx')
infl_first_col = infl.iloc[:, 0].values.astype(str)
infl_second_col = infl.iloc[:, 1].values.astype(str)
# Keep the identifiers present in both of the first two Inflect columns.
infl_sig = np.intersect1d(infl_first_col, infl_second_col)
tpp_m_sig = tpp_m['Protein_ID'].values
# Only the first 100 entries are used for the spline and NPARC lists.
tpp_s_sig = tpp_s['Protein_ID'].values[:100]
npp_sig = npp['id'].values[:100]

# Venn diagram over the four complete lists.
method_names = ('Inflect', 'TPP_MeltCurve', 'TPP_Spline', 'NPARC')
labels = venn.get_labels([infl_sig, tpp_m_sig, tpp_s_sig, npp_sig],
                         fill=['number'])
plt.figure(dpi=300)
fig, ax = venn.venn4(labels, names=list(method_names))
fig.show()

# Same comparison restricted to the identifiers contained in `kins`.
infl_kin = [protein for protein in infl_sig if protein in kins]
tpp_m_kin = [protein for protein in tpp_m_sig if protein in kins]
tpp_s_kin = [protein for protein in tpp_s_sig if protein in kins]
npp_kin = [protein for protein in npp_sig if protein in kins]
labels = venn.get_labels([infl_kin, tpp_m_kin, tpp_s_kin, npp_kin],
                         fill=['number'])
plt.figure(dpi=300)
fig, ax = venn.venn4(labels, names=list(method_names))
fig.show()
# Region labels (member counts) for the overlapping sections of the venn,
# computed separately for the up-regulated, down-regulated and total inputs.
labels_up = venn.get_labels(pos, fill=['number'])
labels_down = venn.get_labels(neg, fill=['number'])
labels_tot = venn.get_labels(tot, fill=['number'])

# Merge the up and down counts of every region into one two-line label.
labels_combined = {}
for key, up_count in labels_up.items():
    labels_combined[key] = f'⇧{up_count}\n⇩{labels_down[key]}'

# Venn diagram for the up- and downregulated genes only.
fig, ax = venn.venn4(labels_combined, names=order, colors=colors)
# Disabled manual recolouring of the ellipses, kept for reference:
#for i in range(4):
#    curr_ell = ax.get_children()[i]
#    curr_ell.set_facecolor(colors[i])
#    curr_ell.set_edgecolor(colors[i])
#plt.draw()
for target in ('venn_up_and_downregulated.svg',
               'venn_up_and_downregulated.pdf'):
    fig.savefig(target)
fig.clear()

# Venn diagram for all differentially expressed genes, regardless of
# directionality.
fig, ax = venn.venn4(labels_tot, names=order, colors=colors)
for target in ('venn_total.svg', 'venn_total.pdf'):
    fig.savefig(target)
#together_4 = len(zhang_2018 & wang_2012 & lukasik_2013 & ours)
#
#zwl_4 = len(zhang_2018 & wang_2012 & lukasik_2013) - together_4
#zwo_4 = len(zhang_2018 & wang_2012 & ours) - together_4
#zlo_4 = len(zhang_2018 & ours & lukasik_2013) - together_4
#wlo_4 = len(ours & wang_2012 & lukasik_2013) - together_4
#%%
import venn

# Collections to compare and their display names, in matching order.
study_sets = [ours, zhang_2018, wang_2012, lukasik_2013]
study_names = [
    'Present Study',
    'Zhang et al. 2018',
    'Wang et al. 2012',
    'Lukasik et al. 2013',
]

plt.figure(figsize=(7, 7))
labels = venn.get_labels(study_sets)
fig, ax = venn.venn4(labels, names=study_names)
fig.savefig('previous_bol_mirnas_venn_wours.svg',
            format='svg',
            bbox_inches='tight')
fig.show()
#%%
# Use eulerAPE instead for proportional
# from matplotlib_venn import venn3
from matplotlib import pyplot as plt

plt.figure(figsize=(7, 7))
elif "full_model" in results_file: all_labels.append("Full model") for r_label in results: # print unique TP to this result tps = set(results[r_label]["all"]) & set(gold_pairs["all"]) for r_label2 in results: if r_label2 == r_label: continue tps = tps - set(results[r_label2]["all"]) print() print("unique to {}:".format(r_label)) print(len(tps)) print(tps) print() print(all_labels) # print(results) labels = venn.get_labels(all_pairs, fill=["number"]) if len(all_pairs) == 2: fig, ax = venn.venn2(labels, names=all_labels) elif len(all_pairs) == 3: fig, ax = venn.venn3(labels, names=all_labels) elif len(all_pairs) == 4: fig, ax = venn.venn4(labels, names=all_labels) elif len(all_pairs) == 5: fig, ax = venn.venn5(labels, names=all_labels) elif len(all_pairs) == 6: fig, ax = venn.venn6(labels, names=all_labels) fig.savefig("{}.png".format("_".join(all_labels)), bbox_inches="tight") plt.close()
] for i, loc in enumerate(cancer_locs): relevant_tmp_df = sub_tmp_df_both[sub_tmp_df_both.cancer_loc == loc] labels = venn.get_labels( [relevant_tmp_df[c].dropna().index for c in relevant_nb_clones], fill=['number', 'logic']) labels_short = {k: v.split(': ')[1] for k, v in labels.items()} label_names = [ c.split('_')[0].replace('pyclone', 'PyClone').replace( 'sciclone', 'SciClone').replace('expands', 'Expands') for c in relevant_nb_clones ] label_names_fig = ['' for l in label_names] ax = venn.venn4(labels_short, names=label_names_fig, colors=[colors_protected[i] for i in label_names], fontsize_text=22, fontsize_number=25, ax=axes[ii, i]) nb_samples_total = relevant_tmp_df[relevant_nb_clones].dropna( axis='index', how='all').shape[0] ax.set_title('{loc} (n={n})'.format(loc=loc, n=nb_samples_total), fontsize=40) if i == 0: ax.text(-0.1, 0.4, '{}'.format(folder).replace( 'merged', 'public and protected\nintersection'), fontsize=40, rotation=90, va='center', ha='center')
def _select_genes(membership, required, excluded=()):
    """Return the index labels where `required` columns are True and `excluded` ones False."""
    mask = membership[required[0]] == True  # noqa: E712 (element-wise comparison)
    for column in required[1:]:
        mask = mask & (membership[column] == True)  # noqa: E712
    for column in excluded:
        mask = mask & (membership[column] == False)  # noqa: E712
    return membership[mask].index.values.tolist()


def linear_regression_venn4(gene, grouping, subgroups, showplot=False):
    """Plot and tabulate how four subgroups' gene lists overlap.

    Changes into ``<grouping>/Linear_Regression``, reads the index of
    ``short_summary_All Patients.csv`` and of ``short_summary_<name>.csv``
    for the first four entries of `subgroups`, then writes into that
    directory:

    * ``Venn_Diagram.png`` / ``Venn_Diagram.eps`` -- the four-set diagram;
    * ``Venn_Diagram.csv`` -- one row per gene of the all-patients list with
      a True/False membership column per subgroup;
    * ``Venn_Summary.txt`` -- tab-separated genes unique to each subgroup,
      followed by the genes shared by all four.

    Args:
        gene: name of the gene of interest; only used in the plot title.
        grouping: directory that contains the ``Linear_Regression`` folder.
        subgroups: sequence of the four subgroup names.
        showplot: when truthy, display the figure before saving it.

    NOTE(review): `venn4` and `elevate` are defined elsewhere. `elevate` is
    presumably "move up one directory" and is called twice to undo the
    two-level chdir -- confirm. The working directory is not restored if an
    exception is raised part-way through.
    """
    # move to correct directory; os.path.join keeps this portable instead of
    # hard-coding the Windows '\\' separator
    os.chdir(os.path.join(grouping, 'Linear_Regression'))

    # pull in gene lists (explicit indexing: fewer than four names still
    # raises IndexError)
    core_names = [subgroups[k] for k in range(4)]
    allpatients = pd.read_csv('short_summary_All Patients.csv', index_col=0)
    allgenes = set(allpatients.index.tolist())
    gene_sets = [
        set(pd.read_csv('short_summary_%s.csv' % name, index_col=0).index.tolist())
        for name in core_names
    ]

    # plot venn diagram of gene list intersections
    fig, ax = plt.subplots(figsize=(12, 12))
    args = {'font': 'Arial', 'group_labels': 24, 'patch_labels': 19}
    venn4(gene_sets, (subgroups), ax=ax, fig=fig, **args)
    plt.title('Genes Correlated to %s by Subgroup' % gene,
              fontname='Arial', fontsize=30)
    textbox = dict(horizontalalignment='center', verticalalignment='center',
                   fontname='Arial', fontsize=22)
    plt.text(0.5, 0.97, '%d total genes' % (len(allgenes)), textbox,
             transform=ax.transAxes)
    plt.tight_layout()
    if showplot:
        plt.show()
    fig.savefig('Venn_Diagram.png', transparent=True)
    fig.savefig('Venn_Diagram.eps', transparent=True)

    # create and save total gene list with TRUE and FALSE variables for sorting
    groups = dict(zip(core_names, gene_sets))
    summary = {'All Patients': True}
    for name in subgroups:
        members = groups[name]
        # `candidate` (not `gene`) so the parameter is no longer shadowed
        summary[name] = {candidate: candidate in members
                         for candidate in allgenes}
    df = pd.DataFrame.from_dict(summary)
    df.to_csv("Venn_Diagram.csv")

    # create a short list with unique values for each group as well as
    # overall intersections
    venn_summary = {}
    for position, name in enumerate(core_names):
        others = core_names[:position] + core_names[position + 1:]
        venn_summary[name] = _select_genes(df, [name], others)
    venn_summary['intersection'] = _select_genes(df, core_names)

    with open('Venn_Summary.txt', 'w') as f:
        for key in subgroups:
            f.write('%s\t%d genes\t' % (key, len(venn_summary[key])))
            f.write(''.join('%s\t' % member for member in venn_summary[key]))
            f.write('\n')
        f.write('Intersection\t%d genes\t' % (len(venn_summary['intersection'])))
        f.write(''.join('%s\t' % member
                        for member in venn_summary['intersection']))
        f.write('\n')

    elevate()
    elevate()
def create_venn4(labels, names):
    """Draw a four-set Venn diagram and display it.

    `labels` and `names` are forwarded unchanged to ``venn.venn4`` (the
    latter as the ``names`` keyword). The figure is shown with ``plt.show()``
    and is not saved; nothing is returned.
    """
    figure, axes = venn.venn4(labels, names=names)
    # Saving is intentionally disabled; re-enable to write the figure to disk:
    # figure.savefig('venn4.png', bbox_inches='tight')
    plt.show()
def create_venn_diagram(df, venn_labels, figsize=[10, 9], percent_only=False): """ df - dataframe with data venn_labels - ordered dictionary keys are the column names to be used in the venn diagram - columns should only have 0s/1s values are the "pretty" label for the column """ sets = list() set_names = list() for c in venn_labels: idx = df[c] == 1 sets.append(set(df.loc[idx, 'icustay_id'])) set_names.append(venn_labels[c]) if len(venn_labels) > 4: print('Only supports up to a 4 set venn diagrams') return if len(venn_labels) == 4: if percent_only: fill = 'percent_only' else: fill = 'percent' venn4(sets, set_names, show_plot=False, fontdict={ 'fontsize': 15, 'fontweight': 'normal' }, fill=fill, figsize=figsize) leg = plt.legend('off') leg.remove() plt.show() else: if percent_only: string_formatter = lambda x: '{:2.1f}%'.format(x * 100.0 / df. shape[0]) else: string_formatter = lambda x: '{:,}\n{:2.1f}%'.format( x, x * 100.0 / df.shape[0]) plt.figure(figsize=figsize) plt.rcParams.update({'font.size': 15}) venn3(sets, set_names, subset_label_formatter=string_formatter) plt.show() # excluded IDs set_other = set(df['icustay_id'].values).difference(*sets) # Print other numbers for above venn diagram print('{} patients ({:2.1f}%) satisfied all criteria.'.format( len(set.intersection(*sets)), len(set.intersection(*sets)) * 100.0 / df.shape[0])) print('{} patients ({:2.1f}%) satisfied no criteria.'.format( len(set_other), len(set_other) * 100.0 / df.shape[0])) # pair-wise counts for i, c1 in enumerate(venn_labels): for j, c2 in enumerate(venn_labels): if i <= j: continue else: set_both = set.intersection(sets[i], sets[j]) print('{:2.1f}% ({}) - {} & {}'.format( len(set_both) * 100.0 / df.shape[0], len(set_both), c1, c2)) """
def _samples_for_antibiotic(table_path, antibiotic):
    """Return the set of samples listed for `antibiotic` in a headerless TSV.

    The file is read as three tab-separated columns named ``db``, ``sample``
    and ``antibiotic``. Rows are selected with a boolean mask rather than an
    interpolated ``query`` string, so antibiotic names containing quotes do
    not break the filter expression.
    """
    table = pandas.read_csv(table_path,
                            sep="\t",
                            header=None,
                            names=["db", "sample", "antibiotic"])
    wanted = table["antibiotic"] == str(antibiotic)
    return set(table.loc[wanted, "sample"].tolist())


# Samples reported for the selected antibiotic by each of the four sources.
rgi_set = _samples_for_antibiotic(rgi, antibio)
mykrobe_set = _samples_for_antibiotic(mykrobe, antibio)
tbprofiler_set = _samples_for_antibiotic(tbprofiler, antibio)
walker_set = _samples_for_antibiotic(walker, antibio)

# Four-set Venn diagram labelled with the size of every region.
labels = venn.get_labels([rgi_set, mykrobe_set, tbprofiler_set, walker_set],
                         fill=['number'])
fig, ax = venn.venn4(labels, names=['rgi', 'mykrobe', 'tbprofiler', 'walker'])
fig.savefig(snakemake.output[0])
import ast
import csv
import operator
from collections import Counter
from itertools import combinations
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import venn

# Minimal four-set example: overlapping integer ranges stand in for real data.
example_sets = [range(10), range(5, 15), range(3, 8), range(8, 17)]
example_names = ['list 1', 'list 2', 'list 3', 'list 4']

# Each region is labelled with its membership pattern and its element count.
labels = venn.get_labels(example_sets, fill=['number', 'logic'])
fig, ax = venn.venn4(labels, names=example_names)
fig.show()
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import venn

# Demo inputs: the k-set diagram uses the first k ranges and the first k names.
demo_sets = [
    range(10),
    range(5, 15),
    range(3, 8),
    range(8, 17),
    range(10, 20),
    range(13, 25),
]
demo_names = ['list 1', 'list 2', 'list 3', 'list 4', 'list 5', 'list 6']

# (drawing function name, output file) for each supported set count, 2..6.
diagram_specs = (
    ('venn2', 'venn2.png'),
    ('venn3', 'venn3.png'),
    ('venn4', 'venn4.png'),
    ('venn5', 'venn5.png'),
    ('venn6', 'venn6.png'),
)

for set_count, (drawer_name, target) in enumerate(diagram_specs, start=2):
    labels = venn.get_labels(demo_sets[:set_count], fill=['number', 'logic'])
    # Resolve the drawing function lazily, right before it is used.
    fig, ax = getattr(venn, drawer_name)(labels, names=demo_names[:set_count])
    fig.savefig(target, bbox_inches='tight')
    plt.close()