def plot_venn(t1=None, t2=None, t3=None, ax=None, set_colors=('r', 'b', 'k')):
    """Draw a 2- or 3-set Venn diagram from ``(items, name)`` pairs.

    Every circle is labelled with its name followed, on a second line, by the
    number of unique items it holds.

    Args:
        t1, t2: required ``(list/set, name_to_display)`` pairs.
        t3: optional third pair; a 3-set diagram is drawn when it is truthy.
        ax: axes object forwarded to ``venn2`` / ``venn3``.
        set_colors: circle colours; only the first two are used for 2 sets.

    Raises:
        ValueError: if ``t1`` or ``t2`` is missing or is not of length 2.
    """
    # Explicit validation instead of ``assert`` so the check survives
    # ``python -O`` and a forgotten argument gives a readable error.
    for arg_name, pair in (('t1', t1), ('t2', t2)):
        if pair is None or len(pair) != 2:
            raise ValueError(
                '%s must be a (list/set, name_to_display) pair' % arg_name)

    pairs = [t1, t2, t3] if t3 else [t1, t2]
    # Build each set once; it feeds both the diagram and the label count.
    item_sets = [set(pair[0]) for pair in pairs]
    labels = tuple('%s\n(%s)' % (pair[1], len(items))
                   for pair, items in zip(pairs, item_sets))

    if t3:
        venn3(item_sets, labels, set_colors=set_colors, alpha=0.5, ax=ax)
    else:
        venn2(item_sets, labels, set_colors=set_colors[0:2], alpha=0.5, ax=ax)
def plotVennDiagram(fdr_threshold):
    """Plot, one subplot per adduct, how three formula sets overlap.

    For every adduct key of the module-level ``orig['fdr']`` mapping the three
    circles are: formulas passing the filter in the original results, formulas
    passing it in the simulated results, and the ground-truth formulas of that
    adduct that also occur in the original results table.

    Reads the module-level names ``groundtruth_fn``, ``orig`` and ``sim``.
    """
    truth_table = pd.read_csv(groundtruth_fn, names=['formula', 'adduct'])
    truth_by_adduct = truth_table.groupby('adduct')

    def passing_rows(table, max_fdr, wanted_adduct):
        """Rows with FDR below ``max_fdr``, the wanted adduct and a positive
        img * iso * moc product (the original note calls this the MSM)."""
        fdr_values = table['fdr'] if 'fdr' in table else table['est_fdr']
        below_threshold = fdr_values < max_fdr
        same_adduct = table['adduct'] == wanted_adduct
        positive_score = table['img'] * table['iso'] * table['moc'] > 0
        return table[below_threshold & same_adduct & positive_score]

    circle_names = ("Orig. annotations", "Sim. annotations", "Sim. groundtruth & DB")
    for panel, adduct in enumerate(orig['fdr'].keys(), 1):
        plt.subplot(len(orig['fdr']), 1, panel)
        plt.title("Annotation overlap for {} (FDR threshold = {})"
                  .format(adduct, fdr_threshold))

        orig_table = pd.read_csv(orig['fdr'][adduct])
        sim_table = pd.read_csv(sim['fdr'][adduct])
        database = set(orig_table['formula'])

        orig_hits = set(passing_rows(orig_table, fdr_threshold, adduct)['formula'])
        sim_hits = set(passing_rows(sim_table, fdr_threshold, adduct)['formula'])
        truth_in_db = set(truth_by_adduct.get_group(adduct)['formula']) & database

        venn3([orig_hits, sim_hits, truth_in_db], circle_names)
def mi_venn_snps(df_snp_1, df_snp_2, df_snp_3, file='venn_snp.png'):
    """Save a 3-set Venn diagram of the FIDs selected in three SNP tables.

    A row is selected when its ``SNP`` value is 1 or 2; each circle is the set
    of ``FID`` values of the selected rows of one table.

    Args:
        df_snp_1, df_snp_2, df_snp_3: tables with ``SNP`` and ``FID`` columns.
        file: path the figure is saved to.
    """
    import matplotlib_venn
    from matplotlib import pyplot as plt

    def _selected_fids(df_snp):
        # Unique FIDs of the rows whose SNP value is 1 or 2.
        return set(df_snp[df_snp['SNP'].isin([1, 2])].FID.tolist())

    # Filter every table once instead of re-filtering it for each count.
    set_a = _selected_fids(df_snp_1)
    set_b = _selected_fids(df_snp_2)
    set_c = _selected_fids(df_snp_3)

    n_A, n_B, n_C = len(set_a), len(set_b), len(set_c)
    n_AB = len(set_a & set_b)
    n_AC = len(set_a & set_c)
    n_BC = len(set_b & set_c)
    n_ABC = len(set_a & set_b & set_c)

    # Inclusion-exclusion: turn inclusive counts into exclusive region sizes.
    n_Abc = n_A - n_AC - n_AB + n_ABC
    n_aBc = n_B - n_BC - n_AB + n_ABC
    n_ABc = n_AB - n_ABC
    n_abC = n_C - n_BC - n_AC + n_ABC
    n_AbC = n_AC - n_ABC
    n_aBC = n_BC - n_ABC

    # venn3 expects the order (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    subsets = (n_Abc, n_aBc, n_ABc, n_abC, n_AbC, n_aBC, n_ABC)
    # Single-argument print() produces the same text on Python 2 and 3.
    print(' '.join(str(size) for size in subsets))

    matplotlib_venn.venn3(subsets=subsets,
                          set_labels=('SNP_1', 'SNP_2', 'SNP_3'))
    plt.savefig(file)
    plt.close()
def plot_venn3(cp,nc,bx,cp_nc,cp_bx,nc_bx,cnb,plotName=None) :
    """Draw a 3-set Venn diagram from seven region sizes.

    Args:
        cp, nc, bx: sizes of regions '100', '010' and '001'.
        cp_nc, cp_bx, nc_bx: sizes of regions '110', '101' and '011'.
        cnb: size of region '111'.
        plotName: if truthy the figure is saved there, otherwise it is shown.
    """
    plt.figure()
    region_ids = ('100', '010', '001', '110', '101', '011', '111')
    region_sizes = dict(zip(region_ids, (cp, nc, bx, cp_nc, cp_bx, nc_bx, cnb)))
    venn3(subsets=region_sizes,
          set_labels=('Salmonella enterica', 'Escherichia coli', 'Staphylococcus aureus'))
    plt.title("Venn diagram")
    if not plotName:
        plt.show()
        return
    plt.savefig(plotName)
def main():
    """Plot how read names overlap between bonobo, chimp and human BAM files.

    Opens the three alignment files named by the parsed arguments, collects
    the read names of each and draws a 3-set Venn diagram titled with
    ``args.plot_title``.
    """
    args = parse_args()
    f_chimp = pysam.Samfile(args.f_chimp, "rb")
    f_bono = pysam.Samfile(args.f_bono, "rb")
    f_human = pysam.Samfile(args.f_human, "rb")
    try:
        # unique read IDs found in each file
        reads_chimp = set(read.qname for read in f_chimp)
        reads_bono = set(read.qname for read in f_bono)
        reads_human = set(read.qname for read in f_human)
        mapped_chimp = f_chimp.mapped
        mapped_bono = f_bono.mapped
        mapped_human = f_human.mapped
    finally:
        # the handles are not needed once names and counts are collected
        f_chimp.close()
        f_bono.close()
        f_human.close()

    # Region sizes must be numbers: take len() of every intersection.
    # (Previously the sets themselves were stored, so ``int - set`` raised.)
    n_all = len(reads_bono & reads_chimp & reads_human)
    n_bono_chimp = len(reads_bono & reads_chimp) - n_all
    n_bono_human = len(reads_bono & reads_human) - n_all
    n_chimp_human = len(reads_chimp & reads_human) - n_all

    # NOTE(review): ``.mapped`` is a per-file count while the overlaps count
    # unique read names - confirm the two are comparable for this data.
    only_human = mapped_human - n_bono_human - n_chimp_human - n_all
    only_chimp = mapped_chimp - n_bono_chimp - n_chimp_human - n_all
    only_bono = mapped_bono - n_bono_chimp - n_bono_human - n_all

    # venn3() takes region sizes in the order (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    # with A = bonobo, B = chimp, C = human (see set_labels below):
    #   [0] only bonobo          [1] only chimp
    #   [2] bonobo & chimp       [3] only human
    #   [4] bonobo & human       [5] chimp & human
    #   [6] all three
    venn = [only_bono, only_chimp, n_bono_chimp, only_human,
            n_bono_human, n_chimp_human, n_all]

    # plot the venn diagram
    venn3(subsets=venn, set_labels=("Bonobo", "Chimpanzee", "Human"))
    plt.title(args.plot_title)
    plt.show()
def mi_venn(df_snp_1, df_snp_2, df_pheno, file='venn.png'):
    """Save a Venn diagram of two SNP selections against a phenotype group.

    Circles A and B are the ``FID`` values of the rows whose ``SNP`` value is
    1 or 2 in each SNP table.  Circle C is the ``FID`` values of the
    ``df_pheno`` rows whose ``'pheno_' + df_pheno.pheno_name`` column equals 2.

    Args:
        df_snp_1, df_snp_2: tables with ``SNP`` and ``FID`` columns.
        df_pheno: table with a ``pheno_name`` attribute, an ``FID`` column
            and the matching ``pheno_<name>`` column.
        file: path the figure is saved to.
    """
    import matplotlib_venn
    from matplotlib import pyplot as plt

    def _selected_fids(df_snp):
        # Unique FIDs of the rows whose SNP value is 1 or 2.
        return set(df_snp[df_snp['SNP'].isin([1, 2])].FID.tolist())

    pheno = df_pheno.pheno_name
    set_c = set(df_pheno[df_pheno['pheno_' + pheno] == 2].FID.tolist())
    # Kept as float (as before) so the printed/plotted values are unchanged.
    pheno_total = float(len(set_c))

    print("Venn Diagram")

    # Filter each SNP table once and reuse the result for every count.
    set_a = _selected_fids(df_snp_1)
    set_b = _selected_fids(df_snp_2)

    n_ABC = float(len(set_a & set_b & set_c))
    n_AB = len(set_a & set_b)
    n_AC = len(set_a & set_c)
    n_BC = len(set_b & set_c)
    n_A = len(set_a)
    n_B = len(set_b)

    # Inclusion-exclusion: turn inclusive counts into exclusive region sizes.
    n_Abc = n_A - n_AC - n_AB + n_ABC
    n_aBc = n_B - n_BC - n_AB + n_ABC
    n_ABc = n_AB - n_ABC
    n_abC = pheno_total - n_BC - n_AC + n_ABC
    n_AbC = n_AC - n_ABC
    n_aBC = n_BC - n_ABC

    # Single-argument print() produces the same text on Python 2 and 3.
    print("n_pheno: {}\t n_SNP_common: {}\t n_SNP_rare: {}\n".format(pheno_total, n_A, n_B))

    # venn3 expects the order (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    subsets = (n_Abc, n_aBc, n_ABc, n_abC, n_AbC, n_aBC, n_ABC)
    print(' '.join(str(size) for size in subsets))

    matplotlib_venn.venn3(subsets=subsets,
                          set_labels=('SNP_common', 'SNP_rare', 'Pheno'))
    plt.savefig(file)
    plt.close()
def venn3_sets(set_a, set_b, set_c, set_labels, ax):
    """Draw a 3-set Venn diagram on ``ax`` from three sets.

    The seven exclusive region sizes are computed here and handed to
    ``venn3`` in its expected order (Abc, aBc, ABc, abC, AbC, aBC, ABC).
    """
    # elements belonging to exactly one set
    only_a = set_a.difference(set_b, set_c)
    only_b = set_b.difference(set_a, set_c)
    only_c = set_c.difference(set_a, set_b)
    # elements shared by exactly two sets
    a_and_b = set_a.intersection(set_b).difference(set_c)
    a_and_c = set_a.intersection(set_c).difference(set_b)
    b_and_c = set_b.intersection(set_c).difference(set_a)
    # elements shared by all three
    everywhere = set_a.intersection(set_b, set_c)

    ordered_regions = (only_a, only_b, a_and_b, only_c, a_and_c, b_and_c, everywhere)
    venn3(subsets=tuple(len(region) for region in ordered_regions),
          set_labels=set_labels, ax=ax)
def main(list_sequence_names, output_prefix):
    """Draw and save a Venn diagram of the sequences found in several files.

    Args:
        list_sequence_names: iterable of ``[filename, label]`` pairs; each
            file is read with ``seq_IO.read_sequences``.
        output_prefix: forwarded to ``pconv.save_fig`` together with the
            labels joined by ``_`` and the suffix ``_venn``.
    """
    loaded = [(set(seq_IO.read_sequences(filename)), label)
              for filename, label in list_sequence_names]
    sequence_list = [sequences for sequences, _ in loaded]
    labels = [label for _, label in loaded]

    fig, ax = pconv.create_ax(1, 1)
    venn3(sequence_list, set_labels=labels, ax=ax[0, 0])

    figure_name = '_'.join(labels) + "_venn"
    pconv.save_fig(fig, output_prefix, figure_name, 10, 10, size=12)
def plot_venn(List_of_sets, Set_labels, Main = "I forgot to give this plot a name.", Out_File = "", Custom_overlap_numbers = []):
    """Generate a Venn diagram from a list of sets and save it to ``Out_File``.

    Arguments:
        List_of_sets: two or three sets.
        Set_labels: label for each circle (same length as ``List_of_sets``).
        Main: title of the plot.
        Out_File: path of the image to write.  The parent directory must
            already exist; a bare file name is written to the current
            directory.  An existing file is overwritten.
        Custom_overlap_numbers: optional ``[# in first, # in second,
            # in both]``.  When given, a 2-circle diagram is drawn from these
            numbers and ``List_of_sets`` is ignored.

    Raises:
        ValueError: for a missing/invalid output path or inconsistent input
            lengths.  All checks run before any figure is created.
    """
    # NOTE: the list default of Custom_overlap_numbers is only read, never
    # mutated, so sharing it between calls is harmless.
    if not Out_File:
        raise ValueError(repr(Out_File) + " <--- PATH DOESN'T EXIST")
    # dirname() of a bare file name is '', which isdir() rejects even though
    # the current directory is a perfectly valid target.
    out_dir = os.path.dirname(Out_File) or os.curdir
    if not os.path.isdir(out_dir):
        raise ValueError(out_dir + " <--- PATH DOESN'T EXIST")

    n_custom = len(Custom_overlap_numbers)
    if n_custom != 0 and n_custom != 3:
        raise ValueError("Custom overlap only works for 2 circle venn diagrams at the moment...")

    if n_custom == 3:
        only_first, only_second, in_both = Custom_overlap_numbers
        plt.figure()
        venn2(subsets={'10': only_first, '01': only_second, '11': in_both},
              set_labels = Set_labels)
    else:
        n_sets = len(List_of_sets)
        if n_sets != 2 and n_sets != 3:
            raise ValueError("List_of_sets needs to be of length 2 or 3.")
        if len(Set_labels) != n_sets:
            raise ValueError("Set_labels needs to be the same length as the number of sets...")
        # Default figure dimensions...
        plt.figure()
        if n_sets == 2:
            venn2(List_of_sets, Set_labels)
        else:
            venn3(List_of_sets, Set_labels)

    plt.title(Main)
    plt.savefig(Out_File)
def draw_venn3(A, B, C, sets):
    """Show a 3-set Venn diagram built from inclusive groups.

    Args:
        A, B, C: display names of the three circles.
        sets: mapping with the keys "A", "B", "C", "AB", "AC", "BC" and
            "ABC"; only the length of each value is used.  The pairwise
            entries are treated as including the triple overlap.
    """
    size = dict((key, len(sets[key]))
                for key in ("A", "B", "C", "AB", "AC", "BC", "ABC"))

    all_three = size["ABC"]
    ab_only = size["AB"] - all_three
    ac_only = size["AC"] - all_three
    bc_only = size["BC"] - all_three
    a_only = size["A"] - ab_only - ac_only - all_three
    b_only = size["B"] - ab_only - bc_only - all_three
    c_only = size["C"] - ac_only - bc_only - all_three

    # order expected by venn3: (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    regions = [a_only, b_only, ab_only, c_only, ac_only, bc_only, all_three]

    # every circle label carries the total size of its set
    circle_labels = tuple(name + " (" + str(size[key]) + ")"
                          for name, key in ((A, "A"), (B, "B"), (C, "C")))

    venn3(subsets=regions, set_labels = circle_labels)
    plt.show()
def test_pr_28():
    """Check ``subset_label_formatter`` on all four diagram constructors.

    With ``None`` the region label is the plain number; with a custom
    formatter the label is whatever the formatter returns.
    """
    import matplotlib_venn as mv

    def scaled(x):
        return 'Value: %+0.3f' % (x / 100.0)

    three_set_sizes = (1, 2, 3, 4, 5, 6, 7)
    two_set_sizes = (1, 2, 3)

    # (constructor, sizes, formatter, region id, expected label text)
    cases = [
        (mv.venn3, three_set_sizes, None, '010', '2'),
        (mv.venn3, three_set_sizes, scaled, '010', 'Value: +0.020'),
        (mv.venn2, two_set_sizes, None, '01', '2'),
        (mv.venn2, two_set_sizes, scaled, '01', 'Value: +0.020'),
        (mv.venn3_unweighted, three_set_sizes, scaled, '010', 'Value: +0.020'),
        (mv.venn2_unweighted, two_set_sizes, scaled, '01', 'Value: +0.020'),
    ]
    for make_diagram, sizes, formatter, region_id, expected in cases:
        v = make_diagram(sizes, subset_label_formatter = formatter)
        assert v.get_label_by_id(region_id).get_text() == expected
def draw(set1, set2, set3, label1, label2, label3):
    """Show a coloured Venn diagram of two or three collections.

    A 3-set diagram is drawn when ``label3`` is truthy, otherwise ``set3`` is
    ignored and a 2-set diagram is drawn.

    Args:
        set1, set2, set3: iterables converted to sets.
        label1, label2, label3: circle names, also used in the title.
    """
    set1 = set(set1)
    set2 = set(set2)
    region_colors = [('100', "blue"), ('010', "red"), ('110', "purple")]
    if label3:
        set3 = set(set3)
        v = venn3([set1, set2, set3], (label1, label2, label3))
        plt.title('Venn diagram for hubs: ' + label1 + "," + label2 + "," + label3, fontsize=20)
        # These two regions only exist in a 3-set diagram; asking a 2-set
        # diagram for them fails, so they are added here only.
        region_colors += [('001', "green"), ('111', "black")]
    else:
        v = venn2([set1, set2], (label1, label2))
        plt.title('Venn diagram for hubs:' + label1 + "," + label2, fontsize=20)

    for region_id, color in region_colors:
        patch = v.get_patch_by_id(region_id)
        # an empty region has no patch
        if patch is not None:
            patch.set_color(color)

    axes = gca()
    # set_axis_bgcolor() no longer exists in current Matplotlib; prefer its
    # replacement and fall back only on installations that predate it.
    if hasattr(axes, 'set_facecolor'):
        axes.set_facecolor('white')
    else:
        axes.set_axis_bgcolor('white')
    axes.set_axis_on()
    plt.show()
def venn(df1, df2, df3=None, labels=None, ix1=None, ix2=None, ix3=None, return_intersection=False):
    """Plot a Venn diagram of the indices of two or three dataframes.

    Args:
        df1, df2: dataframes whose ``.index`` is compared.
        df3: optional third dataframe; a 3-set diagram is drawn when given.
        labels: circle labels, ``["A", "B", "C"]`` when omitted.
        ix1, ix2, ix3: forwarded with the matching index to ``_process_ix``.
        return_intersection: also return the elements common to all sets.

    Returns:
        The current axes, or ``(axes, intersection)`` when
        ``return_intersection`` is true.

    Raises:
        ImportError: if the ``matplotlib_venn`` package is not installed.
    """
    try:
        import matplotlib_venn as mplv
    except ImportError:
        # The exception used to be constructed but never raised, so a missing
        # package surfaced later as a confusing NameError on ``mplv``.
        raise ImportError("To plot venn diagrams, install matplotlib-venn package: pip install matplotlib-venn")

    if labels is None:
        labels = ["A", "B", "C"]

    s1 = _process_ix(df1.index, ix1)
    s2 = _process_ix(df2.index, ix2)

    if df3 is not None:
        s3 = _process_ix(df3.index, ix3)
        mplv.venn3([s1, s2, s3], set_labels=labels)
        intersection = s1 & s2 & s3
    else:
        mplv.venn2([s1, s2], set_labels=labels)
        intersection = s1 & s2

    ax = plt.gca()

    if return_intersection:
        return ax, intersection
    return ax
def venn3_plot(sets, set_labels=('A', 'B', 'C'), set_colors=None, alpha=1.0, circle_on=False):
    """
    Draw a 3-set Venn diagram with equally sized regions labelled by counts.

    All seven regions are drawn with size 1; the text of each region is then
    replaced by the number of elements of ``sets`` that fall into it.  Built
    on venn3 and venn3_circles from matplotlib_venn.

    Example:
    --------
    set1 = set(['A', 'B', 'C', 'D'])
    set2 = set(['B', 'C', 'D', 'E'])
    set3 = set(['C', 'D', 'E', 'F', 'G'])
    venn3_plot([set1, set2, set3], ('Set1', 'Set2', 'Set3'))
    """
    from matplotlib_venn import venn3, venn3_circles

    equal_regions = (1, 1, 1, 1, 1, 1, 1)
    if circle_on:
        venn3_circles(subsets=equal_regions, alpha=0.8, color="r")
    if set_colors is None:
        set_colors = favorite_colors[:3]
    diagram = venn3(subsets=equal_regions, set_labels=set_labels,
                    set_colors=set_colors, alpha=alpha)

    first, second, third = sets[0], sets[1], sets[2]
    # Parentheses spell out how the original unparenthesised expressions were
    # grouped ('-' binds tighter than '&').
    region_members = (
        ('111', first & second & third),
        ('110', first & (second - third)),
        ('101', (first - second) & third),
        ('100', first - second - third),
        ('011', third & (second - first)),
        ('010', second - third - first),
        ('001', third - second - first),
    )
    for region_id, members in region_members:
        diagram.get_label_by_id(region_id).set_text(len(members))
    return diagram
def draw_venn(title, names, numbers, out):
    """Save a Venn diagram with dashed outlines, collapsing to 2 sets if possible.

    Args:
        title: plot title.
        names: circle names, one per set.
        numbers: region sizes - seven values in venn3 order
            (Abc, aBc, ABc, abC, AbC, aBC, ABC) or three in venn2 order.
        out: path the figure is saved to.

    When seven values are given and every region of one set is empty, that
    set is dropped and a 2-set diagram of the remaining two is drawn.
    """
    if len(numbers) == 7:
        # (regions that must all be empty, regions to keep, names to keep)
        collapse_rules = (
            ((0, 2, 4, 6), (1, 3, 5), (1, 2)),  # first set empty
            ((1, 2, 5, 6), (0, 3, 4), (0, 2)),  # second set empty
            ((3, 4, 5, 6), (0, 1, 2), (0, 1)),  # third set empty
        )
        for empty_regions, kept_regions, kept_names in collapse_rules:
            if sum(numbers[i] for i in empty_regions) == 0:
                numbers = [numbers[i] for i in kept_regions]
                names = [names[i] for i in kept_names]
                break

    # One fresh figure per call.  The old code created two for 3-set input
    # (leaking an empty one) and cleared whatever axes happened to be current.
    fig = plt.figure(figsize=(10, 10))
    if len(numbers) == 7:
        venn3(subsets=numbers, set_labels = names)
        venn3_circles(subsets=numbers, linestyle='dashed')
    else:
        venn2(subsets = numbers, set_labels = names)
        venn2_circles(subsets = numbers, linestyle='dashed')

    plt.title(title)
    plt.savefig(out)
    # the figure only exists to be written to disk; release it
    plt.close(fig)
def makeVenn3(self,truthDict,snr,addon=''):
    """Save a 3-set Venn diagram of ``truthDict`` as both SVG and PNG.

    Args:
        truthDict: mapping from a 3-tuple of 'map'/'not' flags (one flag per
            circle) to the value drawn for that region; the all-'not' entry
            is reported in the caption as "unmapped".
        snr: value shown in the caption and embedded in the file name.
        addon: extra suffix for the file name.

    The output path is built from ``self.destDir``, ``self.fname`` and
    ``self.ch``.
    """
    stem = os.path.join(
        self.destDir,
        self.fname+'.c'+str(self.ch)+'.snr_'+str(snr)+addon)

    # keys in the region order venn3 expects
    region_keys = [('map','not','not'), ('not','map','not'), ('map','map','not'),
                   ('not','not','map'), ('map','not','map'), ('not','map','map'),
                   ('map','map','map')]
    region_values = [truthDict[key] for key in region_keys]
    unmapped = truthDict[('not','not','not')]

    # Making venn diagram
    plt.figure(figsize=( 5,5))
    venn3(subsets=region_values,
          set_labels=('Cycle1:Mock','Cycle2:Mock','Cycle3:Edman'))
    venn3_circles(subsets=region_values,ls='solid')

    caption = 'unmapped='+str(unmapped)+'\n SNR='+str(snr)
    plt.title('Peak Mapping :'+self.fname + '.'+self.frame+'\n channel:'+str(self.ch))
    plt.figtext( 0.7,0.1,caption)

    for extension in ('.svg', '.png'):
        plt.savefig(stem+extension,dpi=300)
    plt.close()
def draw_venn_3group(group1, group2, group3, group_labels, save_addr, title='Comparision of significant 3 gene set'):
    '''
    Draw, save and show a Venn diagram of three groups.

    group1, group2, group3: iterables, each converted to a set
    group_labels: one label per circle
    save_addr: path the figure is saved to before it is shown
    title: plot title
    '''
    circles = [set(members) for members in (group1, group2, group3)]
    venn3(circles, set_labels=group_labels)
    plt.title(title)
    plt.savefig(save_addr)
    plt.show()
def create_venn(three_col_array, col_names):
    """Show a Venn diagram for a 2-D array with exactly three columns.

    Rows are entries and columns are sets: entry ``i`` belongs to set ``j``
    when element ``(i, j)`` is non-zero.

    Args:
        three_col_array: 2-D array with three columns.
        col_names: label for each of the three sets.

    Raises:
        ValueError: if the array is not 2-D or does not have three columns.
    """
    # Check dimensionality first: reading shape[1] of a 1-D array raises
    # IndexError, which made the old "not 2d" check unreachable.
    if len(three_col_array.shape) != 2:
        raise ValueError("not 2d")
    if three_col_array.shape[1] != 3:
        raise ValueError("do not have three columns")

    # one set of row indices per column
    sets = [set(np.nonzero(col)[0].tolist()) for col in three_col_array.T]
    venn_input = _create_ven_numbers(sets)
    venn3(subsets = tuple(venn_input), set_labels = col_names)
    plt.show()
def process_multiple(names):
    """Merge several workbooks, annotate every sheet and plot the gene overlap.

    Args:
        names: file names inside the media directory.  All but the last are
            input workbooks; the last is the workbook that is written.

    Returns:
        ``(data, path)`` - the per-sheet results of ``add_cols`` and the image
        path relative to the media directory.  The image is only written when
        there are exactly two or three sheets.
    """
    media_root = '/Users/agatorano/Code/metascape/metascape.org/media/'
    name = names[-1]
    workbook_path = media_root + name

    # 1) copy the first sheet of every input workbook into one workbook
    writer = pd.ExcelWriter(workbook_path)
    files_ = [pd.ExcelFile(media_root + n) for n in names[:-1]]
    for i, f in enumerate(files_):
        df = f.parse(f.sheet_names[0])
        df.to_excel(writer, sheet_name='Sheet%s' % i)
    # close() writes the file; ExcelWriter.save() is gone in pandas 2.0
    writer.close()

    # 2) annotate every sheet and remember its gene ids
    xls = xlrd.open_workbook(workbook_path, on_demand=True)
    data = []
    genes = []
    for sheet in xls.sheet_names():
        list_ = get_gid(name, sheet)
        list_ = add_annotation(list_)
        genes.append(set([x[0] for x in list_]))
        data.append(add_cols(list_, name, sheet))

    # 3) write the annotated sheets back over the merged workbook
    writer = pd.ExcelWriter(workbook_path)
    for sheet_number, sheet_data in enumerate(data, 1):
        save_excel(sheet_data, writer, sheet_number)

    # 4) plot the overlap of the gene sets into a time-stamped image
    fig = plt.figure(figsize=(7, 7))
    now = datetime.datetime.now()
    path = 'img/' + now.strftime("%Y/%m/%d/venn%H_%M_%S.png")
    img = media_root + path
    output_directory = os.path.dirname(img)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    if len(genes) == 3:
        venn3(genes, ('File1', 'File2', 'File3'))
        plt.savefig(img)
    elif len(genes) == 2:
        venn2(genes, ('File1', 'File2'))
        plt.savefig(img)
    # the figure is not needed once saved; closing it stops one figure
    # from piling up per call
    plt.close(fig)

    writer.close()
    return data, path
def generate_venn(mem, preds):
    """Show how the positive predictions of two runs overlap.

    Args:
        mem: per-index ground truth; ``0`` marks a negative, anything else a
            positive.
        preds: sequence whose first two items are the per-index predictions
            of run 1 and run 2 (``1`` means predicted positive).

    Circles A and B are the positives of run 1 and run 2; circle C ("TP")
    holds the true positives, so positives outside C are false positives.
    """
    run1, run2 = preds[0], preds[1]

    fp1, fp2, tp1, tp2 = set(), set(), set(), set()
    for index, truth in enumerate(mem):
        hits1, hits2 = (fp1, fp2) if truth == 0 else (tp1, tp2)
        if run1[index] == 1:
            hits1.add(index)
        if run2[index] == 1:
            hits2.add(index)

    # venn3 wants the size of each *exclusive* region in the order
    # (Abc, aBc, ABc, abC, AbC, aBC, ABC).  Indices found by both runs
    # therefore must not also be counted in the single-run regions, which
    # the previous inclusive totals did.
    subsets = (
        len(fp1 - fp2),   # false positive in run 1 only
        len(fp2 - fp1),   # false positive in run 2 only
        len(fp1 & fp2),   # false positive in both runs
        0,                # a true positive is always predicted by some run
        len(tp1 - tp2),   # true positive in run 1 only
        len(tp2 - tp1),   # true positive in run 2 only
        len(tp1 & tp2),   # true positive in both runs
    )

    venn3(subsets=subsets, set_labels=("Run 1", "Run 2", "TP"))
    plt.text(-0.70, 0.30, "FP")
    plt.text(0.61, 0.30, "FP")
    plt.show()
def joonista_venn(järjend):
    """Show a Venn diagram of two or three sets, listing each region's members.

    Args:
        järjend: sequence of two or three sets of strings.

    Each region label is replaced by its members, one per line.  Regions
    without a label (empty regions) or with members that cannot be joined as
    text are left as they are.
    """
    first = järjend[0]
    second = järjend[1]
    try:
        third = järjend[2]
    except LookupError:
        # only two sets were supplied; {""} is the "no third set" marker
        third = {""}

    def fill_labels(diagram, regions):
        # regions: (region id, zero-argument callable returning its members)
        for region_id, members_of in regions:
            try:
                diagram.get_label_by_id(region_id).set_text('\n'.join(members_of()))
            except (AttributeError, TypeError):
                # best effort: no label for an empty region, or the members
                # are not sets of strings - keep the default text
                continue

    if third == {""} and second != "" and first != "":
        venn = venn2([first, second], ('Esimene hulk', 'Teine hulk'))
        fill_labels(venn, [
            ('100', lambda: first - second),
            ('110', lambda: first & second),
            ('010', lambda: second - first),
        ])
    else:
        venn = venn3([first, second, third],
                     ('Esimene hulk', 'Teine hulk', 'Kolmas hulk'))
        fill_labels(venn, [
            ('111', lambda: first & second & third),
            ('100', lambda: first - second - third),
            ('110', lambda: first & second - third),
            ('010', lambda: second - third - first),
            ('101', lambda: first & third - second),
            ('011', lambda: second & third - first),
            ('001', lambda: third - second - first),
        ])
    plt.show()
# For testing
def plot_venn3_set(dict_of_sets, overlap_name, folder):
    '''
    Draw a 3-way Venn diagram of three sets and save it as SVG and PNG.

    Inputs
    ------
    dict_of_sets: dictionary mapping a name to each set to overlap
    overlap_name: string naming the overlap (used in title and file name)
    folder: output folder; the plots go into its "venn_plot" subfolder

    Returns
    -------
    None
    '''
    out_dir = make_folder(f"{val_folder(folder)}venn_plot")

    plt.clf()
    plt.figure(figsize=(7, 7))
    plt.rc('font', family='sans-serif', weight='normal', size=16)

    members = []
    display_names = []
    for key, group in dict_of_sets.items():
        members.append(group)
        display_names.append(key.replace('_', ' '))

    # regions: no fill colour, no edge
    diagram = venn3(subsets=members, set_labels=display_names)
    for region_id in ('100', '110', '101', '010', '011', '001', '111'):
        region = diagram.get_patch_by_id(region_id)
        if region:
            region.set_color('none')
            region.set_alpha(.4)
            region.set_edgecolor('none')

    # circle outlines carry the colour instead
    outlines = venn3_circles(subsets=members)
    for outline, edge_color in zip(outlines, ('green', 'blue', 'grey')):
        outline.set_edgecolor(edge_color)
        outline.set_alpha(0.8)
        outline.set_linewidth(4)

    plt.title(f"{overlap_name.replace('_', ' ')} Overlaps")
    plt.tight_layout()
    plt.savefig(f"{out_dir}{overlap_name.replace(' ', '_')}-overlap.svg")
    plt.savefig(f"{out_dir}{overlap_name.replace(' ', '_')}-overlap.png", dpi=300)
    plt.close()
def main_tsv(args):
    """Save a Venn diagram of the references found in two or three TSV files.

    Args:
        args: object with ``tsvfiles`` (input paths), ``names`` (one sample
            name per file) and ``outfile`` (image path).

    The second tab-separated column of every line is collected per sample
    name.  With any other number of files a message is printed and nothing
    is plotted.
    """
    n_files = len(args.tsvfiles)
    if n_files not in (2, 3):
        print("only 2 or 3 sets!")
        return

    hits = defaultdict(set)
    for i, file_ in enumerate(args.tsvfiles):
        sample_id = args.names[i]
        # ``with`` closes each input file; they used to stay open
        with open(file_, "r") as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank lines (e.g. at end of file)
                # Drop the line ending first, otherwise the reference of a
                # two-column row would keep its trailing newline.
                pred, reference = line.rstrip("\n").split("\t")[:2]
                hits[sample_id].add(reference)

    sample_names = tuple(args.names[:n_files])
    sample_sets = [hits[sample] for sample in sample_names]
    # if args.inexact:
    #     check_inexact(a,b,c)
    print(*[len(references) for references in sample_sets])

    if n_files == 3:
        venn3(sample_sets, sample_names)
    else:
        venn2(sample_sets, sample_names)
    plt.savefig(args.outfile)
def construct_venn(counts, venn_file, files, level):
    """Save a 2- or 3-group Venn diagram of per-bin-code counts.

    Args:
        counts: mapping from bin code to count - three codes ('10', '01',
            '11') for two groups or seven ('100' ... '111') for three.
        venn_file: path the figure is saved to.
        files: mapping whose values, taken in sorted key order, label the
            circles; the mapping is also printed as a legend box.
        level: text prefixed to the plot title.

    Nothing is drawn when ``counts`` has any other number of keys.
    """
    n_codes = len(counts.keys())
    if n_codes == 3:
        # 2group venn
        codes = ('10', '01', '11')
        legend_fontsize = 14
    elif n_codes == 7:
        # 3group venn
        codes = ('100', '010', '110', '001', '101', '011', '111')
        legend_fontsize = 12
    else:
        return

    subsets = tuple(counts[code] for code in codes)
    ordered_keys = sorted(files.keys())
    # A real tuple of labels.  The 2-group branch used ``+= (files[i])``,
    # i.e. tuple + str, which raised TypeError.
    set_labels = tuple(files[key] for key in ordered_keys)
    textstr = "".join(str(key) + ": " + files[key] + "\n" for key in ordered_keys)

    fig, ax = plt.subplots(figsize=(10, 6), nrows=1, ncols=1)
    if n_codes == 3:
        venn2(subsets=subsets, set_labels=set_labels, ax=ax)
    else:
        venn3(subsets=subsets, set_labels=set_labels, ax=ax)

    # legend box, placed in axes coordinates
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(-0.3, 0.15, textstr, transform=ax.transAxes,
            fontsize=legend_fontsize, verticalalignment='top', bbox=props)
    ax.set_title(level + " counts over the different bin codes")
    plt.tight_layout()
    fig.savefig(venn_file)
    plt.close(fig)
    return
def main():
    """Plot how Metacyc pathways are shared between three top-level categories.

    Reads the tab-separated table given by ``--metacyctab``, groups the
    ``Pathway`` values by ``Category1`` and saves a 3-set Venn diagram to
    ``--outfile``.
    """
    parser = ArgumentParser(
        description=(
            "Create a Venn diagram plot of the Metacyc database for "
            "categories Biosynthesis, Generation of Precursor Metabolites "
            "and Energy, and Degradation/Utilization/Assimilation"
        )
    )
    parser.add_argument(
        "-m", "--metacyctab", required=True,
        help="Tabular file of Metacyc database. See process_metacyc_db.py.")
    parser.add_argument(
        "-o", "--outfile", default="metacyc_venn.png",
        help="Name of outfile plot. Defaults to metacyc_venn.png")
    args = parser.parse_args()

    metacyc_df = pd.read_csv(args.metacyctab, header=0, sep="\t", index_col=0)

    core = ["Generation of Precursor Metabolites and Energy"]
    syn = ["Biosynthesis"]
    deg = ["Degradation/Utilization/Assimilation"]

    ## Make Venn diagram
    c = set(metacyc_df.loc[(metacyc_df.Category1.isin(core)), "Pathway"])
    b = set(metacyc_df.loc[(metacyc_df.Category1.isin(syn)), "Pathway"])
    d = set(metacyc_df.loc[(metacyc_df.Category1.isin(deg)), "Pathway"])

    # exclusive regions
    BC = b.intersection(c).difference(d)
    BD = b.intersection(d).difference(c)
    CD = c.intersection(d).difference(b)
    # Pathways in all three categories.  This used to be CD.intersection(b),
    # which is always empty because CD already excludes b.
    BCD = b.intersection(c, d)
    B = b.difference(c.union(d))
    C = c.difference(d.union(b))
    D = d.difference(b.union(c))

    # make subsets as: (B,C,BC,D,BD,CD,BCD)
    venn3(subsets=(len(B), len(C), len(BC), len(D), len(BD), len(CD), len(BCD)),
          set_labels=["Biosynthesis", "Core", "Degradation"])
    plt.title("#Pathway")
    # width/height are not savefig() arguments (current Matplotlib rejects
    # them), so they are dropped; the size comes from the figure itself.
    plt.savefig(args.outfile, dpi=300, bbox_inches="tight")
def progress_venn(df_ground_truths, method=None, plot=False):
    """
    Use the progress df to plot a venn diagram of the datapoints by ground truths.

    arguments:
    ---
    df_ground_truths: table indexed by ground-truth group with
        (method, 'Exclusive') columns
    method: "Lateralising" or "Localising" (expanded to the
        "... Datapoints" column name); any other value is used as given
    plot: the diagram is only drawn and shown when true
    ---

    Prints a message and returns when no method is given.

    Ali Alim-Marvasti July 2019
    """
    if method is None:
        print('progress_venn needs a method specified')
        return
    elif method == 'Lateralising':
        method = 'Lateralising Datapoints'
    elif method == 'Localising':
        method = 'Localising Datapoints'

    # row labels in the region order venn3 expects:
    # (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    region_rows = (
        'Seizure-Free',
        'Concordant',
        'Seizure-Free & Concordant',
        'sEEG and|or ES',
        'Seizure-Free & sEEG/ES',
        'Concordant & sEEG/ES',
        'All three',
    )
    column = (method, 'Exclusive')
    # use the df to find the venn numbers
    numbers = tuple(int(df_ground_truths.loc[row, column]) for row in region_rows)

    # plot
    if plot:
        venn3(subsets=numbers,
              set_labels=('Seizure-Free', 'Concordant', 'sEEG/ES'))
        titre = method + ' by Ground Truth'
        plt.title(titre)
        plt.show()
def progress_venn_2(df_study_type, method=None):
    """
    Use the progress df to plot a venn diagram of the datapoints by study types
    (by ground truth is the original).

    arguments:
    ---
    df_study_type: table indexed by study-type group with
        (method, 'Exclusive') columns
    method: "Lateralising" or "Localising" (expanded to the
        "... Datapoints" column name); any other value is used as given
    ---

    Prints a message and returns when no method is given.

    Ali Alim-Marvasti Aug 2019
    """
    if method is None:
        # the message used to name a non-existent "progress_venn4"
        print('progress_venn_2 needs a method specified')
        return
    elif method == 'Lateralising':
        method = 'Lateralising Datapoints'
    elif method == 'Localising':
        method = 'Localising Datapoints'

    # row labels in the region order venn3 expects:
    # (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    region_rows = (
        'Cortical Stimulation',
        'Semiological',
        'Cortical Stimulation & Semiological',
        'Topological',
        'Cortical Stimulation & Topological',
        'Semiological & Topological',
        'CES, SS, ET',
    )
    column = (method, 'Exclusive')
    # use the df to find the venn numbers
    numbers = tuple(int(df_study_type.loc[row, column]) for row in region_rows)

    # plot
    venn3(subsets=numbers,
          set_labels=('Stimulation', 'Semiological', 'Topological'))
    titre = method + ' by Patient Selection Priors (Study Type)'
    plt.title(titre)
    plt.show()
def plot_venn_diagram_of_filtered_data(dataset, filter_dict, name, plots_dir):
    """Save a Venn diagram of how many rows each filter combination selects.

    Args:
        dataset: object with a ``query`` method returning something with
            ``.shape`` (used like a pandas DataFrame).
        filter_dict: maps 'Time', 'Events Ratio' and 'Max Read Length' to a
            pair of query strings.  Entry ``[0]`` is used for rows that fall
            inside that circle and entry ``[1]`` for rows outside it -
            presumably a condition and its negation; verify against caller.
        name: prefix of the output file name.
        plots_dir: directory the image is written to.
    """
    def region_size(time_ix, events_ix, length_ix):
        # number of rows matching the chosen entry of all three filters
        query = ' & '.join([filter_dict['Time'][time_ix],
                            filter_dict['Events Ratio'][events_ix],
                            filter_dict['Max Read Length'][length_ix]])
        return dataset.query(query).shape[0]

    # 0 = inside that circle, 1 = outside it
    time_subset = region_size(0, 1, 1)
    events_subset = region_size(1, 0, 1)
    # Time and events but not length.  This used to repeat the query of
    # ``length_subset`` (1, 1, 0), so the region showed the wrong count.
    time_and_events_subset = region_size(0, 0, 1)
    length_subset = region_size(1, 1, 0)
    time_and_length_subset = region_size(0, 1, 0)
    events_and_length_subset = region_size(1, 0, 0)
    all_subset = region_size(0, 0, 0)

    fig, ax = plt.subplots()
    venn3(subsets=(time_subset, events_subset, time_and_events_subset,
                   length_subset, time_and_length_subset,
                   events_and_length_subset, all_subset),
          set_labels=['Time', "Events Ratio", "Max Read Length"], ax=ax)

    # Set titles
    ax.set_title("Reads excluded by condition")

    # Ensure labels are not missed
    fig.tight_layout()

    # Save and close figure
    savefig(os.path.join(plots_dir, "%s.venn_diagram.png" % name))
    plt.close(fig)
def plot_venn(ns):
    """Draw a 3-set Venn diagram from five counts and return ``(fig, ax)``.

    Args:
        ns: ``(n_diff, n01, n12, n02, n012)``.  The last four are read as the
            pairwise and triple overlap sizes; each single-set region is
            ``n_diff`` plus the pairwise overlap of the two *other* sets,
            minus ``n012``.
    """
    n_diff, n01, n12, n02, n012 = ns
    fig, ax = plt.subplots()

    # regions belonging to one set only
    first_only = n_diff + n12 - n012
    second_only = n_diff + n02 - n012
    third_only = n_diff + n01 - n012
    # regions shared by exactly two sets
    first_second = n01 - n012
    first_third = n02 - n012
    second_third = n12 - n012

    # order expected by venn3: (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    region_sizes = (first_only, second_only, first_second,
                    third_only, first_third, second_third, n012)
    venn3(subsets=region_sizes,
          set_labels=(r"$s_1$", r"$s_2$", r"$s_3$"),
          ax=ax)
    return fig, ax
def build_venn(df, sample_base, variant_type):
    """
    Create overlapping sets from cosmic or gene variants and plot a Venn diagram

    Arguments:
    df - a cosmic or gene dataframe storing specific variants across passages
    sample_base - the patient id the PDX models were derived from
    variant_type - which variant class to subset ('gene' or 'cosmic')

    Output:
    The Matplotlib figure holding the diagram; it is also saved to
    figures/venns/venn_<sample_base>_<variant_type>.pdf and shown.

    Raises:
    ValueError if variant_type is neither 'gene' nor 'cosmic'
    """
    variant_columns = {'gene': 'Gene.refGene', 'cosmic': 'cosmic70'}
    if variant_type not in variant_columns:
        # used to fail later with an UnboundLocalError
        raise ValueError("variant_type must be 'gene' or 'cosmic', got {!r}"
                         .format(variant_type))
    subset_variant = variant_columns[variant_type]

    def color_regions(diagram, palette):
        # A region without elements has no patch (None); skip it instead of
        # crashing on ``None.set_color``.
        for region_id, color in palette:
            patch = diagram.get_patch_by_id(region_id)
            if patch is not None:
                patch.set_color(color)

    # Sample names of the passages.  NOTE: f0, f5 and prim are referenced by
    # name ('@f0' ...) inside the query strings below - do not rename them.
    f0 = '{}-F0'.format(sample_base)
    f5 = '{}-F5'.format(sample_base)
    prim = '{}-primary'.format(sample_base)

    # Get sets of variants matching specific passages
    set_f0 = set(df.query('sample_name == @f0')[subset_variant])
    set_f5 = set(df.query('sample_name == @f5')[subset_variant])
    set_prim = set(df.query('sample_name == @prim')[subset_variant])

    # Build venn diagram; fall back to two sets without primary variants
    if len(set_prim) == 0:
        v = venn2(subsets=(set_f0, set_f5), set_labels=(f0, f5))
        color_regions(v, [('11', '#fdff5b'),
                          ('10', '#b8ff87'),
                          ('01', '#82fffc')])
        venn2_circles(subsets=(set_f0, set_f5), linestyle='dashed')
    else:
        v = venn3(subsets=(set_f0, set_f5, set_prim),
                  set_labels=(f0, f5, prim))
        color_regions(v, [('110', '#fdff5b'),
                          ('100', '#b8ff87'),
                          ('010', '#82fffc'),
                          ('001', '#ff82cf'),
                          ('101', '#ffb05b'),
                          ('011', '#992dff'),
                          ('111', '#6872ff')])
        venn3_circles(subsets=(set_f0, set_f5, set_prim), linestyle='dashed')

    # Obtain figure, save and show plot
    plt.title('{} {}'.format(sample_base, variant_type))
    fig = plt.gcf()
    venn_fig = os.path.join('figures', 'venns',
                            'venn_{}_{}.pdf'.format(sample_base, variant_type))
    plt.tight_layout()
    plt.savefig(venn_fig)
    plt.show()
    return fig
def visualize(label_list, title=None, block=False):
    """Show a Venn diagram of controls, hearing loss and tinnitus labels.

    Args:
        label_list: iterable of label tuples - ``()``, ``('hl',)``,
            ``('tin',)`` or both tags in either order; anything else is
            ignored.
        title: optional plot title.
        block: forwarded to ``show``.
    """
    region_order = ('Abc', 'aBc', 'ABc', 'abC', 'AbC', 'aBC', 'ABC')
    tally = dict.fromkeys(region_order, 0)

    for label in label_list:
        if label == ():
            tally['Abc'] += 1
        elif label == ('hl', ):
            tally['aBc'] += 1
            # NOTE(review): a hearing-loss-only label also bumps the
            # "Controls & Hearing Loss" region - kept as in the original,
            # confirm that this is intended.
            tally['ABc'] += 1
        elif label == ('tin', ):
            tally['abC'] += 1
        elif label == ('tin', 'hl') or label == ('hl', 'tin'):
            tally['aBC'] += 1

    mpl.figure()
    venn3([tally[region] for region in region_order],
          set_labels=('Controls', 'Hearing Loss', 'Tinnitus'))
    if title:
        mpl.title(title)
    mpl.show(block=block)
def venn_digram(self, names, figname):
    """Draw a 2- or 3-set venn diagram and save it as PNG and EPS.

    names: keys into self['venn']; each entry is converted to a set. Nothing
        is drawn unless exactly 2 or 3 names are given (the empty figure is
        still saved).
    figname: middle part of the output file names
        ('f.png.<figname>.png' and 'f.eps.<figname>.eps').
    """
    import matplotlib_venn
    from matplotlib_venn import venn3, venn3_circles, venn2, venn2_circles

    plt.figure(figsize=(4, 4))
    how_many = len(names)
    if how_many in (2, 3):
        members = [set(self['venn'][names[k]]) for k in range(how_many)]
        captions = tuple(names[k] for k in range(how_many))
        if how_many == 2:
            venn2(members, captions)
            venn2_circles(members)
        else:
            venn3(members, captions)
            venn3_circles(members)

    for prefix, suffix in (('f.png.', '.png'), ('f.eps.', '.eps')):
        plt.savefig(prefix + figname + suffix)
    plt.clf()
def create_venn(self, disease1, disease2, disease3):
    """Create the venn diagram of subject ids for the three chosen diseases.

    Each disease name is used as a key into self; the 'subject_id' values
    found there form one set of the diagram. Returns whatever venn3 returns.
    """
    diseases = (disease1, disease2, disease3)
    subject_sets = tuple(
        set(self[disease]['subject_id'].tolist()) for disease in diseases
    )
    return venn3(subsets=subject_sets, set_labels=diseases)
def three_venn(self, collections):
    """Store the seven exclusive regions of three collections and plot them.

    The first three entries of `collections` are treated as sets A, B and C.
    The exclusive regions are stored on the instance as V3_ABC, V3_AB,
    V3_BC, V3_AC, V3_A, V3_B and V3_C. Returns the result of venn3 drawn
    from the collections, labelled with self.collection_names.
    """
    first = set(collections[0])
    second = set(collections[1])
    third = set(collections[2])

    # Shared by all three, then shared by exactly two.
    self.V3_ABC = first & second & third
    self.V3_AB = (first & second) - self.V3_ABC
    self.V3_BC = (second & third) - self.V3_ABC
    self.V3_AC = (first & third) - self.V3_ABC

    # Members of exactly one collection.
    self.V3_A = first - (self.V3_ABC | self.V3_AB | self.V3_AC)
    self.V3_B = second - (self.V3_ABC | self.V3_AB | self.V3_BC)
    self.V3_C = third - (self.V3_ABC | self.V3_BC | self.V3_AC)

    as_sets = [set(members) for members in collections]
    return venn3(as_sets, set_labels=self.collection_names)
def venn_diagram3(data, semantic_terms, click_lower_bound=5):
    """
    Plots a three-set venn diagram for shared semantic terms.

    :param data: The dataframe of download.csv
    :param semantic_terms: A sequence of semantic terms to match; the first
        three are plotted
    :param click_lower_bound: Only records with strictly more clicks than
        this number are kept for the plot (for performance).
    """
    column = 'Semantic Classification'
    kept = data[data['Clicks'] > click_lower_bound]

    # One set of classification strings per term, matched via str.contains.
    term_sets = []
    for term in semantic_terms:
        matching = kept[kept[column].str.contains(term)]
        term_sets.append(set(matching[column].values))

    captions = tuple(
        'Contains {}'.format(semantic_terms[k]) for k in range(3)
    )
    venn3([term_sets[0], term_sets[1], term_sets[2]], captions)
    plt.title('Semantic Classification Overlap')
    plt.show()
def plot_venn():
    """Plot the overlap of the top METEOR bucket across three systems.

    Loads six score files (BLEU and METEOR for NMT, NNGen and CoDiSum),
    extracts the "[0.0, 0.1)" and "[0.9, 1.0]" buckets from each, and draws
    a venn diagram of the "[0.9, 1.0]" bucket of the three METEOR files.
    The plot is saved as METEOR1venn2.eps and shown.
    """
    score_files = (
        '/Users/kingxu/result/IJCAI19result/nmt_bleu.txt',
        '/Users/kingxu/result/IJCAI19result/nngen_bleu.txt',
        '/Users/kingxu/result/IJCAI19result/codisum_bleu.txt',
        '/Users/kingxu/result/IJCAI19result/nmt_meteor.txt',
        '/Users/kingxu/result/IJCAI19result/nngen_meteor.txt',
        '/Users/kingxu/result/IJCAI19result/codisum_meteor.txt',
    )
    score_dicts = [txt2dict(path) for path in score_files]

    # Lowest bucket is computed but not plotted here.
    sets1 = [dict2set(scores, "[0.0, 0.1)") for scores in score_dicts]
    sets2 = [dict2set(scores, "[0.9, 1.0]") for scores in score_dicts]

    plt.figure()
    # Entries 3..5 correspond to the three *_meteor.txt files.
    meteor_top = sets2[3:]
    venn3(subsets=meteor_top, set_labels=('NMT', 'NNGen', 'CoDiSum'))
    venn3_circles(subsets=meteor_top, linestyle='dotted', linewidth=1.0)
    plt.savefig("METEOR1venn2.eps", format="eps")
    plt.show()
def Venn_Inter(dist_dic, dunn_dic, dca_pares, mystic_pares, graficos_ruta,
               caso_venn, ALL_VENN, caso_graf):
    """Plot how the keys of ALL_VENN are shared between three methods.

    Each key is classified by membership in dca_pares (set A), mystic_pares
    (set B) and dunn_dic (set C); keys found in none of them are ignored.
    The diagram is saved to
    graficos_ruta + "venn_metodos_" + caso_venn + ".png" and the current
    figure is cleared afterwards. `dist_dic` is not used.
    """
    # Region ids in the order venn3 expects: Abc, aBc, ABc, abC, AbC, aBC, ABC
    # with A = DCA, B = Mystic, C = Dunn.
    region_ids = ('100', '010', '110', '001', '101', '011', '111')
    tallies = dict.fromkeys(region_ids, 0)

    for llaves in ALL_VENN:
        membership = ''.join(
            '1' if llaves in group else '0'
            for group in (dca_pares, mystic_pares, dunn_dic)
        )
        if membership != '000':
            tallies[membership] += 1

    subsets = tuple(tallies[region] for region in region_ids)
    venn3(subsets, set_labels=('DCA', 'Mystic', caso_graf))
    venn3_circles(subsets, color="#008000", alpha=1, linestyle="-.",
                  linewidth=3)
    # TODO (from original author): double-check where the plot is saved.
    plt.savefig(graficos_ruta + "venn_metodos_" + caso_venn + ".png", dpi=720)
    plt.clf()
def main():
    """Plot a 3-way venn diagram of splice-junction support per technology.

    Reads the PacBio, ONT and Illumina splice-junction files named in the
    command-line arguments, computes the intersection counts and saves the
    diagram as figures/<sample_name>_venn.pdf and .png (spaces in the sample
    name are replaced by underscores). With args.log_sizes the regions are
    sized by log2(count) while the printed numbers stay the raw counts.
    """
    args = get_args()

    # read in each of the sj dfs
    pb_df = read_sj_file(args.pb_sj_file, 'PacBio')
    print(len(pb_df.index))
    ont_df = read_sj_file(args.ont_sj_file, 'ONT')
    print(len(ont_df.index))
    ill_df = read_sj_file(args.ill_sj_file, 'Illumina')
    print(len(ill_df.index))

    # get each of the intersection counts that we care about
    counts, labels = find_intersect_counts(pb_df, ont_df, ill_df)
    print(counts)
    print(labels)

    # change circle sizes, remembering the raw counts for the region labels
    intersection_labels = None
    if args.log_sizes:
        intersection_labels = tuple(str(count) for count in counts)
        counts = tuple(math.log2(count) if count != 0 else 0
                       for count in counts)

    # plot the venn diagram
    plt.figure(figsize=(8.5, 8.5))
    v = venn3(subsets=counts, set_labels=('A', 'B', 'C'))

    # messing with label text
    for set_id, technology in (('A', 'PacBio'), ('B', 'ONT'),
                               ('C', 'Illumina')):
        set_label = v.get_label_by_id(set_id)
        set_label.set_text(technology)
        set_label.set_fontsize('x-large')

    plt.title('{} Splice Junction Support'.format(args.sample_name),
              fontsize='xx-large')

    # messing with numerical text
    # venn3 reads a 7-number `subsets` sequence in exactly this region
    # order, so position k of intersection_labels belongs to region_ids[k].
    region_ids = ('100', '010', '110', '001', '101', '011', '111')
    for position, region_id in enumerate(region_ids):
        region_label = v.get_label_by_id(region_id)
        if region_label is None:
            # zero-size regions are not drawn and have no label
            continue
        region_label.set_fontsize('x-large')
        if intersection_labels is not None:
            region_label.set_text(intersection_labels[position])

    plt.savefig('figures/' + args.sample_name.replace(' ', '_') + '_venn.pdf')
    plt.savefig('figures/' + args.sample_name.replace(' ', '_') + '_venn.png',
                dpi=600)
def main():
    """Plot a venn diagram of the overlap between three peak BED files.

    Command line: [options] <peaks1_bed> <peaks2_bed> <peaks3_bed> <out_pdf>

    The three BED files are merged with bedtools into a temporary file; each
    merged peak is then assigned to every input set it intersects, and the
    resulting three sets of merged-peak indexes are drawn with venn3 and
    saved to <out_pdf>. The temporary file is removed even if a step fails.
    """
    usage = 'usage: %prog [options] <peaks1_bed> <peaks2_bed> <peaks3_bed> <out_pdf>'
    parser = OptionParser(usage)
    parser.add_option('--l1', dest='label1', default='peaks1',
                      help='Label for peak set 1')
    parser.add_option('--l2', dest='label2', default='peaks2',
                      help='Label for peak set 2')
    parser.add_option('--l3', dest='label3', default='peaks3',
                      help='Label for peak set 3')
    (options, args) = parser.parse_args()

    if len(args) != 4:
        parser.error('Must provide three peaks BED files and output PDF')
    else:
        peak_beds = args[:3]
        out_pdf = args[3]

    merge_fd, merge_bed = tempfile.mkstemp()
    try:
        # merge peaks
        # NOTE(review): file names are interpolated into a shell command
        # unquoted; paths containing spaces or shell metacharacters break it.
        cmd = 'cat %s %s %s | awk \'{OFS="\t"} {print $1, $2, $3}\' | bedtools sort -i stdin | bedtools merge -i stdin > %s' % (
            peak_beds[0], peak_beds[1], peak_beds[2], merge_bed)
        subprocess.call(cmd, shell=True)

        # annotate merged peaks with each individual set
        # NOTE(review): num_peaks is not used below; the call is kept in
        # case count_peaks is relied on for its side effects -- confirm.
        num_peaks = count_peaks(merge_bed)
        peak_overlaps = [set(), set(), set()]

        for bi in range(3):
            cmd = 'bedtools intersect -c -a %s -b %s' % (merge_bed,
                                                         peak_beds[bi])
            p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
            try:
                # pi is the index of the merged peak; the last column is the
                # overlap count reported by `bedtools intersect -c`
                for pi, line in enumerate(p.stdout):
                    a = line.split()
                    if int(a[-1]) > 0:
                        peak_overlaps[bi].add(pi)
            finally:
                # release the pipe and reap the child process
                p.stdout.close()
                p.wait()

        # plot
        plt.figure()
        venn3(peak_overlaps,
              set_labels=[options.label1, options.label2, options.label3])
        plt.savefig(out_pdf)
        plt.close()
    finally:
        # clean up
        os.close(merge_fd)
        os.remove(merge_bed)
def venn(data, names=None, fill="number", show_names=True, show_plot=True,
         outputDir=False, **kwds):
    """
    Dispatch to venn2/venn3/venn4 depending on the number of groups.

    data: a list with 2, 3 or 4 groups
    names: names of groups in data
    fill = ["number"|"logic"|"both"], fill with number, logic label, or both
    show_names = [True|False]
    show_plot = [True|False]
    outputDir: forwarded unchanged to the plotting function
    **kwds: forwarded unchanged to the plotting function

    Raises Exception if data is None. For any other group count a message
    is printed and nothing is plotted.
    """
    if data is None:
        raise Exception("No data!")

    group_count = len(data)
    if group_count == 2:
        venn2(data, names, fill, show_names, show_plot, outputDir, **kwds)
    elif group_count == 3:
        venn3(data, names, fill, show_names, show_plot, outputDir, **kwds)
    elif group_count == 4:
        venn4(data, names, fill, show_names, show_plot, outputDir, **kwds)
    else:
        # Single pre-formatted argument prints identically on Python 2 and 3.
        print('%d files submitted, must be at least 2 and at most 4...'
              % group_count)
def venn(vennset=(1, 1, 1, 1, 1, 1, 1),
         venncolor=('#00909e', '#f67280', '#ff971d'), vennalpha=0.5,
         vennlabel=('A', 'B', 'C')):
    """Draw a venn diagram from region sizes and save it as a PNG.

    vennset: 7 region sizes -> three-set diagram saved as 'venn3.png';
        3 region sizes -> two-set diagram saved as 'venn2.png'.
        Any other length prints an error message and saves nothing.
    venncolor, vennalpha, vennlabel: forwarded as set_colors, alpha and
        set_labels.
    """
    plt.figure()
    region_count = len(vennset)

    if region_count not in (7, 3):
        print("Error: check the set dataset")
        return

    style = dict(subsets=vennset, set_labels=vennlabel,
                 set_colors=venncolor, alpha=vennalpha)
    if region_count == 7:
        venn3(**style)
        outfile = 'venn3.png'
    else:
        venn2(**style)
        outfile = 'venn2.png'
    plt.savefig(outfile, format='png', bbox_inches='tight', dpi=300)
def show_venn_diagram(intersections,
                      set_labels=('KEGG', 'Reactome', 'WikiPathways')):
    """Show a venn diagram sized by the length of each intersection.

    intersections: mapping whose values are sized collections; their
        lengths, in the mapping's iteration order, are passed to venn3 as
        the region sizes.
    set_labels: names shown next to the three circles.
    """
    region_sizes = [len(members) for members in intersections.values()]

    plt.figure(figsize=(17, 8))
    venn3(subsets=region_sizes, set_labels=set_labels)
    plt.show()
def venn_seed_pairs(dataset: str, parent_dir: Path):
    """Plot seed-type overlaps for the positive and negative duplexes.

    Reads <dataset>_duplex_positive.csv and <dataset>_duplex_negative.csv
    from parent_dir. For each file, rows flagged as canonic or non-canonic
    seed are kept and a venn diagram is drawn of: rows with at least 11
    pairs, canonic-seed rows and non-canonic-seed rows (compared by row
    index). Both panels are saved to parent_dir/venn/<dataset>_venn.pdf.
    """
    path_to_save = parent_dir / "venn" / f"{dataset}_venn.pdf"
    Path(path_to_save.parent).mkdir(exist_ok=True)

    _, axes = plt.subplots(1, 2)
    panels = (
        (parent_dir / f"{dataset}_duplex_positive.csv", "Positive"),
        (parent_dir / f"{dataset}_duplex_negative.csv", "Negative"),
    )
    for axis, (csv_path, panel_title) in zip(axes, panels):
        duplexes = pd.read_csv(csv_path)

        # Keep only rows that have some kind of seed.
        has_canonic = duplexes["canonic_seed"] == 1
        has_non_canonic = duplexes["non_canonic_seed"] == 1
        seeded = duplexes[has_canonic | has_non_canonic]

        many_pairs = seeded[seeded["num_of_pairs"] >= 11]
        canonic = seeded[seeded["canonic_seed"] == 1]
        non_canonic = seeded[seeded["non_canonic_seed"] == 1]

        venn3([set(many_pairs.index), set(canonic.index),
               set(non_canonic.index)],
              ("", "Canonic", "Non Canonic"), ax=axis)
        axis.set_title(panel_title)

    plt.savefig(path_to_save, format="pdf", bbox_inches='tight')
def plot_ven(list_of_sets: list, list_of_names: list):
    """
    Plots venn diagram for 2/3 sets.

    list_of_sets: list of lists or of sets; converted to sets when at least
        one entry is not already a set
    list_of_names: one label per group (2 or 3 of them)
    """
    group_count = len(list_of_names)
    assert group_count in [2, 3], "Venn diagram only works for 2/3 sets"
    assert group_count == len(
        list_of_sets), "Num of names does not match num of groups"

    if any(not isinstance(group, set) for group in list_of_sets):
        list_of_sets = [set(group) for group in list_of_sets]

    plt.figure()
    plt.title("Gene sets", fontsize=16)
    if group_count == 2:
        venn2(subsets=list_of_sets, set_labels=list_of_names)
    else:
        venn3(subsets=list_of_sets, set_labels=list_of_names)
def plot_venn3_counts(element_list, set_labels, overlap_name, folder):
    '''
    Plot a three-way venn from pre-computed region counts and save it as
    <overlap_name>-overlap.svg and .png (300 dpi) in a "venn_plot" folder
    created below `folder`.

    Inputs
    ------
    element_list: tuple with the region counts in the order
                  (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    set_labels: list or tuple with names of the sets ('A','B','C');
                underscores are shown as spaces
    overlap_name: string with name of overlap
    folder: output folder

    Returns
    -------
    None
    '''
    folder = make_folder(f"{val_folder(folder)}venn_plot")

    plt.clf()
    plt.figure(figsize=(7, 7))
    plt.rc('font', family='sans-serif', weight='normal', size=16)

    # Draw the diagram, then blank out every region fill that exists so
    # only the coloured outlines added below remain visible.
    display_names = [name.replace('_', ' ') for name in set_labels]
    venn_plot = venn3(subsets=element_list, set_labels=display_names)
    for region_id in ('100', '110', '101', '010', '011', '001', '111'):
        region = venn_plot.get_patch_by_id(region_id)
        if region:
            region.set_color('none')
            region.set_alpha(.4)
            region.set_edgecolor('none')

    # Coloured circle outlines, one colour per set
    outlines = venn3_circles(subsets=element_list)
    for outline, edge_color in zip(outlines, ('green', 'blue', 'grey')):
        outline.set_edgecolor(edge_color)
        outline.set_alpha(0.8)
        outline.set_linewidth(4)

    plt.title(f"{overlap_name.replace('_', ' ')} Overlaps")
    plt.tight_layout()
    plt.savefig(f"{folder}{overlap_name.replace(' ', '_')}-overlap.svg")
    plt.savefig(f"{folder}{overlap_name.replace(' ', '_')}-overlap.png",
                dpi=300)
def getVenn3plot(sets, labels, title, path_to_img):
    """Draw a three-set venn diagram, print its region sizes and save it.

    sets: the three sets to compare (indexed 0..2).
    labels: names for the three sets.
    title: plot title.
    path_to_img: where the figure is saved before it is shown.
    """
    venn3(subsets=sets, set_labels=labels)

    first, second, third = sets[0], sets[1], sets[2]
    region_sizes = _venn3.compute_venn3_subsets(first, second, third)
    print(region_sizes)

    plt.title(title)
    plt.savefig(path_to_img, bbox_inches='tight')
    plt.show()
def plot_three_set_venn(set1, set2, set3, adj_params_dic,
                        mycolors=('r', 'g', 'b'), mylabels=None,
                        title='Plot title'):
    '''
    Plot three circle venn diagram.

    set1, set2, set3: the sets to compare; region sizes are computed here.
    adj_params_dic is of form: {labelid: (x_adj, y_adj), ...}
    If adj_params_dic is None, then no adjustments would be made.
    Label ids that refer to a region with no label (e.g. an empty region)
    are skipped.
    mycolors, mylabels: forwarded to venn3 as set_colors / set_labels.
    title: plot title.
    '''
    # Set matplotlib font size globally
    font = {
        'family': 'sans',
        'sans-serif': 'Arial',
        'weight': 'bold',
        'size': 25
    }
    matplotlib.rc('font', **font)

    # Region sizes in venn3 order. '-' binds tighter than '&', so e.g.
    # set1 & set2 - set3 is set1 & (set2 - set3): members of set1 and set2
    # that are not in set3.
    Abc = len(set1 - set2 - set3)
    aBc = len(set2 - set1 - set3)
    ABc = len(set1 & set2 - set3)
    abC = len(set3 - set1 - set2)
    AbC = len(set1 & set3 - set2)
    aBC = len(set2 & set3 - set1)
    ABC = len(set1 & set2 & set3)

    fig = plt.figure()
    # Fill whitespace in the margins by adjusting subplot
    fig.subplots_adjust(bottom=0.10, left=0.12, right=0.90, top=0.90)
    fig.add_subplot(111)

    p = venn3(subsets=(Abc, aBc, ABc, abC, AbC, aBC, ABC),
              set_colors=mycolors, set_labels=mylabels)

    # Adjust text based on adj_params_dic
    if adj_params_dic is not None:
        # .items() exists on both Python 2 and 3 (iteritems is Python 2 only)
        for labelid, adj_params in adj_params_dic.items():
            label = p.get_label_by_id(labelid)
            if label is None:
                continue
            x_pos, y_pos = label.get_position()
            label.set_x(x_pos + adj_params[0])
            label.set_y(y_pos + adj_params[1])

    plt.title(title)
    plt.show()
def venn(A, B, U, nombre1='A', nombre2='B', fs=12):
    """
    Plot and return a Venn diagram of the sets A and B inside the universal
    set U. Each non-empty region is labelled with its elements.

    Parameters:
        A: the first set to plot
        B: the second set to plot
        U: the universal set
        nombre1 (str): name of the first set
        nombre2 (str): name of the second set (hidden when A equals B)
        fs (int): font size
    """
    # When both sets are equal only one name is shown
    if A == B:
        nombre2 = ''

    # Create the figure and put the 'U' label in its top-right corner
    fig = plt.figure(figsize=(10, 10), linewidth=10, edgecolor="black",
                     facecolor="white")
    ax = fig.add_subplot(111)
    plt.text(1, 1, 'U', ha='right', va='top', transform=ax.transAxes,
             fontsize=fs)

    # The third "set" is the part of the universe outside A and B
    v = venn3([A, B, U - A - B], (nombre1, nombre2, ''))

    # Elements of the region A - B, plus an outline
    only_a = A - B
    if only_a != set():
        v.get_label_by_id('100').set_text(only_a)
        venn_line(v, '100')

    # Elements of the region A & B, plus an outline
    if interseccion(A, B) != set():
        v.get_label_by_id('110').set_text(A & B)
        venn_line(v, '110')

    # Elements of the region B - A, plus an outline
    only_b = B - A
    if only_b != set():
        v.get_label_by_id('010').set_text(only_b)
        venn_line(v, '010')

    # Elements of the region U - A - B; its background is painted white
    outside = U - A - B
    if outside != set():
        v.get_label_by_id('001').set_text(outside)
        v.get_patch_by_id('001').set_color('white')

    set_fontsize(v, fs)
    return v
def SimpleMatplotVenn(names, data, outputDir=False, display=True):
    """ Uses http://pypi.python.org/pypi/matplotlib-venn (code combined into
    one module) to export simple or complex, overlap weighted venn diagrams
    as an alternative to the default methods in this module.

    names: one file name per group; a '.txt' in the name is removed to form
        the set label. Only 2 or 3 groups are drawn.
    data: the groups, passed to get_labels(data, fill="number") to obtain
        the region sizes.
    outputDir: when not False, the figure is saved there as PDF and PNG
        named after the module-level `venn_export_weighted`.
    display: show the figure interactively when true.
    """
    pylab.figure(figsize=(11, 7), facecolor='w')

    vd = get_labels(data, fill="number")
    # str.replace works on Python 2 and 3 (string.replace is Python 2 only)
    set_labels = [name.replace('.txt', '') for name in names]

    if len(set_labels) == 2:
        from matplotlib_venn import venn2, venn2_circles
        set_colors = ('r', 'g')
        subsets = (vd['10'], vd['01'], vd['11'])
        venn2(subsets=subsets, set_labels=set_labels, set_colors=set_colors)
        venn2_circles(subsets=subsets, alpha=0.5, linewidth=1.5,
                      linestyle='dashed')

    if len(set_labels) == 3:
        from matplotlib_venn import venn3, venn3_circles
        set_colors = ('r', 'g', 'b')
        subsets = (vd['100'], vd['010'], vd['110'], vd['001'], vd['101'],
                   vd['011'], vd['111'])
        venn3(subsets=subsets, set_labels=set_labels, set_colors=set_colors)
        venn3_circles(subsets=subsets, alpha=0.5, linewidth=1.5,
                      linestyle='dashed')

    pylab.title("Overlap Weighted Venn Diagram", fontsize=24)

    try:
        if outputDir != False:
            filename = outputDir + '/%s.pdf' % venn_export_weighted
            pylab.savefig(filename)
            filename = outputDir + '/%s.png' % venn_export_weighted
            pylab.savefig(filename, dpi=100)  # ,dpi=200
    except Exception:
        print('Image file not saved...')

    if display:
        pylab.show()

    # Best-effort cleanup of the current figure. The earlier code called
    # clf() on an undefined name, so the resulting NameError was swallowed
    # and the figure was never closed.
    try:
        import gc
        pylab.clf()
        pylab.close()
        gc.collect()
    except Exception:
        pass
def main():
    """Build a sample three-set venn diagram with an annotated region.

    All seven regions have size 1. Region '100' is painted opaque white,
    relabelled 'Unknown' and pointed at by an 'Unknown set' annotation; set
    label 'A' is renamed. The figure is neither shown nor saved here.
    """
    args = processArgs()

    plt.figure(figsize=(4, 4))
    v = venn3(subsets=(1,) * 7,
              set_labels=('GenomicHit', 'NoGenomicHit', 'JillPipeline'))

    unknown_patch = v.get_patch_by_id('100')
    unknown_patch.set_alpha(1.0)
    unknown_patch.set_color('white')

    unknown_label = v.get_label_by_id('100')
    unknown_label.set_text('Unknown')
    v.get_label_by_id('A').set_text('Set "A"')

    plt.title("Sample Venn diagram")

    # Arrow target sits slightly below the 'Unknown' label.
    arrow_target = unknown_label.get_position() - np.array([0, 0.05])
    box_style = dict(boxstyle='round,pad=0.5', fc='gray', alpha=0.1)
    arrow_style = dict(arrowstyle='->', connectionstyle='arc3,rad=0.5',
                       color='gray')
    plt.annotate('Unknown set', xy=arrow_target, xytext=(-70, -70),
                 ha='center', textcoords='offset points',
                 bbox=box_style, arrowprops=arrow_style)
def commons2plot(samples, csizes, output, title, dpi, format, verbose):
    """Plot a three-set venn diagram from region sizes.

    Uses https://github.com/konstantint/matplotlib-venn

    samples: labels of the three sets.
    csizes: region sizes passed to venn3 / venn3_circles as `subsets`.
    output: object with a `.name`; if the name is "<stdout>" the plot is
        shown, otherwise it is saved to "<output.name>.<format>".
    title: plot title.
    dpi, format: forwarded to savefig.
    verbose: not used here.
    """
    plt.figure(figsize=(6, 6))
    venn3(subsets=csizes, set_labels=samples)
    venn3_circles(subsets=csizes, linewidth=0.1)
    plt.title(title)

    # Show the diagram when no output file was provided
    if output.name == "<stdout>":
        plt.show()
        return

    fpath = "%s.%s" % (output.name, format)
    plt.savefig(fpath, dpi=dpi, facecolor='w', edgecolor='w',
                orientation='landscape', format=format, transparent=False)
def venn_diagram(self, directory, title, labels):
    """Draw a venn diagram of self.gene_sets (2 or 3 sets).

    With two sets, the diagram is labelled from the file names in self.sets
    (text between the first '/' and the following '.').

    With three sets, the genes of every exclusive region are also written,
    one per line, to "list_<labels joined by '_'>.txt" inside
    os.path.join(directory, title), and the diagram is labelled with
    `labels`.

    Returns the Matplotlib figure, or None when self.sets holds neither 2
    nor 3 entries.
    """
    if len(self.sets) == 2:
        from matplotlib_venn import venn2

        f = plt.figure(figsize=(10, 10))
        first, second = self.gene_sets[0], self.gene_sets[1]
        inter = len(first.intersection(second))
        # venn2 reads a 3-number `subsets` as (A only, B only, A and B)
        venn2(subsets=(len(first) - inter, len(second) - inter, inter),
              set_labels=(self.sets[0].partition("/")[2].partition(".")[0],
                          self.sets[1].partition("/")[2].partition(".")[0]))
        plt.title("Sample Venn diagram")
        return f

    elif len(self.sets) == 3:
        from matplotlib_venn import venn3

        def write_genes(filename, geneset):
            with open(filename, "w") as g:
                for gene in geneset:
                    print(gene, file=g)

        f = plt.figure(figsize=(10, 10))
        a, b, c = self.gene_sets[0], self.gene_sets[1], self.gene_sets[2]

        s100 = a - b - c
        s010 = b - a - c
        s001 = c - a - b
        s111 = a.intersection(b.intersection(c))
        s110 = a.intersection(b) - c
        s011 = b.intersection(c) - a
        s101 = a.intersection(c) - b

        # (labels naming the file, genes of that exclusive region)
        exports = (
            ((labels[0],), s100),
            ((labels[1],), s010),
            ((labels[2],), s001),
            ((labels[0], labels[1], labels[2]), s111),
            ((labels[0], labels[1]), s110),
            ((labels[1], labels[2]), s011),
            ((labels[0], labels[2]), s101),
        )
        for name_parts, geneset in exports:
            write_genes(
                filename=os.path.join(directory, title,
                                      "list_" + "_".join(name_parts) + ".txt"),
                geneset=geneset)

        # venn3 region order: 100, 010, 110, 001, 101, 011, 111
        venn3(subsets=(len(s100), len(s010), len(s110), len(s001),
                       len(s101), len(s011), len(s111)),
              set_labels=labels)
        return f
def plot_three_set_venn(set1, set2, set3, adj_params_dic,
                        mycolors=('r', 'g', 'b'), mylabels=None,
                        title='Plot title'):
    '''
    Plot three circle venn diagram.

    set1, set2, set3: the sets to compare; region sizes are computed here.
    adj_params_dic is of form: {labelid: (x_adj, y_adj), ...}
    If adj_params_dic is None, then no adjustments would be made.
    Label ids that refer to a region with no label (e.g. an empty region)
    are skipped.
    mycolors, mylabels: forwarded to venn3 as set_colors / set_labels.
    title: plot title.
    '''
    # Set matplotlib font size globally
    font = {'family': 'sans',
            'sans-serif': 'Arial',
            'weight': 'bold',
            'size': 25}
    matplotlib.rc('font', **font)

    # Region sizes in venn3 order. '-' binds tighter than '&', so e.g.
    # set1 & set2 - set3 is set1 & (set2 - set3): members of set1 and set2
    # that are not in set3.
    Abc = len(set1 - set2 - set3)
    aBc = len(set2 - set1 - set3)
    ABc = len(set1 & set2 - set3)
    abC = len(set3 - set1 - set2)
    AbC = len(set1 & set3 - set2)
    aBC = len(set2 & set3 - set1)
    ABC = len(set1 & set2 & set3)

    fig = plt.figure()
    # Fill whitespace in the margins by adjusting subplot
    fig.subplots_adjust(bottom=0.10, left=0.12, right=0.90, top=0.90)
    fig.add_subplot(111)

    p = venn3(subsets=(Abc, aBc, ABc, abC, AbC, aBC, ABC),
              set_colors=mycolors, set_labels=mylabels)

    # Adjust text based on adj_params_dic
    if adj_params_dic is not None:
        # .items() exists on both Python 2 and 3 (iteritems is Python 2 only)
        for labelid, adj_params in adj_params_dic.items():
            label = p.get_label_by_id(labelid)
            if label is None:
                continue
            x_pos, y_pos = label.get_position()
            label.set_x(x_pos + adj_params[0])
            label.set_y(y_pos + adj_params[1])

    plt.title(title)
    plt.show()
def venndiagram(names, labels, ax=None):
    """Draw a 2- or 3-set venn diagram on `ax`.

    names: 2 or 3 iterables; each is converted to a set. Any other count
        draws nothing.
    labels: set labels forwarded to venn2/venn3.
    ax: Matplotlib axes to draw on. When omitted, a new 4x4 figure with a
        single subplot is created.

    Returns the newly created figure, or None when `ax` was supplied.
    """
    from matplotlib_venn import venn2, venn3
    import pylab as plt

    f = None
    if ax is None:
        f = plt.figure(figsize=(4, 4))
        ax = f.add_subplot(111)

    # Pass `ax` explicitly: without it the diagram goes to the current
    # axes, which is not necessarily the one the caller supplied.
    if len(names) == 2:
        n1, n2 = names
        venn2([set(n1), set(n2)], set_labels=labels, ax=ax)
    elif len(names) == 3:
        n1, n2, n3 = names
        venn3([set(n1), set(n2), set(n3)], set_labels=labels, ax=ax)

    ax.axis('off')
    ax.set_axis_off()
    return f
def make_venn(self):
    """
    Create a venn diagram from a dictionary of replica sets

    The keys of self._subsets() are iterables of names taken from
    self.keys(); each is remapped to a three-character binary region id
    ('1' at the position of every name present) before being passed to
    venn3.

    Returns a tuple (venn3 result, subsets as returned by self._subsets()).
    """
    subsets = self._subsets()
    # Materialize the keys once: key views have no .index() on Python 3.
    keys = list(self.keys())

    # Remap rows with binary codes
    def binkey(row):
        bits = [0, 0, 0]
        for name in row:
            bits[keys.index(name)] = 1
        return ''.join(str(bit) for bit in bits)

    subsets_remapped = dict((binkey(k), v) for (k, v) in subsets.items())
    return (venn3(subsets=subsets_remapped, set_labels=keys), subsets)
def main():
    """Plot a venn diagram of the overlap between three peak BED files.

    Command line: [options] <peaks1_bed> <peaks2_bed> <peaks3_bed> <out_pdf>

    The three BED files are merged with bedtools into a temporary file; each
    merged peak is then assigned to every input set it intersects, and the
    resulting three sets of merged-peak indexes are drawn with venn3 and
    saved to <out_pdf>. The temporary file is removed even if a step fails.
    """
    usage = 'usage: %prog [options] <peaks1_bed> <peaks2_bed> <peaks3_bed> <out_pdf>'
    parser = OptionParser(usage)
    parser.add_option('--l1', dest='label1', default='peaks1', help='Label for peak set 1')
    parser.add_option('--l2', dest='label2', default='peaks2', help='Label for peak set 2')
    parser.add_option('--l3', dest='label3', default='peaks3', help='Label for peak set 3')
    (options, args) = parser.parse_args()

    if len(args) != 4:
        parser.error('Must provide three peaks BED files and output PDF')
    else:
        peak_beds = args[:3]
        out_pdf = args[3]

    merge_fd, merge_bed = tempfile.mkstemp()
    try:
        # merge peaks
        # NOTE(review): file names are interpolated into a shell command
        # unquoted; paths containing spaces or shell metacharacters break it.
        cmd = 'cat %s %s %s | awk \'{OFS="\t"} {print $1, $2, $3}\' | bedtools sort -i stdin | bedtools merge -i stdin > %s' % (peak_beds[0], peak_beds[1], peak_beds[2], merge_bed)
        subprocess.call(cmd, shell=True)

        # annotate merged peaks with each individual set
        # NOTE(review): num_peaks is not used below; the call is kept in
        # case count_peaks is relied on for its side effects -- confirm.
        num_peaks = count_peaks(merge_bed)
        peak_overlaps = [set(), set(), set()]

        for bi in range(3):
            cmd = 'bedtools intersect -c -a %s -b %s' % (merge_bed, peak_beds[bi])
            p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
            try:
                # pi is the index of the merged peak; the last column is the
                # overlap count reported by `bedtools intersect -c`
                for pi, line in enumerate(p.stdout):
                    a = line.split()
                    if int(a[-1]) > 0:
                        peak_overlaps[bi].add(pi)
            finally:
                # release the pipe and reap the child process
                p.stdout.close()
                p.wait()

        # plot
        plt.figure()
        venn3(peak_overlaps, set_labels=[options.label1, options.label2, options.label3])
        plt.savefig(out_pdf)
        plt.close()
    finally:
        # clean up
        os.close(merge_fd)
        os.remove(merge_bed)
def main(fn):
    """Show a venn diagram of the variant-call comparison stored in `fn`.

    Region sizes come from parse_vcf_compare(fn). Every region involving
    the third set is relabelled as "<count>(<percentage>)" using the values
    from parse_vcf_compare_percentage(fn).
    """
    plt.figure(figsize=(8.5, 11))

    n101, n001, n011, n010, n100, n110, n111 = parse_vcf_compare(fn)
    n001s, n101s, n011s, n111s = parse_vcf_compare_percentage(fn)

    # region id -> "count(percentage)" text
    annotated = {
        '001': '%d(%s)' % (n001, n001s),
        '101': '%d(%s)' % (n101, n101s),
        '011': '%d(%s)' % (n011, n011s),
        '111': '%d(%s)' % (n111, n111s),
    }

    region_sizes = (n100, n010, n110, n001, n101, n011, n111)
    caller_names = ('FreeBayes', 'GATiK (UG)', 'Plantinum v0.7')
    v = venn3(subsets=region_sizes, set_labels=caller_names)
    plt.title("Venn diagram")

    for region_id in ('001', '011', '101', '111'):
        v.get_label_by_id(region_id).set_text(annotated[region_id])

    plt.show()
def venn_diagram(sets, names):
    """
    Plot a Venn diagram with circle outlines.

    Parameters:
    -----------------------------
    sets: list of 2 or 3 sets, elements that should be compared between
        samples
    names: tuple of str, the labels for the sets

    Returns:
    -----------------------------
    The circle outlines returned by venn2_circles / venn3_circles.

    Raises:
    -----------------------------
    ValueError: if `sets` holds neither 2 nor 3 entries.
    """
    if len(sets) == 2:
        from matplotlib_venn import venn2, venn2_circles
        venn2(sets, names)
        return venn2_circles(sets)

    if len(sets) == 3:
        from matplotlib_venn import venn3, venn3_circles
        venn3(sets, names)
        return venn3_circles(sets)

    raise ValueError(
        "venn_diagram needs 2 or 3 sets, got %d" % len(sets))
def comb_venn(self, directory):
    """Save one venn diagram per reference group and query.

    Only drawn when there are exactly three references. For every key `ty`
    of self.groupedreference and every query `q`, the frequencies
    self.frequency[ty][q.name][r.name] of the grouped references are
    plotted and saved as venn_<ty>_<q.name>.png and .pdf in `directory`.
    The number of query entries not covered by any frequency is annotated
    in a box. The last diagram drawn is kept in self.venn.
    """
    reference_count = len(self.references)
    if reference_count == 2:
        print(2)

    elif reference_count == 3:
        for ty in self.groupedreference.keys():
            for q in self.query:
                plt.figure(figsize=(6, 4))
                plt.title("Venn Diagram: " + q.name)

                freq = [self.frequency[ty][q.name][r.name]
                        for r in self.groupedreference[ty]]

                # Entries 2 and 3 of freq are swapped to get the region
                # order venn3 expects.
                # NOTE(review): presumably freq is ordered
                # 100, 010, 001, 110, ... -- confirm against the producer.
                self.venn = venn3(
                    subsets=[freq[i] for i in [0, 1, 3, 2, 4, 5, 6]],
                    set_labels=[n.name for n in self.references])

                plt.annotate(str(len(q) - sum(freq)), xy=(0.1, 0.1),
                             xytext=(-120, -120), ha='left',
                             textcoords='offset points',
                             bbox=dict(boxstyle='round,pad=0.5', fc='gray',
                                       alpha=0.1))

                plt.savefig(os.path.join(
                    directory, "venn_" + ty + "_" + q.name + ".png"))
                plt.savefig(os.path.join(
                    directory, "venn_" + ty + "_" + q.name + ".pdf"),
                    format='pdf')
                # One figure is opened per query; close it once saved so
                # figures do not pile up in memory.
                plt.close()

    else:
        print("*** For plotting Venn diagram, the number of references must be 2 or 3.")