def main():
    """Draw an unweighted Venn diagram for three ID collections.

    Paths, output file and set labels come from ``parse_args()``.  Each model
    file is loaded with ``read_ids``; the diagram is saved to ``args.out_path``
    and the per-model exclusive IDs plus the IDs common to all three models
    are computed afterwards.
    """
    args = parse_args()
    out_path = args.out_path
    set_names = (args.fname, args.sname, args.tname)
    model_paths = (args.first_model_path,
                   args.second_model_path,
                   args.third_model_path)

    # NOTE: out_path is the image file itself; creating it as a directory
    # (formerly done here) is intentionally disabled.
    fm_ids, sm_ids, tm_ids = [read_ids(path) for path in model_paths]

    plt.figure(figsize=(6, 4), dpi=150)
    venn3_unweighted([fm_ids, sm_ids, tm_ids], set_names)
    plt.savefig(out_path, dpi=150)

    # Region membership: exclusive to one model, and shared by all three.
    common_ids = fm_ids & sm_ids & tm_ids
    only_fm_ids = fm_ids - sm_ids - tm_ids
    only_sm_ids = sm_ids - fm_ids - tm_ids
    only_tm_ids = tm_ids - fm_ids - sm_ids
def draw_venn(labels, data, totals):
    """Draw three unweighted Venn diagrams side by side and save them.

    Args:
        labels: set labels shared by all three diagrams.
        data: indexable holding the subsets for panels 0, 1 and 2.
        totals: accepted for interface compatibility; not used here.

    The patch transparency is read from the name ``alpha``, which is not a
    parameter and must be available from the surrounding module scope.
    The figure is written to ``figures/models_venn.png``.
    """
    panel_titles = ('D2Refine', 'OntoMaton', 'RightField')

    figure, axes = plt.subplots(1, 3)
    figure.set_size_inches(12, 4)
    for index, title in enumerate(panel_titles):
        axes[index].set_title(title, y=1.08, fontsize=14, fontweight='bold')

    for index in range(len(panel_titles)):
        diagram = venn3_unweighted(subsets=data[index],
                                   alpha=alpha,
                                   set_labels=labels,
                                   ax=axes[index])
        # Centre the set names under/over their circles.
        for set_label in diagram.set_labels:
            set_label.set_horizontalalignment("center")

    plt.savefig('figures/models_venn.png')
def plot_venn(self):
    """Redraw the population-analysis Venn diagram for the first three organisms.

    Organism names are taken from ``items[0]`` of every entry in
    ``self.parser.popData`` and stored, in first-seen order, in
    ``self.names_venn``.  When fewer than three distinct organisms are
    present the canvas is just cleared and redrawn.

    Region counts passed to ``venn3_unweighted`` follow its documented order
    (A only, B only, A&B, C only, A&C, B&C, A&B&C), where A, B and C are the
    first three organisms seen.
    """
    canvas = self.pop_analysis_venn_diagram.canvas
    canvas.figure.clf()

    size = self.total_org_number
    arr = [[0] * size for _ in range(size)]   # pairwise co-occurrence counts
    singles = [0] * size                      # entries holding exactly one organism
    all_3 = 0

    # One pass: collect the global organism order and the unique organisms
    # (first-seen order) of every entry.
    self.names_venn = []
    names_per_entry = []
    for key in self.parser.popData:
        entry_names = []
        for item in self.parser.popData[key]:
            name = item[0]
            if name not in entry_names:
                entry_names.append(name)
            if name not in self.names_venn:
                self.names_venn.append(name)
        names_per_entry.append(entry_names)

    if len(self.names_venn) < 3:
        # Not enough organisms for a three-set diagram: show an empty canvas.
        canvas.draw()
        return

    position = {name: idx for idx, name in enumerate(self.names_venn)}
    first_three = self.names_venn[:3]

    for entry_names in names_per_entry:
        if not entry_names:
            # Empty entry: nothing to count (previously an IndexError).
            continue
        if len(entry_names) == 1:
            singles[position[entry_names[0]]] += 1
            continue
        for offset, first in enumerate(entry_names):
            for second in entry_names[offset + 1:]:
                arr[position[first]][position[second]] += 1
                arr[position[second]][position[first]] += 1
        if all(name in entry_names for name in first_three):
            all_3 += 1

    # NOTE(review): arr[i][j] counts every entry containing both organisms,
    # including the entries also counted in all_3 -- confirm whether the
    # pairwise regions are meant to exclude the triple overlap.
    venn3_unweighted(
        subsets=(
            singles[0],
            singles[1],
            arr[0][1],
            singles[2],
            arr[0][2],
            arr[1][2],  # B&C region; was arr[1][1], a diagonal cell that is always 0
            all_3,
        ),
        set_labels=('0', '1', '2'))
    canvas.draw()
def sets(self):
    """Print size statistics of the three ID sets and save their Venn diagram.

    A is ``self._public_ids``, B is ``self._personal_ids`` and C is
    ``self._masked_ids``.  Every region/intersection is printed with its
    size and its share of ``|A ∪ B ∪ C|``.  When all three sets are empty the
    share is reported as 0.00% instead of raising ``ZeroDivisionError``.
    The diagram is written to ``sets.png`` inside ``self._out_dir``.
    """
    a_ids = self._public_ids
    b_ids = self._personal_ids
    c_ids = self._masked_ids

    union = len(a_ids.union(b_ids).union(c_ids))
    print('A ∪ B ∪ C %d' % union)
    print('A (population variants) %d' % len(a_ids))
    print('B (personal variants) %d' % len(b_ids))
    print('C (masked variants) %d' % len(c_ids))
    print()

    # (format string, members) in the order they are reported.
    report = [
        ('A ∩ B ∩ C %d (%.2f%%)',
         a_ids.intersection(b_ids).intersection(c_ids)),
        ('A ∩ B - C %d (%.2f%%)',
         a_ids.intersection(b_ids).difference(c_ids)),
        ('B ∩ C - A %d (%.2f%%)',
         b_ids.intersection(c_ids).difference(a_ids)),
        ('A ∩ C - B %d (%.2f%%)',
         a_ids.intersection(c_ids).difference(b_ids)),
        ('A - C - B %d (%.2f%%)',
         a_ids.difference(c_ids).difference(b_ids)),
        ('B - A - C %d (%.2f%%)',
         b_ids.difference(a_ids).difference(c_ids)),
        ('C - A - B %d (%.2f%%)',
         c_ids.difference(a_ids).difference(b_ids)),
        ('A ∩ B %d (%.2f%%)', a_ids.intersection(b_ids)),
        ('B ∩ C %d (%.2f%%)', b_ids.intersection(c_ids)),
        ('A ∩ C %d (%.2f%%)', a_ids.intersection(c_ids)),
    ]
    for template, members in report:
        value = len(members)
        # An empty union means every region is empty too; avoid dividing by 0.
        share = value / union * 100 if union else 0.0
        print(template % (value, share))

    plt.figure()
    venn.venn3_unweighted([a_ids, b_ids, c_ids], set_labels=('A', 'B', 'C'))
    plt.savefig(os.path.join(self._out_dir, 'sets.png'))
def venn3(subsets, title, unit_title, filename, set_labels=None, normalize=1.0, annotation=None):
    """Save an unweighted three-set Venn diagram with unit captions.

    Each region label gets ``unit_title`` appended on a second line
    (pluralised with ``'s'`` unless the label text is ``'1'``).  Region ids
    are taken from the ``venn3_keys`` name in the surrounding scope.

    Args:
        subsets: passed to ``venn.venn3_unweighted``.
        title: figure title, skipped when ``None``.
        unit_title: singular unit name written under every count.
        filename: output path handed to ``plt.savefig``.
        set_labels: optional labels for the three sets.
        normalize: ``normalize_to`` value for the circle outlines.
        annotation: optional mapping region id -> text drawn in red below
            the region label.
    """
    plt.figure()
    diagram = venn.venn3_unweighted(subsets=subsets, set_labels=set_labels)
    # Solid outlines for the equal-sized circles.
    venn.venn3_circles(subsets=(1, 1, 1, 1, 1, 1, 1),
                       linestyle='solid',
                       linewidth=1.5,
                       normalize_to=normalize)

    lift = np.array([0, 0.02])
    for region_id in venn3_keys:
        region_label = diagram.get_label_by_id(region_id)
        # Must happen right after the diagram is created: the label is moved
        # relative to the position the library gave it.
        region_label.set_position(region_label.get_position() + lift)
        caption = unit_title
        if region_label.get_text() != '1':
            caption += 's'
        region_label.set_text(region_label.get_text() + '\n' + caption)
        region_label.set_size(region_label.get_size() - 2)

    if annotation is not None:
        drop = np.array([0, 0.085])
        for region_id in annotation:
            anchor = diagram.get_label_by_id(region_id).get_position() - drop
            plt.annotate(annotation[region_id],
                         xy=anchor,
                         xytext=anchor,
                         ha='center',
                         textcoords='offset points',
                         color='r',
                         weight='bold')

    for region_label in diagram.subset_labels:
        region_label.set_fontname('sans-serif')

    if title is not None:
        plt.title(title)
    plt.savefig(filename)
    plt.close()
def venn3_plot(set1 = set(), set2 = set(), set3 = set(),
               lab_set1 = 'Set1', lab_set2 = 'Set2', lab_set3 = 'Set3',
               linewidth = 1, color_line = 'black',
               alpha_sets = 0.3,
               font_sets = False,  # False or 'bold'
               size_vals_sets = 12,
               alpha_inter = 0.3,
               font_inter = False,  # False or 'bold'
               size_vals_inter = 12,
               size_label = 12,
               font_label = False):  # False or 'bold'
    """Draw a styled, unweighted three-set Venn diagram on the current axes.

    The six outer regions share ``alpha_sets`` / ``font_sets`` /
    ``size_vals_sets``; the central region ('111') is styled separately with
    ``alpha_inter`` / ``font_inter`` / ``size_vals_inter``.  Set names use
    ``size_label`` and ``font_label``.  Dashed circle outlines are drawn with
    ``linewidth`` and ``color_line``.
    """
    diagram = venn3_unweighted(subsets = (set1, set2, set3),
                               set_labels = (lab_set1, lab_set2, lab_set3))
    venn3_circles(subsets = (1, 1, 1, 1, 1, 1, 1),
                  linestyle='--', linewidth = linewidth, color = color_line)

    # Region ids in the same order as diagram.subset_labels.
    region_ids = ('100', '010', '110', '001', '101', '011', '111')
    for count_label, region in zip(diagram.subset_labels, region_ids):
        if count_label is None:
            continue
        if region == '111':
            # The triple intersection is configured independently.
            alpha, weight, size = alpha_inter, font_inter, size_vals_inter
        else:
            alpha, weight, size = alpha_sets, font_sets, size_vals_sets
        diagram.get_patch_by_id(region).set_alpha(alpha)
        region_text = diagram.get_label_by_id(region)
        region_text.set_fontweight(weight)
        region_text.set_fontsize(size)

    for name_text in diagram.set_labels:
        name_text.set_fontsize(size_label)
        name_text.set_fontweight(font_label)
def plot_overlap():
    """Plot the overlap of validated predictions, syndromic genes and database genes.

    The first set is the intersection of (fetal + adult) genes with the
    non-brain predicted genes.  The diagram is shown and then saved to
    ``ensig_synautism_syndb.pdf``.
    """
    fetal = find_fetal()
    adult = find_adult()
    nb = load_nonbrain_pred_genes()
    syndromic = find_sfari_syndromic_genes()
    db = find_db_genes()

    validated = set(fetal + adult) & set(nb)

    figure = plt.figure()
    diagram = venn3_unweighted(
        [validated, set(syndromic), set(db)],
        set_labels=('Proteomics Validated \n EnSig', 'Syndromic Autism',
                    'Synapse Databases'),
        set_colors=('skyblue', 'coral', 'gray'),
        alpha=0.7)

    for name_text in diagram.set_labels:
        name_text.set_fontweight('bold')
        name_text.set_fontsize(30)

    for count_text in diagram.subset_labels:
        print (count_text)
        count_text.set_fontsize(30)

    # Emphasise the third entry of subset_labels and paint region '110' red.
    highlighted = diagram.subset_labels[2]
    highlighted.set_fontweight('bold')
    highlighted.set_fontsize(35)
    diagram.get_patch_by_id('110').set_color('red')

    plt.show()
    figure.savefig("ensig_synautism_syndb.pdf", bbox_inches='tight')
    plt.close()
def plot_venn_diagrams(venn, doc):
    """Save the three-way event overlap for one document as a Venn diagram.

    Args:
        venn: mapping with the keys ``"oneie"``, ``"tear-tbd"`` and
            ``"tear-matres"``; each value is passed to ``venn3_unweighted``
            as one of the three subsets.
        doc: document identifier used in the output file name.

    The image is written to ``../analysis/figures/{doc}_venn``.  The figure
    is closed afterwards so that calling this once per document does not
    accumulate open pyplot figures.
    """
    figure = plt.figure()
    venn3_unweighted(
        [venn["oneie"], venn["tear-tbd"], venn["tear-matres"]],
        ("OneIE Events", "TEAR-TBD Events", "TEAR-MATRES Events"),
        alpha=0.5)
    figure.savefig(f"../analysis/figures/{doc}_venn")
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(figure)
def main():
    """Count peaks covered by each combination of three models and plot them.

    Reads the peaks and the three model site files with ``read_bed``, maps
    sites to peak indexes with ``get_indexes`` and obtains per-region counts
    from ``creat_petal``.  The counts are written to ``<tag>_COUNT.tsv`` and
    an unweighted Venn diagram labelled with percentages of all peaks is
    saved to ``<tag>_VENN.pdf``, both inside ``args.out_dir``.
    """
    args = parse_args()
    out_dir = args.out_dir
    tag = args.tag
    set_names = (args.fname, args.sname, args.tname)
    model_paths = (args.first_model_path,
                   args.second_model_path,
                   args.third_model_path)

    # Venn region id -> column name in the TSV.
    id_to_name = {
        '001': 'INMODE',
        '010': 'BAMM',
        '011': 'BAMMxINMODE',
        '100': 'PWM',
        '101': 'PWMxINMODE',
        '110': 'PWMxBAMM',
        '111': 'PWMxBAMMxINMODE'
    }

    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    peaks = read_bed(args.input_peaks)
    covered_peaks = []
    for model_path in model_paths:
        model_sites = read_bed(model_path)
        covered_peaks.append(set(get_indexes(peaks, model_sites)))
    petal_labels = creat_petal(*covered_peaks)

    # Write the raw counts to TSV.
    data = {id_to_name[region]: petal_labels[region]
            for region in petal_labels.keys()}
    write_table(out_dir + '/' + tag + '_COUNT.tsv', data)

    # Draw the Venn diagram with each count expressed as a share of all peaks.
    for region in petal_labels.keys():
        share = int(petal_labels[region]) / len(peaks) * 100
        petal_labels[region] = '{:.2f}%'.format(share)
    venn3_unweighted(petal_labels,
                     set_labels=set_names,
                     set_colors=generate_colors(n_colors=3))
    plt.savefig(out_dir + '/' + tag + '_VENN.pdf', dpi=150)
def _venn3(filea, fileb, filec):
    """Render the overlap of three data files as a TIFF Venn diagram.

    ``<name>.txt`` is loaded with ``readdata`` for each argument.  The
    diagram is first saved as ``'<filea> n <fileb> n <filec>.png'`` at
    600 dpi, handed to ``png2tiff`` and the intermediate PNG is then removed.
    """
    names = (filea, fileb, filec)
    loaded = [readdata(name + '.txt') for name in names]

    fig = plt.figure(figsize=(cm2inch(17, 17)), dpi=600)
    venn3_unweighted(loaded, set_labels=names)

    png_path = filea + ' n ' + fileb + ' n ' + filec + '.png'
    fig.savefig(png_path, dpi=600)
    png2tiff(png_path)
    os.remove(png_path)
def draw_venn(df):
    """Fill the shared ``subset`` counts from ``df`` and save the gene-count Venn diagram.

    Relies on three names from the surrounding scope: ``lists`` (columns
    checked for genes found in all three isolates), ``group`` (maps
    'A'/'B'/'C' to isolate column names; pair labels 'AB', 'AC', 'BC' are
    added to it here) and ``subset`` (receives the count of every region).
    The figure is written to ``gene_count_summary.jpg``.
    """
    isolate_count = df['No. isolates']

    # Genes present in all three isolates, with no missing value in `lists`.
    subset['ABC'] = len(df[isolate_count == 3][lists].dropna())

    # Genes unique to a single isolate.
    for member, isolate in group.items():
        only_here = df[(isolate_count == 1) & (df[isolate].notnull())]
        subset[member] = len(only_here)

    # Genes shared by exactly two isolates.
    for pair in ('AB', 'AC', 'BC'):
        first_col = group[pair[0]]
        second_col = group[pair[1]]
        shared = df[isolate_count == 2][[first_col, second_col]].dropna()
        group[pair] = first_col + '-' + second_col
        subset[pair] = len(shared)

    # venn3 expects the regions in the order A, B, AB, C, AC, BC, ABC.
    region_order = ('A', 'B', 'AB', 'C', 'AC', 'BC', 'ABC')
    venn_values = [subset[region] for region in region_order]
    venn3_unweighted(subsets=venn_values,
                     set_labels=(group['A'], group['B'], group['C']),
                     alpha=0.7)
    plt.title('Gene Count')
    plt.savefig('gene_count_summary.jpg')
def motifplot(G, proatom_order, lig_name, MotifFolder):
    """Plot the biochemical profile of a ligand's binding motifs.

    For every motif (one row per motif, sorted by key) a bar chart of the
    amino-acid distribution and a pie chart of the atom-property
    distribution are drawn and saved as ``<lig_name>_analysis.png``.  When
    the ligand has exactly 2 or 3 functional groups, a Venn diagram of the
    protein pockets containing each motif is additionally saved as
    ``<lig_name>_provenn.png``; for any other ``G`` only the first picture
    is produced.

    Args:
        G: number of functional groups of the ligand (rows of the figure).
        proatom_order: dict mapping a motif label to the motif data passed
            to ``GetMotif``.
        lig_name: ligand name, used as output file prefix.
        MotifFolder: folder that receives the pictures.
    """
    # squeeze=False keeps `axes` two-dimensional even for G == 1, where
    # plt.subplots would otherwise return a 1-D array and axes[row, col]
    # would fail.
    fig, axes = plt.subplots(G, 2, figsize=(8, 3 * G), dpi=300, squeeze=False)
    fig1, axes1 = plt.subplots(1, 1)

    cluster_set = []
    labels = []
    # Bar chart: motif amino acids; pie chart: atom properties.
    for row, (label, motif) in enumerate(sorted(proatom_order.items())):
        A, aminoAcids, props = GetMotif(motif)
        PlotBarChart(axes[row, 0], aminoAcids)
        PlotPieChart(axes[row, 1], props)
        cluster_set.append(set(A))
        labels.append(label)
    fig.savefig(os.path.join(MotifFolder, "%s_analysis.png" % lig_name),
                dpi=300,
                bbox_inches="tight")

    # Overlap of protein pockets containing 2 or 3 motifs of this ligand.
    if G in (2, 3):
        draw_overlap = venn2_unweighted if G == 2 else venn3_unweighted
        draw_overlap(cluster_set, labels, ax=axes1)
        fig1.savefig(os.path.join(MotifFolder, "%s_provenn.png" % lig_name),
                     dpi=300)

    # This runs once per ligand; release both figures so they do not pile up.
    plt.close(fig)
    plt.close(fig1)
def venn_of_df(df, labels):
    """Draw an unweighted Venn diagram from the columns of ``df``.

    Each column becomes one set holding the index values of the rows where
    that column is greater than zero.  The current axes are then prepared for
    the equal-area circle layout.

    Args:
        df: data frame whose columns define the sets.
        labels: set labels passed to ``venn3_unweighted``.
    """
    subsets = [set(df.index[df[column] > 0].tolist()) for column in df.columns]
    venn3_unweighted(subsets, set_labels=labels)

    # Lay out the axes for seven equally sized regions.
    equal_areas = (1, 1, 1, 1, 1, 1, 1)
    centers, radii = _venn3.solve_venn3_circles(equal_areas)
    _common.prepare_venn_axes(plt.gca(), centers, radii)
def test_pr_28():
    """Check ``subset_label_formatter`` handling across all venn2/venn3 entry points."""
    import matplotlib_venn as mv

    formatter = lambda x: 'Value: %+0.3f' % (x / 100.0)
    three_sets = (1, 2, 3, 4, 5, 6, 7)
    two_sets = (1, 2, 3)

    # (drawing function, subset sizes, region id, formatter, expected label)
    cases = [
        (mv.venn3, three_sets, '010', None, '2'),
        (mv.venn3, three_sets, '010', formatter, 'Value: +0.020'),
        (mv.venn2, two_sets, '01', None, '2'),
        (mv.venn2, two_sets, '01', formatter, 'Value: +0.020'),
        (mv.venn3_unweighted, three_sets, '010', formatter, 'Value: +0.020'),
        (mv.venn2_unweighted, two_sets, '01', formatter, 'Value: +0.020'),
    ]
    for draw, sizes, region, label_formatter, expected in cases:
        v = draw(sizes, subset_label_formatter = label_formatter)
        assert v.get_label_by_id(region).get_text() == expected
def venn_diagram(all_barcode_number_dict):
    """Plot one unweighted Venn diagram per time point on a 2x3 grid.

    Panels 0-4 show 'Day 0', 'Day 6', ... 'Day 24', looked up in
    ``all_barcode_number_dict`` under those keys; the sixth panel stays
    empty.  The figure is saved as ``VennDiagram_EachTimepoint.svg`` and
    then shown.
    """
    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True, figsize=[12, 8])
    plt.suptitle("Distribution of Lineages")

    # axes.flat walks the grid row by row, matching panel = 3 * row + column.
    for panel, ax in enumerate(axes.flat):
        ax.axis('off')
        if panel == 5:
            break
        day = 'Day {}'.format(6 * panel)
        ax.set_title(day)
        venn3_unweighted(subsets=all_barcode_number_dict[day],
                         set_labels=('S2', 'S1', 'S3'),
                         set_colors=('#70A1D7', '#F47C7C', '#A1DE93'),
                         ax=ax)

    plt.savefig('VennDiagram_EachTimepoint.svg', format='svg', dpi=720)
    plt.show()
def venn(self, amplicons, primers, mismatch):
    """Save a Venn diagram of the amplicons produced by up to three primers.

    Args:
        amplicons: mapping primer -> iterable of amplicons.  A primer that
            is missing (or maps to ``None``) contributes an empty set.
        primers: sequence of primer names; only the first three are drawn.
            With fewer than two primers nothing is drawn.
        mismatch: value embedded in the output name
            ``venn_mismatch<mismatch>.pdf``.

    Returns:
        None.
    """
    n = len(primers)
    if n < 2:
        # Nothing to compare; bail out before a figure is created so that
        # no empty figure is left open.
        return

    shown = primers[:3] if n > 2 else primers
    # A real list is required: the venn functions call len() on the subsets
    # and index into them, which a lazy map object does not support.
    subsets = [set(amplicons.get(primer) or ()) for primer in shown]

    fig, ax = plt.subplots()
    if n > 2:
        venn_data = venn3_unweighted(subsets, set_labels=shown, ax=ax)
    else:
        venn_data = venn2_unweighted(subsets, set_labels=shown, ax=ax)

    for text in venn_data.set_labels:
        text.set_fontsize(16)

    fig_fname = '.'.join(['venn_mismatch{}'.format(mismatch), 'pdf'])
    fig.tight_layout()
    fig.savefig(fig_fname, dpi=1000)
    # Closing is enough; a following plt.clf() would open a new empty figure.
    plt.close(fig)
def test_pr_28():
    """Verify that custom and default subset label formatters are honoured."""
    import matplotlib_venn as mv

    def scaled(x):
        return 'Value: %+0.3f' % (x / 100.0)

    def label_text(diagram, region):
        return diagram.get_label_by_id(region).get_text()

    sizes3 = (1, 2, 3, 4, 5, 6, 7)
    sizes2 = (1, 2, 3)

    # Weighted diagrams: default formatter, then the custom one.
    assert label_text(mv.venn3(sizes3, subset_label_formatter=None), '010') == '2'
    assert label_text(mv.venn3(sizes3, subset_label_formatter=scaled), '010') == 'Value: +0.020'
    assert label_text(mv.venn2(sizes2, subset_label_formatter=None), '01') == '2'
    assert label_text(mv.venn2(sizes2, subset_label_formatter=scaled), '01') == 'Value: +0.020'

    # Unweighted diagrams must still format the real subset sizes.
    assert label_text(mv.venn3_unweighted(sizes3, subset_label_formatter=scaled), '010') == 'Value: +0.020'
    assert label_text(mv.venn2_unweighted(sizes2, subset_label_formatter=scaled), '01') == 'Value: +0.020'
def draw_venn(infile, outfile, weighted=False) :
    """Draw the Aesthetics / Semantics / Aes+Sem Venn diagram from a pickle file.

    Args:
        infile: path of a pickle file holding the subsets accepted by
            ``venn3`` / ``venn3_unweighted``.
        outfile: path the figure is saved to.
        weighted: draw area-proportional circles when true; otherwise draw
            equal circles and outline them manually.

    The figure is saved and then shown.
    """
    # SECURITY: pickle.load can execute arbitrary code -- only feed this
    # function files from a trusted source.
    # Pickle data is binary, so the file must be opened in 'rb' mode.
    with open(infile, "rb") as f:
        sets = pickle.load(f)

    set_labels = ('Aesthetics', 'Semantics', 'Aes+Sem')
    if weighted:
        venn3_circles(sets)
        venn = venn3(sets, set_labels=set_labels)
    else :
        venn = venn3_unweighted(sets, set_labels=set_labels)

    for l in venn.subset_labels :
        # Regions without a label are reported as None; skip them.
        if l is not None:
            l.set_fontsize(14)

    if not weighted:
        # Outline the equal-sized circles by hand.
        ax = pp.gca()
        for (c, r) in zip(venn.centers, venn.radii):
            ax.add_patch(Circle(c, r, lw=2, alpha=1, facecolor='none'))

    pp.savefig(outfile)
    pp.show()
pairs.append(tuple(l.split())) #get all locus names, loci informative for each cross informative, loci = read_genotypes(sys.argv[1], pairs) #get the loci captured at each k, the number of added loci, #and the pairs decending order of how many more loci they add shared_loci, added_loci, best_pairs = optimize_crosses(6, pairs, informative) #labels for pairs best_pairs_labels=["{0} x {1}".format(*x) for x in best_pairs] #plot results fig = plt.figure(figsize = (8,8)) ax1 = plt.subplot2grid((3,3), (0,0), colspan = 3, rowspan = 2) ax1.set_title('Shared Loci in Top Three Pairs', fontsize = 26) #venn diagram on top venn3_unweighted([informative[x] for x in best_pairs[:3]], set_labels = best_pairs_labels[:3], ax=ax1) #bar chart on bottom ax2 = plt.subplot2grid((3,3), (2,0), colspan=3) ax2.bar(range(len(added_loci[:8])), added_loci[:8]) ax2.set_ylabel('Number of Loci') ax2.set_title('Added Informative Loci Per Pair', fontsize = 26) ax2.set_xticklabels(best_pairs_labels[:8], rotation = 17 ) #add a bit of extra space fig.subplots_adjust(hspace=.44) fig.savefig('summmary.pdf') plt.show()
dict1 = D_G_list dict2 = compareSiteList['F9_UD_vs_F9_D4_TCP']['F9_D4_TCP_diff'] findG = FindOverlap(dict1, dict2, dictNames = ['D', 'C'], seqChr = chrSeq) G_len = np.sum([len(values) for keys, values in findG['siteList']['Intersection'].items()]) G = findG['siteNum']['Total']['Intersection'] E = E_G_len - G A = A_E_len - E F = G_F_len - G B = B_F_len - F D = D_G_len - G C = C_E_len - E plt.figure(figsize = (6,6)) out = venn3_unweighted(subsets = (A, B, D, C, E, F, G), set_labels = ('F9_D4_Min_UD', 'F9_D4_PG_Min_UD', 'F9_D4_TCP_Min_UD')) out.get_patch_by_id('100').set_alpha(1.0) for text in out.set_labels: text.set_fontsize(6) for text in out.subset_labels: text.set_fontsize(6) plt.savefig("Figures/F9_D4_Min_UD_diffVenn.pdf", bbox_inches = 'tight') #### Save sites into bed files #### def dict2df(dicty, seqChr, filename): df = pd.DataFrame(columns = ['Chr', 'Start', 'End']) for ii in seqChr: cc = np.repeat("chr" + str(ii), len(dicty[ii])) tmparr = list(dicty[str(ii)]) tmparr = sorted(tmparr)
# Script: show how the sequence sets of three result workbooks overlap,
# as an unweighted three-set Venn diagram.
import matplotlib.pyplot as plt
from matplotlib_venn import venn3_unweighted
from omicsplot.omics_excel import OmicsExcel
from omicsplot.set_tools import venn_set

# Input workbooks (hard-coded, machine-specific paths).
in_file1 = r'C:\Users\Chen\IdeaProjects\semi\data\kidney_rerank.xlsx'
in_file2 = r"C:\Users\Chen\IdeaProjects\semi\data\kidney_unspecific_test.xlsx"
in_file3 = r"C:\Users\Chen\IdeaProjects\semi\data\kidney_sequest.xlsx"

excel1 = OmicsExcel(in_file1)
excel2 = OmicsExcel(in_file2)
excel3 = OmicsExcel(in_file3)

# venn_set presumably converts the three sequence sets into the subsets
# layout expected by venn3_unweighted -- confirm against omicsplot.set_tools.
venn_region = venn_set(excel1.get_seq_set(), excel2.get_seq_set(), excel3.get_seq_set())

# Set order matches the inputs: file 1 = Specific, file 2 = Unspecific,
# file 3 = Sequest.
seq_v = venn3_unweighted(subsets=venn_region, set_labels=("Specific", "Unspecific", "Sequest"))

plt.title("Sequence Overlap")
plt.show()

# Disabled: the same plot for the peptide-delta sets.
# venn_delta_region = venn_set(excel1.get_pep_delta_set(), excel2.get_pep_delta_set(), excel3.get_pep_delta_set())
# seq_delta_v = venn3_unweighted(subsets=venn_delta_region, set_labels=("Specific", "Unspecific", "Sequest"))
#
# plt.title("Glycopeptide Overlap")
# plt.show()
t_contrast_2 = [] with open(contrast_2) as inp: for line in inp: t_contrast_2.append(line.strip()) t_contrast_3 = [] with open(contrast_3) as inp: for line in inp: t_contrast_3.append(line.strip()) # perform sanity check assert len(t_contrast_2) == len(list(set(t_contrast_2))) assert len(t_contrast_1) == len(list(set(t_contrast_1))) assert len(t_contrast_3) == len(list(set(t_contrast_3))) # if sanity checks pass convert lists into sets (if they fail there are duplicate transcripts in one of the files which is not expected) t_contrast_2 = set(t_contrast_2) t_contrast_1 = set(t_contrast_1) t_contrast_3 = set(t_contrast_3) # Plot venn3_unweighted(subsets=(t_contrast_1, t_contrast_2, t_contrast_3), set_labels=labels, alpha=0.55) plt.savefig(output, transparent=True, format='pdf')
for s, (f, t, c) in zip(sets, set_data): print("%i in %s" % (len(s), c)) names = [one_name for one_file, one_type, one_name in set_data] lengths = [len(one_set) for one_set in sets] if len(sets) == 3: try: from matplotlib_venn import venn3_unweighted except ImportError: sys.exit("Requires the Python library matplotlib_venn") venn3_unweighted( sets, [ "{} (Total {})".format(name, length) for (name, length) in zip(names, lengths) ], ) if len(sets) == 2: try: from matplotlib_venn import venn2_unweighted except ImportError: sys.exit("Requires the Python library matplotlib_venn") venn2_unweighted( sets, [ "{} (Total {})".format(name, length) for (name, length) in zip(names, lengths) ],
def venn_analysis_diagram(key_id, col_from_patients, col_from_procedures,
                          col_from_medications):
    """Show how one identifier overlaps across the three datasets.

    The function was written to understand missing records in the Fairfax
    County Fire and Rescue Department data, e.g. patient or provider IDs
    that appear in the Medications or Procedures datasets but not in the
    Patients table.

    Parameters
    ----------
    key_id : str (mandatory)
        Name of the identifier being analysed.  'PatientId',
        'FRDPersonnelID' and 'comp_idx' are replaced by friendlier wording
        in the plot title; any other string is used as is.
    col_from_patients : pandas Series (mandatory)
        The identifier column taken from the *Patients* dataset.
    col_from_procedures : pandas Series (mandatory)
        The identifier column taken from the *Procedures* dataset.
    col_from_medications : pandas Series (mandatory)
        The identifier column taken from the *Medications* dataset.

    Returns
    -------
    The result of ``plt.show()``; the diagram itself is displayed.

    Caution: the three columns must be given in the order above, otherwise
    the set labels on the diagram no longer match the data.
    """
    ### Assert Input ###
    exp_out = "<class 'pandas.core.series.Series'>"
    columns = (col_from_patients, col_from_procedures, col_from_medications)
    columns_are_series = all(str(type(column)) == exp_out for column in columns)
    assert columns_are_series, '''One of the column inputs provided is not a Series, please read the function information available at /src/d06_reporting folder'''
    key_is_string = str(type(key_id)) == "<class 'str'>"
    assert key_is_string, '''The key_id provided is not a string, please read the function information available at /src/d06_reporting folder'''

    vd3 = venn3_unweighted([set(column) for column in columns],
                           set_labels=('Patients', 'Procedures',
                                       'Medications'),
                           set_colors=('#d7191c', '#abdda4', '#2b83ba'),
                           alpha=0.8)

    for set_text in vd3.set_labels:  # label size
        set_text.set_fontsize(16)
    for count_text in vd3.subset_labels:  # number size
        count_text.set_fontsize(12)

    # Friendlier wording for the known identifier names.
    display_names = {
        'PatientId': 'Patients',
        'FRDPersonnelID': 'Providers',
        'comp_idx': 'Compound IDs: Patient & Provider',
    }
    key_id = display_names.get(key_id, key_id)

    plt.title('Venn Diagram for {} Across All Datasets'.format(key_id),
              fontname='Times New Roman',
              fontsize=20,
              pad=30,
              backgroundcolor='#f1a340',
              color='black')
    return plt.show()
phylumQuarSet = allTaxaSets['P'][2] phylumSets = [phylumFullSet, phylumHalfSet, phylumQuarSet] genusFullSet = allTaxaSets['G'][0] genusHalfSet = allTaxaSets['G'][1] genusQuarSet = allTaxaSets['G'][2] genusSets = [genusFullSet, genusHalfSet, genusQuarSet] speciesFullSet = allTaxaSets['S'][0] speciesHalfSet = allTaxaSets['S'][1] speciesQuarSet = allTaxaSets['S'][2] speciesSets = [speciesFullSet, speciesHalfSet, speciesQuarSet] # Add option to plot friendlier Venn diagrams for color-blind audiences v3Phylum = venn3_unweighted(phylumSets, ('D1', 'D0.5', 'D0.25'), set_colors=('y', 'b', 'r')) for text in v3Phylum.set_labels: text.set_fontsize(16) for text in v3Phylum.subset_labels: text.set_fontsize(18) plt.title('Phylum', fontsize=20) plt.savefig('krakenPhylumVennCB.png') plt.clf() plt.cla() v3Order = venn3_unweighted(orderSets, ('D1', 'D0.5', 'D0.25'), set_colors=('y', 'b', 'r')) for text in v3Order.set_labels: text.set_fontsize(16) for text in v3Order.subset_labels: text.set_fontsize(18)
A +=1 else: if mean_d > 0: if mean_n > 0: DN +=1 else: D +=1 else: if mean_n > 0: N +=1 ''' if lnum<3: print(lnum) print(ln_ls) print(mean_a, mean_d, mean_n) ''' total = [A,D,AD, N, AN, DN, ADN] print('A: '+str(A) + ';\nD: '+str(D)+';\nAD: '+str(AD) + ';\nN: '+str(N)+';\nAN: '+str(AN)+';\nDN: '+str(DN)+';\nADN: '+str(ADN)) OUTFILE.write('A: '+str(A)+'\n'+'D: '+str(D)+'\nN: '+str(N)+'\nAD: '+str(AD)+'\nAN: '+str(AN)+'\nDN: '+str(DN)+'\nADN: '+str(ADN)+'\n') print('Sum: '+str(sum(total))) print('Over!!!') plt.figure(figsize = (15, 15)) v = venn3_unweighted(subsets = ( A, D, AD, N, AN, DN, ADN), set_labels = ('A', 'D', 'N')) plt.savefig('venn_gene.jpg')
D_cutoff_id = namingscheme_parts[1] D_plant_id = namingscheme_parts[2] if D_plant_id == 'AC': plant_type = 'WT' elif D_plant_id =='dgt': plant_type = 'dgt' TPM_value = D_cutoff_id.split('_')[-1] D_expr = set(readExpressedGenes(D_filepath)) E_expr = set(readExpressedGenes(E_filepath)) M_expr = set(readExpressedGenes(M_filepath)) #Generate venn diagram venn = venn3_unweighted([D_expr, E_expr, M_expr], set_labels=("Differentiation", "Elongation", "Meristem")) plt.title(f"Expressed Genes TPM > {TPM_value} ({plant_type})") plt.savefig(os.path.join(venn_output_location, venn_output_filename)) #Create gene lists for each section of venn diagram intersections_dict = {'DEM': D_expr.intersection(E_expr).intersection(M_expr), 'DE_only': D_expr.intersection(E_expr).difference(M_expr),\ 'EM_only': E_expr.intersection(M_expr).difference(D_expr), 'DM_only': D_expr.intersection(M_expr).difference(E_expr),\ 'D_only': D_expr.difference(E_expr).difference(M_expr), 'E_only': E_expr.difference(D_expr).difference(M_expr),\ 'M_only': M_expr.difference(D_expr).difference(E_expr)} for inter_key in intersections_dict: filename = inter_key + '_' + D_plant_id + '_' + D_cutoff_id + '.txt' filepath = os.path.join(genelist_output_location, filename) IF = open(filepath, 'w') for member in intersections_dict[inter_key]: IF.write(member + '\n')
for condition_nb in set_to_title.values(): if condition_nb in venn_diagram: sub.append(venn_diagram[condition_nb]) else: sub.append(0) # print tuple(sub) # avec les 3 fichiers test, resultat attendu #Only in test1 Only in test2 Common test1 test2 Only in test3 Common test1 test3 Common test2 test3 Common test1 test2 test3 #(8, 6, 1, 7, 0, 2, 0) plt.figure(figsize=(14,10)) # first number : width , second number : height if len(liste_sets) == 2: v = venn2_unweighted(subsets = tuple(sub), set_labels = (liste_sets[0], liste_sets[1])) elif len(liste_sets) == 3: v = venn3_unweighted(subsets = tuple(sub), set_labels = (liste_sets[0], liste_sets[1], liste_sets[2])) for text in v.set_labels: # file name size text.set_fontsize(12) for text in v.subset_labels: # numbers inside circles size text.set_fontsize(16) if args.venn_title is not None : plt.title(args.venn_title) # display title #liste_sets_string = ', '.join(liste_sets) #print liste_sets_string #plt.title(liste_sets_string) try : mpl.pyplot.savefig(args.img_output_file)
############################# VENN DIAGRAM ############################### ############################################################################ # reading the pickle tree infile = open('data/Data519_original20_Results_mix100_ALL.pickle','rb') myanalysistestsift = pickle.load(infile) infile.close() from matplotlib_venn import venn3, venn3_circles, venn3_unweighted from matplotlib import pyplot as plt ################################################# #Missing candidates using accuracy less than 100% x = list(myanalysistestsift[myanalysistestsift['acc_hsv']<100]['file']) y = list(myanalysistestsift[myanalysistestsift['acc_rgb']<100]['file']) z = list(myanalysistestsift[myanalysistestsift['acc_sift_BF']<100]['file']) venn3_unweighted([set(x), set(y), set(z)], set_labels = ('hsv', 'rgb', 'sift')) plt.title('candidates_missed_Accuracy_20 < 100') ########################################################### #### captured candidates using accuracy greater than 66% x = list(myanalysistestsift[myanalysistestsift['acc_hsv']==100]['file']) y = list(myanalysistestsift[myanalysistestsift['acc_rgb']==100]['file']) z = list(myanalysistestsift[myanalysistestsift['acc_sift_BF']==100]['file']) venn3_unweighted([set(x), set(y), set(z)], set_labels = ('hsv', 'rgb', 'sift')) plt.title('candidates_captured_Accuracy_20 = 100') ########################################################## #Missing candidates using count less than 4 x = list(myanalysistestsift[myanalysistestsift['Count_hsv']<4]['file']) y = list(myanalysistestsift[myanalysistestsift['Count_rgb']<4]['file']) z = list(myanalysistestsift[myanalysistestsift['Count_sift_BF']<4]['file'])
# if(v.get_patch_by_id('101')): # v.get_patch_by_id('101').set_color('#ff00ff') # if(v.get_patch_by_id('111')): # v.get_patch_by_id('111').set_color('#ffffff') path = './images/' if not os.path.exists(path): os.makedirs(path) path1 = path + x + y + z #c = venn3_circles(s, linestyle='solid') plt.savefig(path1 + '_weighted') plt.close() print('Generating unweighted Venn diagram for:', x, y, z) v = venn3_unweighted(s, set_labels=(x,y,z), alpha=0.7) path2 = path1 + '_unweighted' plt.savefig(path2) plt.close() ## create venn sets filled with words ## moved to vennwords.py # print('Generating Venn diagram with words for:', x, y, z) # v = venn3_wordcloud(s, set_labels=(x,y,z), alpha=0.7, wordcloud_kwargs={'max_words':5,'min_font_size':5}) # path3 = path1 + '_words' # plt.savefig(path3) # plt.close() ## for wordclouds I want also to count the frequencies so I use a list instead of a set
# Columns written to every *.sites file, in output order.
_SITE_COLUMNS = ['chromosome', 'start', 'end', 'name', 'score', 'strand', 'site']

# Number of peaks in one bin of the cumulative COUNT / FREQUENCY tables.
_PEAKS_PER_BIN = 100

# Label returned by peak_classification -> column of the COUNT table.
_LABEL_TO_COLUMN = {
    'no_sites': 'no_sites',
    'not_overlap': 'not_overlap',
    'first_model': 'first_model_sites',
    'second_model': 'second_model_sites',
    'third_model': 'third_model_sites',
    'overlap_first_second_models': 'overlap_first_second_models',
    'overlap_first_third_models': 'overlap_first_third_models',
    'overlap_second_third_models': 'overlap_second_third_models',
    'overlap_all_models': 'overlap_all_models',
}

# Column order of the output tables. The first seven columns follow the
# region order matplotlib_venn uses for three sets:
# (Abc, aBc, ABc, abC, AbC, aBC, ABC).
_COUNT_COLUMNS = [
    'first_model_sites', 'second_model_sites', 'overlap_first_second_models',
    'third_model_sites', 'overlap_first_third_models',
    'overlap_second_third_models', 'overlap_all_models', 'no_sites',
    'not_overlap', 'peaks'
]


def _peak_numbers(names):
    """Return, for each name, the integer found between its first two '_'."""
    return [int(name.split('_')[1]) for name in names]


def _load_model_sites(path, model_label):
    """Read one model's sites, tag them with model_label, make names numeric."""
    sites = read_bed_like_file(path)
    sites['type'] = model_label
    sites['name'] = _peak_numbers(sites['name'])
    return sites


def _cumulative_counts(classification):
    """Build the running per-class peak counts in steps of _PEAKS_PER_BIN.

    Peaks after the last complete bin are ignored. Raises IndexError when
    there is not even one complete bin.
    """
    n_bins = len(classification) // _PEAKS_PER_BIN
    if n_bins == 0:
        raise IndexError(
            'need at least %d peaks to build the count table, got %d' %
            (_PEAKS_PER_BIN, len(classification)))
    per_bin = []
    for bin_no in range(n_bins):
        labels = classification[bin_no * _PEAKS_PER_BIN:
                                (bin_no + 1) * _PEAKS_PER_BIN]
        per_bin.append({
            column: labels.count(label)
            for label, column in _LABEL_TO_COLUMN.items()
        })
    # cumsum replaces the row-by-row DataFrame.append accumulation and keeps
    # the counts as integers.
    counts = pd.DataFrame(per_bin).cumsum()
    counts['peaks'] = [(bin_no + 1) * _PEAKS_PER_BIN for bin_no in range(n_bins)]
    return counts[_COUNT_COLUMNS]


def _rows_for_class(sites, classification, label):
    """Select rows of sites for the peaks classified as label.

    The positions of the matching entries in classification are looked up in
    sites['name'] with searchsorted, and the result is used with .loc.
    NOTE(review): this presumes sites['name'] is sorted and that the frame
    has a default integer index - confirm against read_bed_like_file.
    """
    peak_indices = np.array(
        [index for index, found in enumerate(classification) if found == label])
    return sites.loc[sites['name'].searchsorted(peak_indices)]


def _write_sites(sites, path):
    """Write the standard site columns of sites to path, tab-separated, no header."""
    sites[_SITE_COLUMNS].to_csv(path, sep='\t', index=False, header=False)


def main():
    """Classify peaks by which models have sites in them and write reports.

    Reads the peaks file and three model site files named on the command
    line, classifies every peak with peak_classification, and writes into
    out_dir (all file names prefixed with tag):

    * _COUNT.tsv      - cumulative per-class counts for every 100 peaks
    * _FREQUENCY.tsv  - the same counts divided by the number of peaks
    * _PIC.pdf        - unweighted Venn diagram of the last frequency row
    * *.sites         - all sites per model and the subsets selected for the
                        single-model, second+third and all-model classes

    Raises:
        IndexError: if fewer than 100 peaks were read.
    """
    args = parse_args()
    out_dir = args.out_dir
    tag = args.tag
    prefix = out_dir + '/' + tag

    # Also creates missing parent directories and tolerates an existing one.
    os.makedirs(out_dir, exist_ok=True)

    data = read_peaks(args.input_peaks)
    data['name'] = _peak_numbers(data['name'])

    first_model_sites = _load_model_sites(args.first_model_path, 'first_model')
    second_model_sites = _load_model_sites(args.second_model_path,
                                           'second_model')
    third_model_sites = _load_model_sites(args.third_model_path, 'third_model')

    classification = [
        peak_classification(peak, first_model_sites, second_model_sites,
                            third_model_sites)
        for _, peak in data.iterrows()
    ]

    #############################################
    #Make table with count of diff kind of peaks#
    #############################################
    count = _cumulative_counts(classification)
    count.to_csv(prefix + '_COUNT.tsv', sep='\t', index=False)

    frequency = count.copy()
    for column in frequency:
        if column == 'peaks':
            continue
        frequency[column] = frequency[column] / frequency['peaks']
    frequency.to_csv(prefix + '_FREQUENCY.tsv', sep='\t', index=False)

    # The first seven columns of the last row are the seven Venn regions.
    venn3_unweighted(subsets=np.around(np.array(frequency.iloc[-1, :7]), 2),
                     set_labels=(args.fname, args.sname, args.tname))
    plt.savefig(prefix + '_PIC.pdf', dpi=150)

    ##################################
    only_first_model_sites = _rows_for_class(first_model_sites, classification,
                                             'first_model')
    only_second_model_sites = _rows_for_class(second_model_sites,
                                              classification, 'second_model')
    only_third_model_sites = _rows_for_class(third_model_sites, classification,
                                             'third_model')
    only_second_and_third_model_sites_3 = _rows_for_class(
        third_model_sites, classification, 'overlap_second_third_models')
    only_second_and_third_model_sites_2 = _rows_for_class(
        second_model_sites, classification, 'overlap_second_third_models')
    overlap_model_sites = _rows_for_class(third_model_sites, classification,
                                          'overlap_all_models')

    _write_sites(only_first_model_sites, prefix + '_only_first_model.sites')
    _write_sites(first_model_sites, prefix + '_all_first_model.sites')
    _write_sites(only_second_model_sites, prefix + '_only_second_model.sites')
    _write_sites(second_model_sites, prefix + '_all_second_model.sites')
    _write_sites(only_third_model_sites, prefix + '_only_third_model.sites')
    _write_sites(third_model_sites, prefix + '_all_third_model.sites')
    _write_sites(overlap_model_sites, prefix + '_overlap_model_sites.sites')
    _write_sites(only_second_and_third_model_sites_3,
                 prefix + '_overlap_second_and_third_sites_3.sites')
    _write_sites(only_second_and_third_model_sites_2,
                 prefix + '_overlap_second_and_third_sites_2.sites')
'SynSig'), set_colors=('coral', 'skyblue', 'lightgreen'), alpha=0.7) for text in v.set_labels: text.set_fontweight('bold') for text in v.set_labels: text.set_fontsize(25) for text in v.subset_labels: text.set_fontsize(25) plt.show() plt.close() v = venn3_unweighted([set(adult), set(db), set(pred)], set_labels=('Adult Brain \n Synapse Validation', 'Synapse Databases', 'SynSig'), set_colors=('gray', 'lightgray', 'red'), alpha=0.7) for text in v.set_labels: text.set_fontweight('bold') for text in v.set_labels: text.set_fontsize(25) for text in v.subset_labels: text.set_fontsize(25) plt.show() plt.close() v = venn3([set(adult), set(db), set(pred)], set_labels=('Adult Brain \n Synapse Validation', 'Synapse Databases', 'SynSig'),
def venn_stats(fnames, f_c2n, f_jsnice, f_jsnaughty):
    """Print and optionally plot how the three tools' results overlap.

    Every line of 'name_stats.csv' is passed to venn_process_line in a
    process pool. Each truthy result is unpacked as a triple of flags
    (Context2Name, JSNice, JSNaughty) and counted per combination. The
    counts are printed as percentages of all truthy results, and a Venn
    diagram is saved to args.save_venn when that option is set.

    Args:
        fnames: names stored on venn_process_line.fname_map.
        f_c2n: names stored on venn_process_line.f_c2n_map.
        f_jsnice: names stored on venn_process_line.f_jsnice_map.
        f_jsnaughty: names stored on venn_process_line.f_jsnaughty_map.
    """
    # Counters keyed by (c2n, jsnice, jsnaughty). (0, 0, 0) is deliberately
    # absent: such results add to the total but to no region.
    region_counts = {
        (1, 0, 0): 0,
        (0, 1, 0): 0,
        (0, 0, 1): 0,
        (1, 1, 0): 0,
        (1, 0, 1): 0,
        (0, 1, 1): 0,
        (1, 1, 1): 0,
    }
    total = 0

    # The lookup tables are attached to the worker function before the pool
    # is created.
    # NOTE(review): presumably so that forked workers inherit them - confirm
    # the start method in use.
    venn_process_line.fname_map = {fname: 1 for fname in fnames}
    venn_process_line.f_c2n_map = {fname: 1 for fname in f_c2n}
    venn_process_line.f_jsnice_map = {fname: 1 for fname in f_jsnice}
    venn_process_line.f_jsnaughty_map = {fname: 1 for fname in f_jsnaughty}

    with open('name_stats.csv', 'r') as f, multiprocessing.Pool() as p:
        for res in tqdm.tqdm(p.imap_unordered(venn_process_line, f)):
            if not res:
                continue
            total += 1
            b1, b2, b3 = res
            flags = (b1, b2, b3)
            if flags in region_counts:
                region_counts[flags] += 1

    if total == 0:
        # Nothing to divide by: report it instead of raising ZeroDivisionError.
        print("No results obtained from name_stats.csv; no Venn stats to report.")
        return

    def percent(flags):
        return round(region_counts[flags] * 100 / total, 2)

    only_c2n = percent((1, 0, 0))
    only_jsnice = percent((0, 1, 0))
    only_jsnaughty = percent((0, 0, 1))
    c2n_jsnice = percent((1, 1, 0))
    c2n_jsnaughty = percent((1, 0, 1))
    jsnice_jsnaughty = percent((0, 1, 1))
    all_tools = percent((1, 1, 1))

    print()
    print("============")
    print("Venn Diagram Stats")
    print("==================")
    print()
    print("Only Context2Name : ", only_c2n, '%')
    print("Only JSNice : ", only_jsnice, '%')
    print("Only JSNaughty : ", only_jsnaughty, '%')
    print()
    print("Only Context2Name & JSNice : ", c2n_jsnice, '%')
    print("Only Context2Name & JSNaughty : ", c2n_jsnaughty, '%')
    print("Only JSNice & JSNaughty : ", jsnice_jsnaughty, '%')
    print()
    print("All three : ", all_tools, '%')
    print()
    print("============")
    print()

    if args.save_venn is not None:
        # matplotlib_venn region order: (Abc, aBc, ABc, abC, AbC, aBC, ABC).
        subsets = [
            only_c2n, only_jsnice, c2n_jsnice, only_jsnaughty, c2n_jsnaughty,
            jsnice_jsnaughty, all_tools
        ]
        labels = ('Context2Name', 'JSNice', 'JSNaughty')
        fig = plt.figure()
        if args.venn_weighted:
            matplotlib_venn.venn3(subsets=subsets, set_labels=labels)
            circle_areas = subsets
        else:
            matplotlib_venn.venn3_unweighted(subsets=subsets,
                                             set_labels=labels)
            # The unweighted diagram is laid out with all region areas equal
            # to 1, so the outlines must use the same areas to coincide.
            circle_areas = (1, 1, 1, 1, 1, 1, 1)
        matplotlib_venn.venn3_circles(subsets=circle_areas,
                                      linestyle='solid',
                                      linewidth=0.3)
        fig.savefig(args.save_venn, bbox_inches='tight', dpi=1000)
# Adult-stage rows: one row per distinct gene in adult_overlap, all sharing
# the same 'Stage' text.
adult_df = pd.DataFrame({
    'Stage': 'Adult Synapse: Syndromic Autism Genes',
    'Genes': list(set(adult_overlap)),
})

# Stack the fetal and adult tables, show the result and store it.
final = pd.concat([fetal_df, adult_df], axis=0)
print(final)
final.to_csv('nb_val_new_syndromic.csv')
#print (df)

# Entries that occur in fetal or adult and also in nb.
pred_val = list(set(fetal + adult) & set(nb))

v = venn3_unweighted(
    [set(pred_val), set(syndromic), set(db)],
    set_labels=(
        'Proteomics Validated \n ENSig',
        'Syndromic Autism',
        'Synapse Databases',
    ),
    set_colors=('skyblue', 'coral', 'gray'),
    alpha=0.7,
)
#venn3_circles([set(pred_val),set(syndromic), set(db)], linestyle='solid', linewidth=0.5, color='k');

# Set names: bold, size 30.
for text in v.set_labels:
    #print (text)
    text.set_fontweight('bold')
    text.set_fontsize(30)

# Region counts: print each label, then size 30.
for text in v.subset_labels:
    print(text)
    text.set_fontsize(30)

# Emphasise the subset label at index 2 with a bold, larger font.
target = v.subset_labels[2]
target.set_fontweight('bold')
target.set_fontsize(35)
from nltk.corpus.reader.wordnet import WordNetCorpusReader
from matplotlib import pyplot as plt
from matplotlib_venn import venn3_unweighted

wn = WordNetCorpusReader("./resources/WordNet-3.0/dict", None)

# Every synset returned for part of speech 'a'.
adjectives = set(wn.all_synsets('a'))

# Noun synsets whose lexname is 'noun.attribute'.
attributes = set()
for n in wn.all_synsets('n'):
    if n.lexname() == 'noun.attribute':
        attributes.add(n)

# Everything returned by .attributes() of any adjective synset.
direct_attributes = set()
for adjective in adjectives:
    direct_attributes.update(adjective.attributes())

# Noun synsets reached through derivationally related forms of the
# adjectives' lemmas.
morphologically_related = set()
for adjective in adjectives:
    for lemma in adjective.lemmas():
        for related_lemma in lemma.derivationally_related_forms():
            related_synset = related_lemma.synset()
            if related_synset.pos() == 'n':
                morphologically_related.add(related_synset)

diagram = venn3_unweighted(
    [attributes, direct_attributes, morphologically_related],
    ['labeled as\nnoun.attribute', 'direct\nattributes',
     'morphologically\nrelated nouns'])

# Black outlines on white regions.
for patch in diagram.patches:
    patch.set_edgecolor('k')
    patch.set_facecolor('w')  # remove this line for color diagram.

plt.savefig('./images/venn.pdf')