Пример #1
0
def main():
    """Draw an unweighted three-set Venn diagram of model IDs and save it.

    Options come from ``parse_args``; one ID collection per model path is
    loaded with ``read_ids``. The figure is written to ``args.out_path``,
    after which the IDs exclusive to each model and the IDs shared by all
    three are derived.
    """
    args = parse_args()
    out_path = args.out_path
    model_paths = (
        args.first_model_path,
        args.second_model_path,
        args.third_model_path,
    )
    set_names = (args.fname, args.sname, args.tname)

    # Load the ID collections in first/second/third order.
    fm_ids, sm_ids, tm_ids = [read_ids(path) for path in model_paths]

    # NOTE: out_path is handed straight to savefig; no directory is created.
    plt.figure(figsize=(6, 4), dpi=150)
    venn3_unweighted([fm_ids, sm_ids, tm_ids], set_names)
    plt.savefig(out_path, dpi=150)

    # IDs found by exactly one model, then IDs found by every model.
    only_fm_ids = fm_ids - sm_ids - tm_ids
    only_sm_ids = sm_ids - fm_ids - tm_ids
    only_tm_ids = tm_ids - fm_ids - sm_ids
    common_ids = fm_ids & sm_ids & tm_ids
Пример #2
0
def draw_venn(labels, data, totals):
    """Draw three side-by-side unweighted Venn diagrams and save them.

    ``data[0]``, ``data[1]`` and ``data[2]`` supply the subsets for the
    'D2Refine', 'OntoMaton' and 'RightField' panels respectively; every panel
    shares ``labels`` as its set labels. ``totals`` is accepted but not used.
    The transparency comes from the module-level ``alpha``. The figure is
    written to 'figures/models_venn.png'.
    """
    figure, axes = plt.subplots(1, 3)
    figure.set_size_inches(12, 4)

    panel_titles = ('D2Refine', 'OntoMaton', 'RightField')
    for axis, heading in zip(axes, panel_titles):
        axis.set_title(heading, y=1.08, fontsize=14, fontweight='bold')

    for panel, axis in enumerate(axes):
        diagram = venn3_unweighted(subsets=data[panel],
                                   alpha=alpha,
                                   set_labels=labels,
                                   ax=axis)
        # Centre each set label horizontally on its anchor point.
        for caption in diagram.set_labels:
            caption.set_horizontalalignment("center")

    plt.savefig('figures/models_venn.png')
Пример #3
0
    def plot_venn(self):
        """Redraw the population-analysis Venn diagram for the first three organisms.

        Organism names are read from ``item[0]`` of every entry in
        ``self.parser.popData`` (first-seen order) and stored in
        ``self.names_venn``. For each key, the distinct organisms present are
        tallied as pairwise co-occurrences, as single-organism hits, or as an
        "all three" hit for the first three organisms. With fewer than three
        organisms the canvas is only cleared and redrawn.
        """
        canvas = self.pop_analysis_venn_diagram.canvas
        canvas.figure.clf()

        size = self.total_org_number
        pair_counts = [[0] * size for _ in range(size)]
        singles = [0] * size
        all_3 = 0

        # Distinct organism names, in the order they are first encountered.
        self.names_venn = []
        for key in self.parser.popData:
            for item in self.parser.popData[key]:
                if item[0] not in self.names_venn:
                    self.names_venn.append(item[0])

        if len(self.names_venn) < 3:
            canvas.figure.clf()
            canvas.draw()
            return

        position = {name: idx for idx, name in enumerate(self.names_venn)}
        first_three = self.names_venn[:3]

        for key in self.parser.popData:
            # Distinct organisms that appear under this key.
            present = []
            for item in self.parser.popData[key]:
                if item[0] not in present:
                    present.append(item[0])

            if len(present) >= 2:
                # Count every unordered pair symmetrically.
                for i, left in enumerate(present):
                    for right in present[i + 1:]:
                        pair_counts[position[left]][position[right]] += 1
                        pair_counts[position[right]][position[left]] += 1
            elif present:
                # Exactly one organism; an empty entry list is skipped
                # instead of raising IndexError.
                singles[position[present[0]]] += 1

            if all(name in present for name in first_three):
                all_3 += 1

        # Region order expected by matplotlib-venn:
        # (100, 010, 110, 001, 101, 011, 111).
        # The "011" slot must be the organism-1/organism-2 count; the
        # previous code passed the diagonal cell [1][1], which is always 0.
        # NOTE(review): the pair counts also include keys shared by all
        # three organisms, so the pairwise regions are not exclusive counts
        # -- confirm whether all_3 should be subtracted from them.
        venn3_unweighted(subsets=(singles[0], singles[1], pair_counts[0][1],
                                  singles[2], pair_counts[0][2],
                                  pair_counts[1][2], all_3),
                         set_labels=('0', '1', '2'))

        canvas.draw()
Пример #4
0
    def sets(self):
        """Print overlap statistics for the three ID sets and save a Venn diagram.

        A is ``self._public_ids``, B is ``self._personal_ids`` and C is
        ``self._masked_ids``. The size of every region is printed together
        with its share of the union. When the union is empty the share is
        reported as 0.00% instead of raising ZeroDivisionError. The diagram
        is written to 'sets.png' inside ``self._out_dir``.
        """
        a = self._public_ids
        b = self._personal_ids
        c = self._masked_ids

        union = len(a.union(b).union(c))
        print('A ∪ B ∪ C %d' % union)
        print('A (population variants) %d' % len(a))
        print('B (personal variants) %d' % len(b))
        print('C (masked variants) %d' % len(c))
        print()

        # (label, size) pairs in the order they are reported.
        regions = (
            ('A ∩ B ∩ C', len(a.intersection(b).intersection(c))),
            ('A ∩ B - C', len(a.intersection(b).difference(c))),
            ('B ∩ C - A', len(b.intersection(c).difference(a))),
            ('A ∩ C - B', len(a.intersection(c).difference(b))),
            ('A - C - B', len(a.difference(c).difference(b))),
            ('B - A - C', len(b.difference(a).difference(c))),
            ('C - A - B', len(c.difference(a).difference(b))),
            ('A ∩ B', len(a.intersection(b))),
            ('B ∩ C', len(b.intersection(c))),
            ('A ∩ C', len(a.intersection(c))),
        )
        for label, value in regions:
            # Guard the empty-union case: every region is then empty too.
            share = value / union * 100 if union else 0.0
            print('%s %d (%.2f%%)' % (label, value, share))

        plt.figure()
        venn.venn3_unweighted([a, b, c], set_labels=('A', 'B', 'C'))
        plt.savefig(os.path.join(self._out_dir, 'sets.png'))
Пример #5
0
def venn3(subsets, title, unit_title, filename, set_labels=None, normalize=1.0, annotation=None):
    """Render an unweighted three-set Venn diagram with unit captions and save it.

    Every region label listed in the module-level ``venn3_keys`` is nudged
    upwards, extended with ``unit_title`` on a second line (an 's' is appended
    unless the label text is '1') and shrunk by two points. ``annotation``,
    when given, maps region ids to red bold notes placed under the matching
    label. The figure is saved to ``filename`` and then closed.
    """
    plt.figure()
    diagram = venn.venn3_unweighted(subsets=subsets, set_labels=set_labels)
    # Equal-sized outlines drawn on top of the filled regions.
    venn.venn3_circles(subsets=(1,1,1,1,1,1,1), linestyle='solid', linewidth=1.5, normalize_to=normalize)

    lift = np.array([0, 0.02])
    for slot in range(len(venn3_keys)):
        region_label = diagram.get_label_by_id(venn3_keys[slot])
        region_label.set_position(region_label.get_position() + lift)
        # The count text is read right after the diagram is built, so it is
        # still the plain number at this point.
        count_text = region_label.get_text()
        caption = unit_title if count_text == '1' else unit_title + 's'
        region_label.set_text(count_text + '\n' + caption)
        region_label.set_size(region_label.get_size() - 2)

    if annotation is not None:
        drop = np.array([0, 0.085])
        for region_id in annotation:
            anchor = diagram.get_label_by_id(region_id).get_position() - drop
            plt.annotate(annotation[region_id], xy=anchor, xytext=anchor, ha='center', textcoords='offset points', color='r', weight='bold')

    for region_label in diagram.subset_labels:
        region_label.set_fontname('sans-serif')

    if title is not None:
        plt.title(title)
    plt.savefig(filename)
    plt.close()
Пример #6
0
def venn3_plot(set1 = set(),
               set2 = set(),
               set3 = set(),
               lab_set1 = 'Set1',
               lab_set2 = 'Set2',
               lab_set3 = 'Set3',
               linewidth = 1,
               color_line = 'black',
               alpha_sets = 0.3,
               font_sets = False, # False or 'bold'
               size_vals_sets = 12,
               alpha_inter = 0.3,
               font_inter = False, # False or 'bold'
               size_vals_inter = 12,
               size_label = 12,
               font_label = False): # False or 'bold'
    """Draw a styled, unweighted three-set Venn diagram on the current axes.

    ``set1``..``set3`` are the sets to compare and ``lab_set1``..``lab_set3``
    their captions. ``linewidth`` and ``color_line`` style the dashed circle
    outlines. ``alpha_sets``, ``font_sets`` and ``size_vals_sets`` style the
    six outer regions; ``alpha_inter``, ``font_inter`` and ``size_vals_inter``
    style the central '111' region independently. ``size_label`` and
    ``font_label`` style the set captions. The default sets are never
    mutated here. Returns None.
    """
    v = venn3_unweighted(subsets = (set1, set2, set3), set_labels = (lab_set1, lab_set2, lab_set3))
    venn3_circles(subsets = (1, 1, 1, 1, 1, 1, 1),
                  linestyle='--', linewidth = linewidth, color = color_line)

    # Region ids in the same order as v.subset_labels.
    region_ids = ('100', '010', '110', '001', '101', '011', '111')
    for label, region_id in zip(v.subset_labels, region_ids):
        if label is None:
            continue
        if region_id == '111':
            # The triple intersection is configured separately.
            alpha, weight, size = alpha_inter, font_inter, size_vals_inter
        else:
            alpha, weight, size = alpha_sets, font_sets, size_vals_sets
        v.get_patch_by_id(region_id).set_alpha(alpha)
        v.get_label_by_id(region_id).set_fontweight(weight)
        v.get_label_by_id(region_id).set_fontsize(size)

    for text in v.set_labels:
        text.set_fontsize(size_label)
        text.set_fontweight(font_label)
Пример #7
0
def plot_overlap():
	"""Plot the overlap of validated predictions, syndromic genes and database genes.

	The validated set is the intersection of (fetal + adult) genes with the
	non-brain predicted genes. The '110' region is highlighted in red with a
	larger bold label. The figure is shown and then saved to
	'ensig_synautism_syndb.pdf'.
	"""
	fetal = find_fetal()
	adult = find_adult()
	nb = load_nonbrain_pred_genes()
	syndromic = find_sfari_syndromic_genes()
	db = find_db_genes()

	validated = set(fetal + adult) & set(nb)

	f = plt.figure()

	v = venn3_unweighted(
		[set(validated), set(syndromic), set(db)],
		set_labels=('Proteomics Validated \n EnSig',  'Syndromic Autism', 'Synapse Databases'),
		set_colors=('skyblue', 'coral', 'gray'),
		alpha=0.7)

	# Set captions: bold, 30 pt.
	for caption in v.set_labels:
		caption.set_fontweight('bold')
		caption.set_fontsize(30)

	# Region counts: echoed to stdout and set to 30 pt.
	for region_label in v.subset_labels:
		print(region_label)
		region_label.set_fontsize(30)

	# Index 2 of subset_labels is the '110' region highlighted below.
	highlighted = v.subset_labels[2]
	highlighted.set_fontweight('bold')
	highlighted.set_fontsize(35)
	v.get_patch_by_id('110').set_color('red')

	plt.show()
	f.savefig("ensig_synautism_syndb.pdf", bbox_inches='tight')
	plt.close()
Пример #8
0
def plot_venn_diagrams(venn, doc):
    """Save an unweighted Venn diagram of the three event collections in ``venn``.

    ``venn`` is indexed with the keys "oneie", "tear-tbd" and "tear-matres";
    ``doc`` is interpolated into the output path
    ``../analysis/figures/<doc>_venn``. Returns None.
    """
    plt.figure()
    event_sets = [venn[source] for source in ("oneie", "tear-tbd", "tear-matres")]
    captions = ("OneIE Events", "TEAR-TBD Events", "TEAR-MATRES Events")
    venn3_unweighted(event_sets, captions, alpha=0.5)
    plt.savefig(f"../analysis/figures/{doc}_venn")
Пример #9
0
def main():
    """Compare the peaks hit by three models; write a count table and a Venn PDF.

    Peaks and per-model sites are loaded with ``read_bed``; ``get_indexes``
    maps each model's sites to peaks. ``creat_petal`` yields one value per
    Venn region id, which is written to ``<out_dir>/<tag>_COUNT.tsv`` and then
    drawn as a percentage of the number of peaks in
    ``<out_dir>/<tag>_VENN.pdf``.
    """
    args = parse_args()
    out_dir = args.out_dir
    tag = args.tag
    model_paths = (
        args.first_model_path,
        args.second_model_path,
        args.third_model_path,
    )
    set_names = (args.fname, args.sname, args.tname)

    # Venn region id -> column name used in the TSV.
    id_to_name = {
        '001': 'INMODE',
        '010': 'BAMM',
        '011': 'BAMMxINMODE',
        '100': 'PWM',
        '101': 'PWMxINMODE',
        '110': 'PWMxBAMM',
        '111': 'PWMxBAMMxINMODE'
    }

    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    peaks = read_bed(args.input_peaks)

    # One set of peak indexes per model, loaded in first/second/third order.
    first_names, second_names, third_names = (
        set(get_indexes(peaks, read_bed(path))) for path in model_paths)

    petal_labels = creat_petal(first_names, second_names, third_names)

    # Write the raw per-region values to TSV.
    data = {id_to_name[region]: value
            for region, value in petal_labels.items()}
    write_table(out_dir + '/' + tag + '_COUNT.tsv', data)

    # Replace each value by its percentage of all peaks, then draw.
    for region in petal_labels.keys():
        share = int(petal_labels[region]) / len(peaks) * 100
        petal_labels[region] = '{:.2f}%'.format(share)

    venn3_unweighted(petal_labels,
                     set_labels=set_names,
                     set_colors=generate_colors(n_colors=3))
    plt.savefig(out_dir + '/' + tag + '_VENN.pdf', dpi=150)
Пример #10
0
def _venn3(filea, fileb, filec):
    """Draw a Venn diagram of three '<name>.txt' data files and keep only the TIFF.

    Each argument is a file stem: data is read from ``<stem>.txt`` via
    ``readdata`` and the stem doubles as the set label. The figure is saved
    as '<filea> n <fileb> n <filec>.png', passed to ``png2tiff``, and the PNG
    is then removed.
    """
    stems = (filea, fileb, filec)
    set1, set2, set3 = [readdata(stem + '.txt') for stem in stems]

    fig = plt.figure(figsize=(cm2inch(17, 17)), dpi=600)
    venn3_unweighted([set1, set2, set3], set_labels=stems)

    png_name = ' n '.join(stems) + '.png'
    fig.savefig(png_name, dpi=600)
    png2tiff(png_name)
    os.remove(png_name)
Пример #11
0
def draw_venn(df):
    """Count genes per Venn region from ``df`` and save 'gene_count_summary.jpg'.

    Relies on module-level state: ``lists`` (columns checked for the
    all-three region), ``group`` (maps 'A'/'B'/'C' to isolate column names)
    and ``subset`` (receives one count per region). As before, ``group`` also
    gains 'AB', 'AC' and 'BC' entries holding '<isolate1>-<isolate2>'.

    A row counts for a region when its 'No. isolates' value matches the
    region size and the relevant isolate columns are non-null.
    """
    subgroups = ['AB', 'AC', 'BC']

    # Genes common to all three isolates. dropna() returns a new frame, so
    # no in-place mutation of a chained-index slice is needed.
    all_ = df[df['No. isolates'] == 3][lists].dropna()
    subset['ABC'] = all_.shape[0]

    # Genes unique to a single isolate. Pair keys added to `group` by an
    # earlier call are skipped: their values are not column names, and their
    # counts are recomputed below anyway.
    for member in list(group.keys()):
        if member in subgroups:
            continue
        isolate = group[member]
        unique = df[(df['No. isolates'] == 1) & (df[isolate].notnull())]
        subset[member] = unique.shape[0]

    # Genes shared by exactly two isolates.
    for subgroup in subgroups:
        member1 = group[subgroup[0]]
        member2 = group[subgroup[1]]
        common = df[(df['No. isolates'] == 2)][[member1, member2]].dropna()

        group[subgroup] = member1 + '-' + member2
        subset[subgroup] = common.shape[0]

    # matplotlib-venn expects the order A, B, AB, C, AC, BC, ABC.
    venn_values = [subset[key]
                   for key in ['A', 'B', 'AB', 'C', 'AC', 'BC', 'ABC']]

    venn3_unweighted(subsets=venn_values,
                     set_labels=(group['A'], group['B'], group['C']),
                     alpha=0.7)
    plt.title('Gene Count')
    plt.savefig('gene_count_summary.jpg')
Пример #12
0
def motifplot(G, proatom_order, lig_name, MotifFolder):
    '''Plot the biochemical properties of a ligand's binding motifs.

    One figure holds a bar chart and a pie chart per motif: the bar chart
    shows the motif's amino-acid distribution and the pie chart its atom
    properties. When the ligand has two or three function groups, a second
    figure shows a Venn diagram of the protein pockets containing each motif.

    Input:
        G: number of function groups of the ligand (int); also the number of
           chart rows.
        proatom_order: dict of the ligand's binding motifs, processed in
           sorted key order.
        lig_name: ligand name, used in the output file names.
        MotifFolder: folder that receives the plots.

    Output:
        "<lig_name>_analysis.png" is always written to MotifFolder;
        "<lig_name>_provenn.png" is written only when G is 2 or 3.
        Both figures are closed before returning, so repeated calls (one per
        ligand) do not accumulate open figures.
    '''
    # squeeze=False keeps `axes` two-dimensional even when G == 1, so the
    # axes[row, col] indexing below works for any G.
    fig, axes = plt.subplots(G, 2, figsize=(8, 3 * G), dpi=300, squeeze=False)
    fig1, axes1 = plt.subplots(1, 1)
    cluster_set = []
    labels = []

    # One row per motif: bar chart on the left, pie chart on the right.
    for row, (label, atoms) in enumerate(sorted(proatom_order.items())):
        A, aminoAcids, props = GetMotif(atoms)
        PlotBarChart(axes[row, 0], aminoAcids)
        PlotPieChart(axes[row, 1], props)
        cluster_set.append(set(A))
        labels.append(label)
    fig.savefig(os.path.join(MotifFolder, "%s_analysis.png" % lig_name),
                dpi=300,
                bbox_inches="tight")

    # Venn plot of pocket overlap, available only for two or three motifs.
    if G == 2:
        venn2_unweighted(cluster_set, labels, ax=axes1)
    elif G == 3:
        venn3_unweighted(cluster_set, labels, ax=axes1)
    if G in (2, 3):
        fig1.savefig(os.path.join(MotifFolder, "%s_provenn.png" % lig_name),
                     dpi=300)

    plt.close(fig)
    plt.close(fig1)
Пример #13
0
def venn_of_df(df, labels):
    """Draw an unweighted Venn diagram from the positive entries of ``df``.

    Each column contributes the set of index values whose entry in that
    column is greater than zero. ``labels`` are used as the set labels. The
    current axes are then re-prepared for equal-area circles.
    """
    subsets = [set(df.index[df[col] > 0].tolist()) for col in df.columns]

    venn3_unweighted(subsets, set_labels=labels)

    # Circle geometry for seven equal regions, used to prepare the axes.
    centers, radii = _venn3.solve_venn3_circles((1, 1, 1, 1, 1, 1, 1))
    _common.prepare_venn_axes(plt.gca(), centers, radii)
Пример #14
0
def test_pr_28():
    """Check ``subset_label_formatter`` on the weighted and unweighted venn2/venn3."""
    import matplotlib_venn as mv

    def scaled(x):
        return 'Value: %+0.3f' % (x / 100.0)

    sizes3 = (1, 2, 3, 4, 5, 6, 7)
    sizes2 = (1, 2, 3)

    # Weighted diagrams: default formatter first, then the custom one.
    v = mv.venn3(sizes3, subset_label_formatter=None)
    assert v.get_label_by_id('010').get_text() == '2'
    v = mv.venn3(sizes3, subset_label_formatter=scaled)
    assert v.get_label_by_id('010').get_text() == 'Value: +0.020'
    v = mv.venn2(sizes2, subset_label_formatter=None)
    assert v.get_label_by_id('01').get_text() == '2'
    v = mv.venn2(sizes2, subset_label_formatter=scaled)
    assert v.get_label_by_id('01').get_text() == 'Value: +0.020'

    # Unweighted diagrams with the custom formatter.
    v = mv.venn3_unweighted(sizes3, subset_label_formatter=scaled)
    assert v.get_label_by_id('010').get_text() == 'Value: +0.020'
    v = mv.venn2_unweighted(sizes2, subset_label_formatter=scaled)
    assert v.get_label_by_id('01').get_text() == 'Value: +0.020'
Пример #15
0
def venn_diagram(all_barcode_number_dict):
    """Draw one Venn diagram per timepoint on a 2x3 grid, then save and show it.

    Panels 0-4 use the entries 'Day 0', 'Day 6', ..., 'Day 24' of
    ``all_barcode_number_dict``; the sixth panel is left blank. The figure is
    saved as 'VennDiagram_EachTimepoint.svg'.
    """
    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True, figsize=[12, 8])
    plt.suptitle("Distribution of Lineages")

    # axes.flat walks the grid row by row, i.e. panel = 3 * row + column.
    for panel, ax in enumerate(axes.flat):
        ax.axis('off')
        if panel == 5:
            break
        day = 'Day {}'.format(6 * panel)
        ax.set_title(day)
        venn3_unweighted(subsets=all_barcode_number_dict[day],
                         set_labels=('S2', 'S1', 'S3'),
                         set_colors=('#70A1D7', '#F47C7C', '#A1DE93'),
                         ax=ax)

    plt.savefig('VennDiagram_EachTimepoint.svg', format='svg', dpi=720)
    plt.show()
Пример #16
0
    def venn(self, amplicons, primers, mismatch):
        """Save a Venn diagram of the amplicons found by the first two or three primers.

        ``amplicons`` maps a primer to an iterable of its amplicons;
        ``primers`` is the sequence of primers to compare (only the first
        three are used); ``mismatch`` is interpolated into the output name
        'venn_mismatch<mismatch>.pdf'. With fewer than two primers nothing is
        drawn and no figure is created. Returns None.
        """
        n = len(primers)
        if n < 2:
            # Nothing to compare; return before allocating a figure.
            return

        fig, ax = plt.subplots()

        if n > 2:
            primers = primers[:3]
            draw = venn3_unweighted
        else:
            draw = venn2_unweighted

        # Build a real list: the venn functions call len() on `subsets`,
        # which fails for the lazy map object Python 3 returns.
        subsets = [set(amplicons.get(k)) for k in primers]
        venn_data = draw(subsets, set_labels=primers)

        for text in venn_data.set_labels:
            text.set_fontsize(16)

        fig_fname = 'venn_mismatch{}.pdf'.format(mismatch)
        plt.tight_layout()
        plt.savefig(fig_fname, dpi=1000)
        # Close this figure only; a trailing plt.clf() would implicitly
        # create a fresh empty figure and leave it open.
        plt.close(fig)
Пример #17
0
def test_pr_28():
    """Check ``subset_label_formatter`` for every venn2/venn3 entry point."""
    import matplotlib_venn as mv

    def scaled(x):
        return 'Value: %+0.3f' % (x / 100.0)

    three = (1, 2, 3, 4, 5, 6, 7)
    two = (1, 2, 3)

    # (drawing function, subset sizes, region id, formatter, expected text)
    cases = [
        (mv.venn3, three, '010', None, '2'),
        (mv.venn3, three, '010', scaled, 'Value: +0.020'),
        (mv.venn2, two, '01', None, '2'),
        (mv.venn2, two, '01', scaled, 'Value: +0.020'),
        (mv.venn3_unweighted, three, '010', scaled, 'Value: +0.020'),
        (mv.venn2_unweighted, two, '01', scaled, 'Value: +0.020'),
    ]
    for draw, sizes, region, formatter, expected in cases:
        v = draw(sizes, subset_label_formatter=formatter)
        assert v.get_label_by_id(region).get_text() == expected
Пример #18
0
def draw_venn(infile, outfile, weighted=False):
	"""Draw a three-set Venn diagram from pickled sets, save it and show it.

	``infile`` is a pickle file holding the subsets accepted by the venn3
	functions; ``outfile`` is the image path handed to savefig. With
	``weighted`` true an area-proportional diagram with outlines is drawn;
	otherwise an unweighted diagram with hand-drawn circle outlines.
	"""
	# SECURITY: pickle.load can execute arbitrary code from the file; only
	# use it on trusted input. Pickle data is binary, so open in "rb".
	with open(infile, "rb") as f:
		sets = pickle.load(f)

	captions = ('Aesthetics', 'Semantics', 'Aes+Sem')

	if weighted:
		venn3_circles(sets)
		diagram = venn3(sets, set_labels=captions)
	else:
		diagram = venn3_unweighted(sets, set_labels=captions)

	for label in diagram.subset_labels:
		# A region without a label is reported as None; skip it.
		if label is not None:
			label.set_fontsize(14)

	if not weighted:
		# Outline each circle explicitly on top of the filled regions.
		ax = pp.gca()
		for (c, r) in zip(diagram.centers, diagram.radii):
			ax.add_patch(Circle(c, r, lw=2, alpha=1, facecolor='none'))

	pp.savefig(outfile)
	pp.show()
Пример #19
0
            pairs.append(tuple(l.split()))

    #get all locus names, loci informative for each cross
    informative, loci = read_genotypes(sys.argv[1], pairs)

    #get the loci captured at each k, the number of added loci,
    #and the pairs decending order of how many more loci they add
    shared_loci, added_loci, best_pairs = optimize_crosses(6, pairs, 
                                                           informative)

    #labels for pairs
    best_pairs_labels=["{0} x {1}".format(*x) for x in best_pairs]

    #plot results
    fig = plt.figure(figsize = (8,8))
    ax1 = plt.subplot2grid((3,3), (0,0), colspan = 3, rowspan = 2)
    ax1.set_title('Shared Loci in Top Three Pairs', fontsize = 26)
    #venn diagram on top
    venn3_unweighted([informative[x] for x in best_pairs[:3]], 
                     set_labels = best_pairs_labels[:3], ax=ax1)
    #bar chart on bottom
    ax2 = plt.subplot2grid((3,3), (2,0), colspan=3)
    ax2.bar(range(len(added_loci[:8])), added_loci[:8])
    ax2.set_ylabel('Number of Loci')
    ax2.set_title('Added Informative Loci Per Pair', fontsize = 26)
    ax2.set_xticklabels(best_pairs_labels[:8], rotation = 17 )
    #add a bit of extra space
    fig.subplots_adjust(hspace=.44)
    fig.savefig('summmary.pdf')
    plt.show()
Пример #20
0
# Overlap between the D_G site list and the F9_D4_TCP differential sites.
dict1 = D_G_list
dict2 = compareSiteList['F9_UD_vs_F9_D4_TCP']['F9_D4_TCP_diff']
findG = FindOverlap(dict1, dict2, dictNames = ['D', 'C'], seqChr = chrSeq)

# Total number of intersecting sites summed over every entry of the
# 'Intersection' site list (not used again in this section).
G_len = np.sum([len(values) for keys, values in findG['siteList']['Intersection'].items()])

# Derive each exclusive region count by subtracting the regions already
# known from the combined *_len totals computed earlier in the script.
G = findG['siteNum']['Total']['Intersection']
E = E_G_len - G
A = A_E_len - E
F = G_F_len - G
B = B_F_len - F 
D = D_G_len - G
C = C_E_len - E

plt.figure(figsize = (6,6))
# The subsets tuple is positional: per matplotlib-venn the slots are the
# regions 100, 010, 110, 001, 101, 011, 111, so here A, B, D, C, E, F, G.
out = venn3_unweighted(subsets = (A, B, D, C, E, F, G), set_labels = ('F9_D4_Min_UD', 'F9_D4_PG_Min_UD', 'F9_D4_TCP_Min_UD'))
# Make the region exclusive to the first set fully opaque.
out.get_patch_by_id('100').set_alpha(1.0)
# Use a 6 pt font for the set captions and for the region counts.
for text in out.set_labels:
	text.set_fontsize(6)
for text in out.subset_labels:
	text.set_fontsize(6)
plt.savefig("Figures/F9_D4_Min_UD_diffVenn.pdf", bbox_inches = 'tight')

#### Save sites into bed files ####

def dict2df(dicty, seqChr, filename):
	df = pd.DataFrame(columns = ['Chr', 'Start', 'End'])
	for ii in seqChr:
		cc = np.repeat("chr" + str(ii), len(dicty[ii]))
		tmparr = list(dicty[str(ii)])
		tmparr = sorted(tmparr)
Пример #21
0
import matplotlib.pyplot as plt
from matplotlib_venn import venn3_unweighted

from omicsplot.omics_excel import OmicsExcel
from omicsplot.set_tools import venn_set

# Input workbooks: specific (re-ranked), unspecific and Sequest results.
in_file1 = r'C:\Users\Chen\IdeaProjects\semi\data\kidney_rerank.xlsx'
in_file2 = r"C:\Users\Chen\IdeaProjects\semi\data\kidney_unspecific_test.xlsx"
in_file3 = r"C:\Users\Chen\IdeaProjects\semi\data\kidney_sequest.xlsx"

excel1, excel2, excel3 = [
    OmicsExcel(path) for path in (in_file1, in_file2, in_file3)
]

# Sequence sets of the three workbooks, combined into Venn regions.
seq_sets = [workbook.get_seq_set() for workbook in (excel1, excel2, excel3)]
venn_region = venn_set(*seq_sets)

seq_v = venn3_unweighted(subsets=venn_region,
                         set_labels=("Specific", "Unspecific", "Sequest"))
plt.title("Sequence Overlap")
plt.show()

# venn_delta_region = venn_set(excel1.get_pep_delta_set(), excel2.get_pep_delta_set(), excel3.get_pep_delta_set())
# seq_delta_v = venn3_unweighted(subsets=venn_delta_region, set_labels=("Specific", "Unspecific", "Sequest"))
#
# plt.title("Glycopeptide Overlap")
# plt.show()
Пример #22
0
# Read one transcript id per line, stripping surrounding whitespace.
with open(contrast_2) as inp:
    t_contrast_2 = [line.strip() for line in inp]

with open(contrast_3) as inp:
    t_contrast_3 = [line.strip() for line in inp]

# Sanity check: no file may list the same transcript twice. A failure means
# there are duplicate transcripts in one of the files, which is not expected.
unique_2 = set(t_contrast_2)
unique_1 = set(t_contrast_1)
unique_3 = set(t_contrast_3)

assert len(t_contrast_2) == len(unique_2)
assert len(t_contrast_1) == len(unique_1)
assert len(t_contrast_3) == len(unique_3)

# The checks passed, so continue with the sets.
t_contrast_2 = unique_2
t_contrast_1 = unique_1
t_contrast_3 = unique_3

# Plot
venn3_unweighted(subsets=(t_contrast_1, t_contrast_2, t_contrast_3),
                 set_labels=labels,
                 alpha=0.55)
plt.savefig(output, transparent=True, format='pdf')
Пример #23
0
# Report the size of every set next to its display name.
for one_set, (_one_file, _one_type, caption) in zip(sets, set_data):
    print("%i in %s" % (len(one_set), caption))

names = [caption for _one_file, _one_type, caption in set_data]
lengths = [len(members) for members in sets]

if len(sets) == 3:
    # matplotlib_venn is imported only when a diagram is actually needed.
    try:
        from matplotlib_venn import venn3_unweighted
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    captions_with_totals = [
        "{} (Total {})".format(*pair) for pair in zip(names, lengths)
    ]
    venn3_unweighted(sets, captions_with_totals)

if len(sets) == 2:
    try:
        from matplotlib_venn import venn2_unweighted
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    venn2_unweighted(
        sets,
        [
            "{} (Total {})".format(name, length)
            for (name, length) in zip(names, lengths)
        ],
def venn_analysis_diagram(key_id, col_from_patients, col_from_procedures,
                          col_from_medications):
    """
    Plot a Venn diagram of the IDs shared by the Patients, Procedures and
    Medications datasets.

    The function was created to better understand the missing records
    encountered in the data set provided. For instance, the raw data
    contains patient and provider IDs in the Medications and Procedures
    datasets that do not exist in the Patients table.

    Caution: the context is tied to the analysis created for the Fairfax
    County Fire and Rescue Department.

    Parameters
    ----------
        key_id : string (mandatory)
            Documents the ID/column used for this particular analysis
            (i.e., PatientId,
                   FRDPersonnelID (a.k.a., ProviderId),
                   composite index (concatenation of PatientId with
                   FRDPersonnelID)). The values 'PatientId',
            'FRDPersonnelID' and 'comp_idx' are replaced by friendlier
            names in the plot title; any other string is used as is.

        col_from_patients : pandas Series (mandatory)
            A single feature: either the PatientId, the FRDPersonnelID, or a
            concatenated version of these two columns.

            Caution: to keep the proper context for this analysis, this
                     Series should come from the *Patients* dataset.
                     Otherwise the observations made may not make sense with
                     the results obtained.

        col_from_procedures : pandas Series (mandatory)
            Same content as above, taken from the *Procedures* dataset.

        col_from_medications : pandas Series (mandatory)
            Same content as above, taken from the *Medications* dataset.

    Returns
    -------
        The result of ``plt.show()`` after drawing the Venn diagram.

        Caution: arguments provided in the wrong order will cause the
        observations to lose the context for which they were made.

    Raises
    ------
        AssertionError
            If any column input is not a pandas Series, or if ``key_id`` is
            not a string.
    """
    # Local import so the type check does not depend on a module-level alias.
    import pandas as pd

    ### Assert Input ###
    # isinstance replaces the former comparison of str(type(x)) against a
    # literal; it also accepts Series subclasses.
    columns = (col_from_patients, col_from_procedures, col_from_medications)
    assert all(isinstance(col, pd.Series) for col in columns), '''One 
        of the column inputs provided is not a Series, please read the function 
        information available at /src/d06_reporting folder'''

    assert isinstance(key_id, str), '''The key_id provided is not
        a string, please read the function information available at 
        /src/d06_reporting folder'''

    vd3 = venn3_unweighted([set(col) for col in columns],
                           set_labels=('Patients', 'Procedures',
                                       'Medications'),
                           set_colors=('#d7191c', '#abdda4', '#2b83ba'),
                           alpha=0.8)

    for text in vd3.set_labels:  # Change label size
        text.set_fontsize(16)
    for text in vd3.subset_labels:  # Change number size
        text.set_fontsize(12)

    # Friendlier names for the known ID columns; other values pass through.
    display_names = {
        'PatientId': 'Patients',
        'FRDPersonnelID': 'Providers',
        'comp_idx': 'Compound IDs: Patient & Provider',
    }
    key_id = display_names.get(key_id, key_id)

    plt.title('Venn Diagram for {} Across All Datasets'.format(key_id),
              fontname='Times New Roman',
              fontsize=20,
              pad=30,
              backgroundcolor='#f1a340',
              color='black')

    return plt.show()
Пример #25
0
# Entry 2 of each allTaxaSets list is bound to the *QuarSet name; entries 0
# and 1 are bound to the *FullSet and *HalfSet names.
phylumQuarSet = allTaxaSets['P'][2]
phylumSets = [phylumFullSet, phylumHalfSet, phylumQuarSet]

genusFullSet = allTaxaSets['G'][0]
genusHalfSet = allTaxaSets['G'][1]
genusQuarSet = allTaxaSets['G'][2]
genusSets = [genusFullSet, genusHalfSet, genusQuarSet]

speciesFullSet = allTaxaSets['S'][0]
speciesHalfSet = allTaxaSets['S'][1]
speciesQuarSet = allTaxaSets['S'][2]
speciesSets = [speciesFullSet, speciesHalfSet, speciesQuarSet]

# Add option to plot friendlier Venn diagrams for color-blind audiences

# Phylum-level diagram: yellow/blue/red sets, 16 pt captions, 18 pt counts.
v3Phylum = venn3_unweighted(phylumSets, ('D1', 'D0.5', 'D0.25'),
                            set_colors=('y', 'b', 'r'))
for text in v3Phylum.set_labels:
    text.set_fontsize(16)
for text in v3Phylum.subset_labels:
    text.set_fontsize(18)
plt.title('Phylum', fontsize=20)
plt.savefig('krakenPhylumVennCB.png')
# Clear the current figure and axes before the next diagram is drawn.
plt.clf()
plt.cla()

# Order-level diagram, styled like the phylum one (orderSets is defined
# outside this section).
v3Order = venn3_unweighted(orderSets, ('D1', 'D0.5', 'D0.25'),
                           set_colors=('y', 'b', 'r'))
for text in v3Order.set_labels:
    text.set_fontsize(16)
for text in v3Order.subset_labels:
    text.set_fontsize(18)
Пример #26
0
                A +=1
    else:
        if mean_d > 0:
            if mean_n > 0:
                DN +=1
            else:
                D +=1
        else:
            if mean_n > 0:
                N +=1
    
    '''
    if lnum<3:
        print(lnum)
        print(ln_ls)
        print(mean_a, mean_d, mean_n)
    '''


# Region counts in matplotlib-venn order: A, D, AD, N, AN, DN, ADN.
total = [A, D, AD, N, AN, DN, ADN]

# Console report follows the Venn order ...
print('A: {};\nD: {};\nAD: {};\nN: {};\nAN: {};\nDN: {};\nADN: {}'.format(*total))
# ... while the file lists singles first, then pairs, then the triple.
OUTFILE.write('A: {}\nD: {}\nN: {}\nAD: {}\nAN: {}\nDN: {}\nADN: {}\n'.format(
    A, D, N, AD, AN, DN, ADN))

print('Sum: {}'.format(sum(total)))
print('Over!!!')

plt.figure(figsize=(15, 15))
v = venn3_unweighted(subsets=tuple(total), set_labels=('A', 'D', 'N'))

plt.savefig('venn_gene.jpg')

Пример #27
0
D_cutoff_id = namingscheme_parts[1]
D_plant_id = namingscheme_parts[2]

# NOTE(review): plant_type stays undefined for any other plant id, which
# makes the title line below raise NameError -- confirm the valid ids.
if D_plant_id == 'AC':
    plant_type = 'WT'
elif D_plant_id == 'dgt':
    plant_type = 'dgt'

# The TPM cutoff is the last '_'-separated field of the cutoff id.
TPM_value = D_cutoff_id.split('_')[-1]

D_expr = set(readExpressedGenes(D_filepath))
E_expr = set(readExpressedGenes(E_filepath))
M_expr = set(readExpressedGenes(M_filepath))

# Generate venn diagram
venn = venn3_unweighted([D_expr, E_expr, M_expr], set_labels=("Differentiation", "Elongation", "Meristem"))
plt.title(f"Expressed Genes TPM > {TPM_value} ({plant_type})")
plt.savefig(os.path.join(venn_output_location, venn_output_filename))

# Create gene lists for each section of venn diagram
intersections_dict = {
    'DEM': D_expr.intersection(E_expr).intersection(M_expr),
    'DE_only': D_expr.intersection(E_expr).difference(M_expr),
    'EM_only': E_expr.intersection(M_expr).difference(D_expr),
    'DM_only': D_expr.intersection(M_expr).difference(E_expr),
    'D_only': D_expr.difference(E_expr).difference(M_expr),
    'E_only': E_expr.difference(D_expr).difference(M_expr),
    'M_only': M_expr.difference(D_expr).difference(E_expr),
}

# One text file per region, one gene per line.
for inter_key in intersections_dict:
    filename = inter_key + '_' + D_plant_id + '_' + D_cutoff_id + '.txt'
    filepath = os.path.join(genelist_output_location, filename)
    # The context manager closes (and flushes) each file; previously the
    # handles were left open.
    with open(filepath, 'w') as IF:
        for member in intersections_dict[inter_key]:
            IF.write(member + '\n')
Пример #28
0
# One count per diagram region, in the order given by set_to_title; regions
# that never occurred count as 0.
for condition_nb in set_to_title.values():
	sub.append(venn_diagram[condition_nb] if condition_nb in venn_diagram else 0)

# With the three test files the expected result is:
# Only in test1 | Only in test2 | Common test1 test2 | Only in test3 |
# Common test1 test3 | Common test2 test3 | Common test1 test2 test3
# (8, 6, 1, 7, 0, 2, 0)

plt.figure(figsize=(14, 10))  # (width, height)
_nb_sets = len(liste_sets)
if _nb_sets == 2:
	v = venn2_unweighted(subsets=tuple(sub), set_labels=(liste_sets[0], liste_sets[1]))
elif _nb_sets == 3:
	v = venn3_unweighted(subsets=tuple(sub), set_labels=(liste_sets[0], liste_sets[1], liste_sets[2]))
	# File names next to the circles first, then the numbers inside them.
	for _texts, _font_size in ((v.set_labels, 12), (v.subset_labels, 16)):
		for text in _texts:
			text.set_fontsize(_font_size)

# Only set a title when one was supplied.
if args.venn_title is not None:
	plt.title(args.venn_title)

# display title
#liste_sets_string = ', '.join(liste_sets)
#print liste_sets_string
#plt.title(liste_sets_string)

try :
	mpl.pyplot.savefig(args.img_output_file)
Пример #29
0
#############################  VENN DIAGRAM  ###############################
############################################################################

from matplotlib_venn import venn3, venn3_circles, venn3_unweighted
from matplotlib import pyplot as plt

# Load the pickled results table.  The context manager closes the file even
# when unpickling fails.
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load pickles that come from a trusted source.
with open('data/Data519_original20_Results_mix100_ALL.pickle', 'rb') as infile:
    myanalysistestsift = pickle.load(infile)

#################################################
# Missed candidates: files whose accuracy is below 100%
x = list(myanalysistestsift[myanalysistestsift['acc_hsv'] < 100]['file'])
y = list(myanalysistestsift[myanalysistestsift['acc_rgb'] < 100]['file'])
z = list(myanalysistestsift[myanalysistestsift['acc_sift_BF'] < 100]['file'])
# Start a fresh figure so this diagram is not drawn over another one.
plt.figure()
venn3_unweighted([set(x), set(y), set(z)], set_labels=('hsv', 'rgb', 'sift'))
plt.title('candidates_missed_Accuracy_20 < 100')

###########################################################
# Captured candidates: files whose accuracy is exactly 100%
x = list(myanalysistestsift[myanalysistestsift['acc_hsv'] == 100]['file'])
y = list(myanalysistestsift[myanalysistestsift['acc_rgb'] == 100]['file'])
z = list(myanalysistestsift[myanalysistestsift['acc_sift_BF'] == 100]['file'])
# Separate figure: without it this diagram and title would land on the same
# axes as the "missed" diagram above.
plt.figure()
venn3_unweighted([set(x), set(y), set(z)], set_labels=('hsv', 'rgb', 'sift'))
plt.title('candidates_captured_Accuracy_20 = 100')

##########################################################
# Missed candidates: files whose count is below 4
x = list(myanalysistestsift[myanalysistestsift['Count_hsv'] < 4]['file'])
y = list(myanalysistestsift[myanalysistestsift['Count_rgb'] < 4]['file'])
z = list(myanalysistestsift[myanalysistestsift['Count_sift_BF'] < 4]['file'])
Пример #30
0
#    if(v.get_patch_by_id('101')):
#        v.get_patch_by_id('101').set_color('#ff00ff')
#    if(v.get_patch_by_id('111')):
#        v.get_patch_by_id('111').set_color('#ffffff')

    path = './images/'
    if not os.path.exists(path):
        os.makedirs(path)
        
    path1 = path + x + y + z
    #c = venn3_circles(s, linestyle='solid')
    plt.savefig(path1 + '_weighted')
    plt.close() 
     
    print('Generating unweighted Venn diagram for:', x, y, z)
    v = venn3_unweighted(s, set_labels=(x,y,z), alpha=0.7)
    path2 = path1 + '_unweighted'
    plt.savefig(path2)
    plt.close()
     
## create venn sets filled with words  
## moved to vennwords.py
 #   print('Generating Venn diagram with words for:', x, y, z)    
 #   v = venn3_wordcloud(s, set_labels=(x,y,z), alpha=0.7, wordcloud_kwargs={'max_words':5,'min_font_size':5})
 #   path3 = path1 + '_words'
 #   plt.savefig(path3)
 #   plt.close()



 ## for wordclouds I want also to count the frequencies so I use a list instead of a set   
Пример #31
0
# Columns written to every *.sites file, in output order.
_SITE_COLUMNS = ['chromosome', 'start', 'end', 'name', 'score', 'strand', 'site']

# Count-table column -> per-peak class label tallied into that column.
_CLASS_COLUMNS = {
    'no_sites': 'no_sites',
    'not_overlap': 'not_overlap',
    'first_model_sites': 'first_model',
    'second_model_sites': 'second_model',
    'third_model_sites': 'third_model',
    'overlap_first_second_models': 'overlap_first_second_models',
    'overlap_first_third_models': 'overlap_first_third_models',
    'overlap_second_third_models': 'overlap_second_third_models',
    'overlap_all_models': 'overlap_all_models',
}

# Number of peaks per bin of the cumulative count table.
_BIN_SIZE = 100


def _peak_numbers(frame):
    """Return the integer found after the first '_' of each frame['name'] entry."""
    return [int(name.split('_')[1]) for name in frame['name']]


def _read_model_sites(path, label):
    """Read one model's site table, tag it with *label*, make names numeric."""
    sites = read_bed_like_file(path)
    sites['type'] = label
    sites['name'] = _peak_numbers(sites)
    return sites


def _sites_for_class(sites, classification, label):
    """Select the rows of *sites* for peaks whose class equals *label*.

    NOTE(review): searchsorted returns insertion positions within
    sites['name'], and those positions are then used as .loc labels.  That
    picks the intended rows only if 'name' is sorted and the index is the
    default 0..n-1 -- confirm against read_bed_like_file.
    """
    peak_positions = np.array(
        [index for index, kind in enumerate(classification) if kind == label])
    return sites.loc[sites['name'].searchsorted(peak_positions)]


def _write_sites(sites, path):
    """Write the standard site columns of *sites* as a headerless TSV."""
    sites[_SITE_COLUMNS].to_csv(path, sep='\t', index=False, header=False)


def main():
    """Classify peaks by which models hit them and write tables, plot and sites.

    Reads the peak table and three model site tables named on the command
    line, classifies every peak with peak_classification(), and writes into
    ``out_dir`` (all names prefixed with ``tag``):

    * ``_COUNT.tsv``     - cumulative class counts per block of 100 peaks;
    * ``_FREQUENCY.tsv`` - the same counts divided by the number of peaks;
    * ``_PIC.pdf``       - unweighted Venn diagram of the last frequency row;
    * nine ``*.sites``   - per-model and per-overlap site tables.

    Raises:
        IndexError: if there are fewer than 100 peaks, i.e. no complete bin.
    """
    args = parse_args()
    first_model_path = args.first_model_path
    second_model_path = args.second_model_path
    third_model_path = args.third_model_path
    peaks_path = args.input_peaks
    tag = args.tag
    out_dir = args.out_dir
    fname = args.fname
    sname = args.sname
    tname = args.tname

    # makedirs also creates missing parent directories and tolerates an
    # already existing out_dir.
    os.makedirs(out_dir, exist_ok=True)
    prefix = out_dir + '/' + tag

    data = read_peaks(peaks_path)
    data['name'] = _peak_numbers(data)

    first_model_sites = _read_model_sites(first_model_path, 'first_model')
    second_model_sites = _read_model_sites(second_model_path, 'second_model')
    third_model_sites = _read_model_sites(third_model_path, 'third_model')

    classification = [
        peak_classification(peak, first_model_sites, second_model_sites,
                            third_model_sites)
        for _, peak in data.iterrows()
    ]

    #############################################
    #Make table with count of diff kind of peaks#
    #############################################
    # Upper bound of every complete bin: 100, 200, ...
    top = [i * _BIN_SIZE for i in range(1, len(data) // _BIN_SIZE + 1)]
    if not top:
        raise IndexError(
            'fewer than %d peaks: no complete bin to count' % _BIN_SIZE)

    per_bin = []
    for upper in top:
        subset_classification = classification[upper - _BIN_SIZE:upper]
        per_bin.append({
            column: subset_classification.count(label)
            for column, label in _CLASS_COLUMNS.items()
        })

    # Running totals over the bins.  DataFrame.append, which the row-by-row
    # accumulation used to rely on, was removed in pandas 2.0; cumsum yields
    # the same table.  Cast to float to keep the number formatting of the
    # append-based result (TODO confirm against an old output file).
    count = pd.DataFrame(per_bin).cumsum().astype(float)
    count['peaks'] = top
    count = count[[
        'first_model_sites', 'second_model_sites',
        'overlap_first_second_models', 'third_model_sites',
        'overlap_first_third_models', 'overlap_second_third_models',
        'overlap_all_models', 'no_sites', 'not_overlap', 'peaks'
    ]]
    count.to_csv(prefix + '_COUNT.tsv', sep='\t', index=False)

    # Same table with every count expressed as a fraction of the peaks seen.
    frequency = count.copy()
    for column in frequency.columns.drop('peaks'):
        frequency[column] = frequency[column] / frequency['peaks']
    frequency.to_csv(prefix + '_FREQUENCY.tsv', sep='\t', index=False)

    # The first seven columns are already in the region order that
    # venn3_unweighted takes (A, B, AB, C, AC, BC, ABC).
    venn3_unweighted(subsets=np.around(np.array(frequency.iloc[-1, :7]), 2),
                     set_labels=(fname, sname, tname))
    plt.savefig(prefix + '_PIC.pdf', dpi=150)

    ##################################
    # Resolve every selection before writing anything, so a failed lookup
    # cannot leave a partial set of *.sites files behind.
    only_first_model_sites = _sites_for_class(
        first_model_sites, classification, 'first_model')
    only_second_model_sites = _sites_for_class(
        second_model_sites, classification, 'second_model')
    only_third_model_sites = _sites_for_class(
        third_model_sites, classification, 'third_model')
    only_second_and_third_model_sites_3 = _sites_for_class(
        third_model_sites, classification, 'overlap_second_third_models')
    only_second_and_third_model_sites_2 = _sites_for_class(
        second_model_sites, classification, 'overlap_second_third_models')
    overlap_model_sites = _sites_for_class(
        third_model_sites, classification, 'overlap_all_models')

    outputs = [
        (only_first_model_sites, '_only_first_model.sites'),
        (first_model_sites, '_all_first_model.sites'),
        (only_second_model_sites, '_only_second_model.sites'),
        (second_model_sites, '_all_second_model.sites'),
        (only_third_model_sites, '_only_third_model.sites'),
        (third_model_sites, '_all_third_model.sites'),
        (overlap_model_sites, '_overlap_model_sites.sites'),
        (only_second_and_third_model_sites_3,
         '_overlap_second_and_third_sites_3.sites'),
        (only_second_and_third_model_sites_2,
         '_overlap_second_and_third_sites_2.sites'),
    ]
    for sites, suffix in outputs:
        _write_sites(sites, prefix + suffix)
Пример #32
0
                      'SynSig'),
          set_colors=('coral', 'skyblue', 'lightgreen'),
          alpha=0.7)
# Style the diagram created just above: bold 25-pt set names and 25-pt
# region labels.
for text in v.set_labels:
    text.set_fontweight('bold')
    text.set_fontsize(25)
for text in v.subset_labels:
    text.set_fontsize(25)

plt.show()
plt.close()

# Equal-area version of the three-set comparison, in grey/red colours.
v = venn3_unweighted(
    subsets=[set(adult), set(db), set(pred)],
    set_labels=(
        'Adult Brain \n Synapse Validation',
        'Synapse Databases',
        'SynSig',
    ),
    set_colors=('gray', 'lightgray', 'red'),
    alpha=0.7,
)
for text in v.set_labels:
    text.set_fontweight('bold')
    text.set_fontsize(25)
for text in v.subset_labels:
    text.set_fontsize(25)

plt.show()
plt.close()

v = venn3([set(adult), set(db), set(pred)],
          set_labels=('Adult Brain \n Synapse Validation', 'Synapse Databases',
                      'SynSig'),
Пример #33
0
# One count per diagram region, in the order given by set_to_title; regions
# that never occurred count as 0.
for condition_nb in set_to_title.values():
	sub.append(venn_diagram[condition_nb] if condition_nb in venn_diagram else 0)

# With the three test files the expected result is:
# Only in test1 | Only in test2 | Common test1 test2 | Only in test3 |
# Common test1 test3 | Common test2 test3 | Common test1 test2 test3
# (8, 6, 1, 7, 0, 2, 0)

plt.figure(figsize=(14, 10))  # (width, height)
_nb_sets = len(liste_sets)
if _nb_sets == 2:
	v = venn2_unweighted(subsets=tuple(sub), set_labels=(liste_sets[0], liste_sets[1]))
elif _nb_sets == 3:
	v = venn3_unweighted(subsets=tuple(sub), set_labels=(liste_sets[0], liste_sets[1], liste_sets[2]))
	# File names next to the circles first, then the numbers inside them.
	for _texts, _font_size in ((v.set_labels, 12), (v.subset_labels, 16)):
		for text in _texts:
			text.set_fontsize(_font_size)

# Only set a title when one was supplied.
if args.venn_title is not None:
	plt.title(args.venn_title)

# display title
#liste_sets_string = ', '.join(liste_sets)
#print liste_sets_string
#plt.title(liste_sets_string)

try :
	mpl.pyplot.savefig(args.img_output_file)
Пример #34
0
def venn_stats(fnames, f_c2n, f_jsnice, f_jsnaughty):
    """Print per-tool success percentages and optionally save a Venn diagram.

    Every line of ``name_stats.csv`` is handed to ``venn_process_line`` in a
    multiprocessing pool.  Each truthy result is a triple of flags
    ``(c2n, jsnice, jsnaughty)``; the triples are tallied and reported as
    percentages of all truthy results.

    Args:
        fnames: iterable of file names, exposed to the workers as
            ``venn_process_line.fname_map``.
        f_c2n: iterable exposed as ``venn_process_line.f_c2n_map``.
        f_jsnice: iterable exposed as ``venn_process_line.f_jsnice_map``.
        f_jsnaughty: iterable exposed as ``venn_process_line.f_jsnaughty_map``.

    Reads the module-level ``args`` (``save_venn``, ``venn_weighted``).
    Returns None.
    """
    # Membership maps are attached to the worker function before the pool is
    # created.
    venn_process_line.fname_map = {fname: 1 for fname in fnames}
    venn_process_line.f_c2n_map = {fname: 1 for fname in f_c2n}
    venn_process_line.f_jsnice_map = {fname: 1 for fname in f_jsnice}
    venn_process_line.f_jsnaughty_map = {fname: 1 for fname in f_jsnaughty}

    # Occurrences of each (c2n, jsnice, jsnaughty) flag triple.
    tally = {}
    total = 0
    with open('name_stats.csv', 'r') as f, multiprocessing.Pool() as p:
        for res in tqdm.tqdm(p.imap_unordered(venn_process_line, f)):
            if res:
                total += 1
                b1, b2, b3 = res
                flags = (b1, b2, b3)
                tally[flags] = tally.get(flags, 0) + 1

    if total == 0:
        # Nothing to take percentages of; the division below would fail.
        print("No usable lines in name_stats.csv; nothing to report.")
        return

    def percent(flags):
        """Share of all results that produced exactly this flag triple."""
        return round(tally.get(flags, 0) * 100 / total, 2)

    nums = [
        percent((1, 0, 0)),  # only Context2Name
        percent((0, 1, 0)),  # only JSNice
        percent((0, 0, 1)),  # only JSNaughty
        percent((1, 1, 0)),  # Context2Name & JSNice
        percent((1, 0, 1)),  # Context2Name & JSNaughty
        percent((0, 1, 1)),  # JSNice & JSNaughty
        percent((1, 1, 1)),  # all three
    ]

    print()
    print("============")
    print("Venn Diagram Stats")
    print("==================")
    print()
    print("Only Context2Name : ", nums[0], '%')
    print("Only JSNice       : ", nums[1], '%')
    print("Only JSNaughty    : ", nums[2], '%')
    print()
    print("Only Context2Name & JSNice    : ", nums[3], '%')
    print("Only Context2Name & JSNaughty : ", nums[4], '%')
    print("Only JSNice & JSNaughty       : ", nums[5], '%')
    print()
    print("All three : ", nums[6], '%')
    print()
    print("============")
    print()

    if args.save_venn is not None:
        # matplotlib_venn orders regions as (A, B, AB, C, AC, BC, ABC), so
        # "only JSNaughty" and "Context2Name & JSNice" swap places.
        nums[2], nums[3] = nums[3], nums[2]
        set_labels = ('Context2Name', 'JSNice', 'JSNaughty')
        fig = plt.figure()
        if args.venn_weighted:
            matplotlib_venn.venn3(subsets=nums, set_labels=set_labels)
            outline_subsets = nums
        else:
            matplotlib_venn.venn3_unweighted(subsets=nums,
                                             set_labels=set_labels)
            # The unweighted diagram is laid out with equal region areas, so
            # the outlines must use the same areas to coincide with it.
            outline_subsets = (1, 1, 1, 1, 1, 1, 1)
        matplotlib_venn.venn3_circles(subsets=outline_subsets,
                                      linestyle='solid',
                                      linewidth=0.3)

        fig.savefig(args.save_venn, bbox_inches='tight', dpi=1000)
# De-duplicated adult overlap genes, all tagged with the same stage label.
adult_df = pd.DataFrame(
    {
        'Stage': 'Adult Synapse: Syndromic Autism Genes',
        'Genes': list(set(adult_overlap)),
    }
)

# Stack the fetal and adult tables, show the result and save it.
final = pd.concat([fetal_df, adult_df])
print(final)
final.to_csv('nb_val_new_syndromic.csv')

# Genes from either stage that are also present in nb.
pred_val = list(set(fetal + adult) & set(nb))

v = venn3_unweighted(
    subsets=[set(pred_val), set(syndromic), set(db)],
    set_labels=(
        'Proteomics Validated \n ENSig',
        'Syndromic Autism',
        'Synapse Databases',
    ),
    set_colors=('skyblue', 'coral', 'gray'),
    alpha=0.7,
)
# Bold 30-pt set names.
for text in v.set_labels:
    text.set_fontweight('bold')
    text.set_fontsize(30)
# Echo every region label, then enlarge it.
for text in v.subset_labels:
    print(text)
    text.set_fontsize(30)

# Emphasise the third region label (index 2) with bold, larger type.
target = v.subset_labels[2]
target.set_fontweight('bold')
target.set_fontsize(35)
Пример #36
0
# Report the size of every input set next to its caption.
for s, (f, t, c) in zip(sets, set_data):
    print("%i in %s" % (len(s), c))

# The caption is the third field of each set_data entry.
names = [caption for _, _, caption in set_data]
lengths = list(map(len, sets))

if len(sets) == 3:
    # Import lazily so a missing plotting library gives a readable message.
    try:
        from matplotlib_venn import venn3_unweighted
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    venn3_unweighted(
        sets,
        ["{} (Total {})".format(*pair) for pair in zip(names, lengths)],
    )

if len(sets) == 2:
    try:
        from matplotlib_venn import venn2_unweighted
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    venn2_unweighted(
        sets,
        [
            "{} (Total {})".format(name, length)
            for (name, length) in zip(names, lengths)
        ],
from nltk.corpus.reader.wordnet import WordNetCorpusReader
from matplotlib import pyplot as plt
from matplotlib_venn import venn3_unweighted

wn = WordNetCorpusReader("./resources/WordNet-3.0/dict", None)

# All adjective synsets, and all noun synsets filed under 'noun.attribute'.
adjectives = set(wn.all_synsets('a'))
attributes = {
    noun for noun in wn.all_synsets('n')
    if noun.lexname() == 'noun.attribute'
}

# Synsets reached through the attributes() relation of any adjective.
direct_attributes = set()
for adjective in adjectives:
    direct_attributes.update(adjective.attributes())

# Noun synsets reached through derivationally related forms of any adjective
# lemma.
morphologically_related = set()
for adjective in adjectives:
    for lemma in adjective.lemmas():
        for related_lemma in lemma.derivationally_related_forms():
            related_synset = related_lemma.synset()
            if related_synset.pos() == 'n':
                morphologically_related.add(related_synset)

diagram = venn3_unweighted(
    [attributes, direct_attributes, morphologically_related],
    [
        'labeled as\nnoun.attribute',
        'direct\nattributes',
        'morphologically\nrelated nouns',
    ],
)

# Black outlines on every region.
for patch in diagram.patches:
    patch.set_edgecolor('k')
    patch.set_facecolor('w')  # remove this line for color diagram.

plt.savefig('./images/venn.pdf')