示例#1
0
文件: gtf.py 项目: nvictus/CoolBox
 def plot(self, ax, gr: GenomeRange, **kwargs):
     """Draw the rows fetched for ``gr`` as graphic features on ``ax``.

     Rows come from ``self.fetch_plot_data(gr)``. If a ``row_filter``
     property is set, each ``;``-separated clause is applied in turn. Rows
     whose ``end - start`` does not exceed ``length_ratio_thresh`` times the
     region length are then dropped, and the rest are plotted with a colour
     picked at random from ``self.colors``.
     """
     self.ax = ax
     df = self.fetch_plot_data(gr)
     if self.has_prop("row_filter"):
         for filter_ in self.properties["row_filter"].split(";"):
             try:
                 # The first comparison character separates column from condition.
                 operator_at = list(re.finditer("[=><!]", filter_))[0].start()
                 column = filter_[:operator_at].strip()
                 condition = filter_[operator_at:]
                 # The evaluated text refers to the local name ``df``; do not rename it.
                 # NOTE(review): eval() executes the condition as Python code --
                 # confirm that row_filter never comes from untrusted input.
                 df = eval(f'df[df["{column}"]{condition}]')
             except IndexError:
                 # No comparison character was found in this clause.
                 log.warning(f"row filter {filter_} is not valid.")
     region_length = gr.end - gr.start
     min_length = region_length * self.properties["length_ratio_thresh"]
     df = df[(df["end"] - df["start"]) > min_length]
     features = [
         GraphicFeature(
             start=row['start'],
             end=row['end'],
             strand=(1 if row['strand'] == '+' else -1),
             label=row['gene_name'],
             color=random.choice(self.colors),
         )
         for _, row in df.iterrows()
     ]
     record = GraphicRecord(
         sequence_length=gr.end - gr.start,
         features=features,
         first_index=gr.start,
     )
     record.plot(ax=ax, with_ruler=False, draw_line=False)
     self.plot_label()
示例#2
0
def vis_pegRNA2(df,genome_fasta=None,**kwargs):
	"""Given one instance of easy-prime prediction (rawX format), generate DNA visualization

	Input
	--------
	df: the data frame contains 4 rows: RTT, PBS, sgRNA, ngRNA. It is indexed by
	pegRNA id and the columns CHROM, start, end, POS, REF, ALT,
	predicted_efficiency, strand and type are read.
	genome_fasta: forwarded to get_fasta_single to fetch the plotted sequence.

	Output
	--------
	0, after the features have been drawn on a new matplotlib figure.

	"""
	pegRNA_id = df.index.tolist()[0]
	variant_id = pegRNA_id.split("_")[0]
	# The frame is indexed by pegRNA id strings, so first-row access must be
	# positional (.iloc); integer keys on such a Series are deprecated in pandas.
	chrom = df['CHROM'].iloc[0]
	# Widen the window to multiples of 10 around the features.
	start = df['start'].min()
	start -= start%10
	start -= 1
	end = df['end'].max()
	end -= end%10
	end += 10

	variant_pos = df.POS.min()
	ref = df.REF.iloc[0]
	alt = df.ALT.iloc[0]
	predicted_efficiency = df.predicted_efficiency.iloc[0]*100
	pos = variant_pos-start
	sequence = get_fasta_single(chrom,start,end,genome_fasta).upper()
	fig,ax = plt.subplots()
	feature_list = []
	for _,r in df.iterrows():
		# Feature coordinates are relative to the start of the fetched window.
		r_start = r.start-start
		r_end = r_start+(r.end-r.start)
		r_strand = get_strand(r.strand)
		gf = GraphicFeature(start=r_start, end=r_end, strand=r_strand, 
			color=my_colors[r.type],label=r.type)
		feature_list.append(gf)
	record = GraphicRecord(sequence=sequence, features=feature_list)

	# ax, _ = record.plot(figure_width=int(len(sequence)/5))
	record.plot(ax=ax,figure_width=int(len(sequence)/5))
	# NOTE(review): this early return makes everything below unreachable
	# (sequence text, variant highlight, tick labels, title, base64 PNG).
	# It is kept so the return value stays 0 -- confirm whether it is a
	# debugging leftover before removing it.
	return 0
	record.plot_sequence(ax)
	ax.fill_between((pos-1.5, pos-0.5), +1000, -1000, alpha=0.5,color=my_colors['variant'])
	locs, labels = plt.xticks()
	new_labels = []
	flag = True
	for i in locs:
		if flag:
			new_labels.append("%s %s"%(chrom,int(start+i+1)))
			flag=False
		else:
			new_labels.append(int(start+i+1))
	plt.xticks(locs,new_labels)
	plt.title("ID: %s, CHR: %s, POS: %s, REF: %s, ALT: %s \n Predicted efficiency: %.1f"%(variant_id,chrom,variant_pos,ref,alt,predicted_efficiency)+"%")

	my_stringIObytes = io.BytesIO()
	ax.figure.savefig(my_stringIObytes, format='png',bbox_inches='tight')
	my_stringIObytes.seek(0)
	img_string = base64.b64encode(my_stringIObytes.read())
	return "data:image/png;base64,%s"%(img_string.decode("utf-8"))
示例#3
0
文件: viz.py 项目: BMJHayward/navtome
def demo_dna_features_viewer():
    """Plot a four-feature demo record and return the ``plt`` module."""
    # (start, end, strand, color, label) for each demo feature.
    specs = (
        (0, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    )
    features = [
        GraphicFeature(start=begin, end=stop, strand=strand, color=color,
                       label=label)
        for begin, stop, strand, color, label in specs
    ]
    GraphicRecord(sequence_length=1000, features=features).plot(figure_width=5)
    return plt
示例#4
0
def Visualiser_sekvens(gen):
    """Plot the sequence view selected by ``gen``.

    ``'alle'`` plots the translated GenBank record; ``'aldh1'`` and
    ``'CYP71AV1'`` plot a hard-coded sequence with a promoter feature and a
    gene feature, plus the sequence text and the translation of the gene
    span. Any other value plots nothing. Always returns None.
    """
    import warnings
    from Bio import BiopythonParserWarning
    warnings.simplefilter('ignore', BiopythonParserWarning)

    if gen == 'alle':
        graphic_record = ChangeFeatures().translate_record(
            'Artemisia%20annua.gb')
        ax, _ = graphic_record.plot(figure_width=20)
        ax.figure.tight_layout()
        return

    # Per-gene settings: sequence, promoter (start, end, color),
    # gene (start, end, color, label) and figure width.
    if gen == "aldh1":
        sequence = "CTGTGTCTAGATTTACGGTTTTGTTGAGTATGGAGTATTTATCCCTGTGTCTAGATTTACGGTTTGAAGACTCAGGAAACTCTCATTAAGCGATCAACGTAGCATGATCATCAAAAGCATGGTTTTGTAAACTCGACATGTCAATGTACCAGCCGATCCAAGTATCCAAGCAATTGGTTCACCACACCAAAAGAGTTTTACACTTAAAAACAACAATTAATTCTAAATAGTCTATGTAATGAAATATGTTTTGTGTGGGTTAGTTTAGTTCATAGTTGCGCCATAAGTATTTACAGCAA"
        promoter = (0, 28, '#ffd700')
        gene = (29, 299, "#ffcccc", "aldh1")
        width = 50
    elif gen == 'CYP71AV1':
        sequence = "ATTTTTGGGGGCCCCCCCCCATTTTTTGGGGGGCGCGCGATGAAGTTGGTCATTCGAAATATACTTCCAAAATATGAAGTTGGTCATTCGAAATATACTTCCAAACAACCGAGCTGGTCAGGTAGATTTTGTTTCAGATGAAGATGCAATCCACCGTTGGGGGAGTTTCATGAATAACAATCGCAAATAAGATATATTGTTGATTCTTGATGATGTTTGGTCTGATACCATCATCACCGACCTCCAATTCAGGTCACGTGGATACAAGATCCTCGTGACCTCTGAAACAACCTTTAAGAGATTCGATACATATAAAGTGAGACCTCTCAGTGTTCAAGATGCCATCAATCTGTTATGCTATTCAACACTTTCGGAGCGTGCAAGTCAAGCCACAAATGACATACAGACCTTGTTGACAAGGTGAAATTTCAAATTATTCCAAGATTCATGTTTCATACCTTTATAAGAAAGTAATATCTAAACCATATTAACAAATACTAACAATTAACTTTCAAATGTTTTTGTAGTTAACCAAATGTTGCAAGAAGAATCCGCTCGCCTTAAGTGTCATTGGTGGTCGCCTAAAGGGGACACAAATGGAAAGTTGGCATCATACACTGAAAAAGCTATCTCAAGCCACACACCCTCTTATCGACCTTCCTTTGGATGAGGCAAACAGATTTCATCTCGCAAGAGCTCTCGGTTTACTCAAAGATGATGAACGCAACAGCCCCAGAAGTTCAACCTCGAAATTGACCCGATCTTACCAAGTCA"
        promoter = (1, 38, '#cffccc')
        gene = (39, 774, "#cff77d", "CYP71AV1")
        width = 100
    else:
        return

    record = GraphicRecord(
        sequence=sequence,
        features=[
            GraphicFeature(start=promoter[0], end=promoter[1], strand=+1,
                           color=promoter[2], label="Promotor"),
            GraphicFeature(start=gene[0], end=gene[1], strand=+1,
                           color=gene[2], label=gene[3]),
        ])
    ax, _ = record.plot(figure_width=width)
    record.plot_sequence(ax)
    # The translated span is the gene feature's own (start, end).
    record.plot_translation(ax, (gene[0], gene[1]),
                            fontdict={'weight': 'bold'})
    return
def contig_visualization_onefile(contig_array, genome, drug, rgi):
    """Plot all genes of ``contig_array`` on one record and save it as a PNG.

    Args:
        contig_array: DataFrame with the columns GeneStart, GeneEnd, Strand,
            Genecolor and GeneName. Its index is reset in place.
        genome: not used by this function.
        drug: string used in the plot title and the file name.
        rgi: value appended (via ``str``) to the title and the file name.

    Raises:
        IndexError: if ``contig_array`` has no rows.
    """
    save_path = "contigend_visualizations_single_genome/"

    contig_array.reset_index(drop=True, inplace=True)

    features = []
    spans = []
    for i in contig_array.index:
        gene_start = contig_array["GeneStart"][i]
        gene_end = contig_array["GeneEnd"][i]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append((gene_start, gene_end))

    # The record runs from the first gene's start to the last gene's end.
    first_index = spans[0][0]
    length = spans[-1][1] - first_index

    record = GraphicRecord(first_index=first_index,
                           sequence_length=length,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)

    title = drug + "_" + str(rgi)
    ax.set_title(title)

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    ax.figure.savefig(os.path.join(save_path, title + ".png"))
def contigend_visualization(contig_array, end_direction, genome, reverse_term,
                            drug, rgi_gene):
    """Plot the genes of a contig with an optional contig-end marker and save a PNG.

    Args:
        contig_array: DataFrame with the columns GeneStart, GeneEnd, Strand,
            Genecolor and GeneName. Its index is reset in place.
        end_direction: ``"upward"`` adds a marker before the first gene,
            ``"downward"`` adds one after the last gene; other values add none.
        genome: used (via ``str``) in the title and as the file name.
        reverse_term: the x axis is inverted when this equals the string ``"-1"``.
        drug: string prefix of the plot title.
        rgi_gene: not used by this function.

    Raises:
        IndexError: if no feature was collected (empty ``contig_array`` and no marker).
    """
    features = []
    spans = []

    contig_array.reset_index(drop=True, inplace=True)

    if end_direction == "upward":
        first_start = contig_array["GeneStart"][0]
        features.append(
            GraphicFeature(start=first_start - 1500,
                           end=first_start,
                           strand=+1,
                           color="#0A090A",
                           label="Contig_Ends"))
        spans.append((first_start - 1500, first_start))

    for i in contig_array.index:
        gene_start = contig_array["GeneStart"][i]
        gene_end = contig_array["GeneEnd"][i]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append((gene_start, gene_end))

    if end_direction == "downward":
        # NOTE(review): start and end are both GeneEnd + 1500, so this marker
        # has zero width, unlike the 1500-wide "upward" marker -- confirm the
        # intended coordinates.
        marker_pos = contig_array["GeneEnd"].iloc[-1] + 1500
        features.append(
            GraphicFeature(start=marker_pos,
                           end=marker_pos,
                           strand=+1,
                           color="#0A090A",
                           label="Contig_Ends"))
        spans.append((marker_pos, marker_pos))

    # The record runs from the first collected start to the last collected end.
    first_index = spans[0][0]
    length = spans[-1][1] - first_index

    record = GraphicRecord(first_index=first_index,
                           sequence_length=length,
                           features=features)

    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)

    if reverse_term == "-1":
        ax.invert_xaxis()

    # str() keeps the title consistent with the file name for non-string genomes.
    ax.set_title(drug + "_" + str(genome))

    save_path = "contigend_visualizations/"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    ax.figure.savefig(os.path.join(save_path, str(genome) + ".png"))
示例#7
0
 def show_feature(self, figure_width=8, xlabel=""):
     """Plot ``self._features`` on a linear record and return the axes.

     Prints a message and returns None when there are no features.
     ``xlabel`` is applied to the x axis in bold, size 16.
     """
     if not len(self._features):
         print("No feautres to show")
         return None
     axis, _ = GraphicRecord(
         sequence_length=self._max_length,
         features=self._features,
     ).plot(figure_width=figure_width)
     axis.set_xlabel(xlabel, fontweight="bold", fontsize=16)
     return axis
def test_by_hand(tmpdir):
    """Build linear and circular records by hand and export both as PNGs."""
    out_dir = str(tmpdir)
    # (start, end, strand, color, label) for each hand-made feature.
    specs = [
        (5, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    ]
    features = [
        GraphicFeature(start=begin, end=stop, strand=strand, color=color,
                       label=label)
        for begin, stop, strand, color, label in specs
    ]

    # Linear view: one ruler-less plot purely for coverage, then the export.
    record = GraphicRecord(sequence_length=1000, features=features)
    record.plot(figure_width=5, with_ruler=False)
    ax, _ = record.plot(figure_width=5)
    ax.figure.savefig(os.path.join(out_dir, "by_hand.png"))

    # Circular view of the same features.
    circular_rec = CircularGraphicRecord(sequence_length=1000,
                                         features=features)
    ax2, _ = circular_rec.plot(figure_width=4)
    ax2.figure.tight_layout()
    ax2.figure.savefig(os.path.join(out_dir, "by_hand_circular.png"),
                       bbox_inches="tight")
示例#9
0
def Protein_structure(ID,exons,domains,path,trID,exons_in_interface):
    """Save a two-panel image of a transcript to ``path + trID``.

    The upper panel shows the first feature list returned by
    ``Visualize_transciript`` (titled 'Coding Exons'), the lower panel the
    second one (titled 'Pfam Domains', with ruler and inline annotation).
    ``ID`` is not used.
    """
    exon_features, domain_features, fend = Visualize_transciript(
        exons, domains, exons_in_interface)

    fig, axes = plt.subplots(2, 1, figsize=(16, 3.5))
    exon_ax, domain_ax = axes

    exon_record = GraphicRecord(sequence_length=fend, features=exon_features)
    exon_record.plot(ax=exon_ax, figure_width=23, with_ruler=False)

    domain_record = GraphicRecord(sequence_length=fend,
                                  features=domain_features)
    domain_record.plot(ax=domain_ax, figure_width=23, with_ruler=True,
                       annotate_inline=True)

    # Each title is placed below its panel via a negative y position.
    for axis, caption, y_pos in ((exon_ax, 'Coding Exons', -0.4),
                                 (domain_ax, 'Pfam Domains', -0.5)):
        axis.title.set_text(caption)
        axis.title.set_position([.5, y_pos])

    fig.savefig(path + trID, bbox_inches='tight')
    return
示例#10
0
 def plot_align(self, ax, genome_range):
     """Draw the fetched alignments of ``genome_range`` as stranded features on ``ax``.

     Rows with flag bit ``0b100`` set are skipped (presumably the SAM
     "unmapped" bit -- confirm), as are rows whose ``seq`` is not longer than
     ``genome_range.length * length_ratio_thresh`` (default 0.005). Returns
     None without plotting when nothing remains.
     """
     gr = genome_range
     df = self.fetch_intervals(gr)
     kept = df[np.bitwise_and(df['flag'], 0b100) == 0]
     len_thresh = self.properties.get("length_ratio_thresh", 0.005)
     kept = kept[kept['seq'].str.len() > (gr.length * len_thresh)]
     if kept.shape[0] <= 0:
         return
     # Build the reverse-strand mask on the filtered rows and pair it
     # positionally, so the result does not depend on the frame's index labels.
     is_reverse = np.bitwise_and(kept['flag'], 0b10000) != 0
     features = []
     for (_, row), reverse in zip(kept.iterrows(), is_reverse):
         # Feature coordinates are relative to the start of the region.
         start = row['pos'] - gr.start
         end = row['pos'] + len(row['seq']) - gr.start
         features.append(
             GraphicFeature(
                 start=start,
                 end=end,
                 strand=-1 if reverse else 1,
                 color=self.properties['color'],
             ))
     record = GraphicRecord(sequence_length=gr.length, features=features)
     record.plot(ax=ax, with_ruler=False, draw_line=False)
示例#11
0
def vis_sccmec(faa_file_sccmec, annotation_file, length_sccmec, core_proteins,
               blastp):
    """Plot the annotated SCCmec features and save them as ``SCCmec_<id>.png``.

    The annotation is loaded, updated with the core-protein information and
    turned into one feature per strand symbol found in each row's sense
    field. ``<id>`` is the part of ``annotation_file`` after the last ``_``
    and before the first following ``.``.
    """
    # Protein sequences from the prokka annotation of the SCCmec element.
    faa_dict_sccmec = fasta2dict(faa_file_sccmec)

    # Annotation rows updated with the core proteins of the cluster.
    datafile = annotation_data(annotation_file)
    rows = update_annotation(datafile, blastp, faa_dict_sccmec, core_proteins)

    features = []
    for _row_id, sense, start, end, _size, _length, gene in rows:
        if gene == 'core-proteins':
            color, label = '#ff8848', None
        else:
            # Genes without an entry fall back to grey / no label.
            try:
                color = colors[gene]
            except KeyError:
                color = 'grey'
            try:
                label = labels[gene]
            except KeyError:
                label = None
        # A sense field containing both symbols yields two features.
        for symbol, strand in (('-', -1), ('+', +1)):
            if symbol in sense:
                features.append(
                    GraphicFeature(start=int(start),
                                   end=int(end),
                                   strand=strand,
                                   color=color,
                                   label=label))

    record = GraphicRecord(sequence_length=length_sccmec, features=features)
    ax, _ = record.plot(figure_width=20)

    id_ = annotation_file.split('_')[-1].split('.')[0]
    ax.figure.savefig('SCCmec_{}.png'.format(id_), dpi=300)
示例#12
0
文件: bam.py 项目: wkopp/CoolBox
 def plot_align(self, ax, gr: GenomeRange):
     """Draw the fetched alignments of ``gr`` as stranded features on ``ax``.

     Rows with flag bit ``0b100`` set are skipped (presumably the SAM
     "unmapped" bit -- confirm), as are rows whose ``seq`` is not longer than
     ``gr.length * length_ratio_thresh``. Returns None without plotting when
     nothing remains.

     Raises:
         AssertionError: if ``gr`` is not a ``GenomeRange``.
     """
     assert isinstance(
         gr, GenomeRange), "The input gr should be type GenomeRange"
     df = self.fetch_plot_data(gr)
     kept = df[np.bitwise_and(df['flag'], 0b100) == 0]
     len_thresh = self.properties["length_ratio_thresh"]
     kept = kept[kept['seq'].str.len() > (gr.length * len_thresh)]
     if kept.shape[0] <= 0:
         return
     # Build the reverse-strand mask on the filtered rows and pair it
     # positionally, so the result does not depend on the frame's index labels.
     is_reverse = np.bitwise_and(kept['flag'], 0b10000) != 0
     features = []
     for (_, row), reverse in zip(kept.iterrows(), is_reverse):
         # Feature coordinates are relative to the start of the region.
         start = row['pos'] - gr.start
         end = row['pos'] + len(row['seq']) - gr.start
         features.append(
             GraphicFeature(
                 start=start,
                 end=end,
                 strand=-1 if reverse else 1,
                 color=self.properties['color'],
             ))
     record = GraphicRecord(sequence_length=gr.length, features=features)
     record.plot(ax=ax, with_ruler=False, draw_line=False)
示例#13
0
def contigend_visualization(contig_array, end_direction, genome):
    """Plot the genes of a contig with an optional contig-end marker.

    ``contig_array`` is re-indexed in place and must provide the columns
    GeneStart, GeneEnd, Strand, Genecolor and GeneName. ``"upward"`` adds a
    marker covering the 1500 positions before the first gene; ``"downward"``
    adds one from 1500 to 2500 positions after the last gene. The figure is
    saved as ``<genome>.png``.
    """
    contig_array.reset_index(drop=True, inplace=True)

    features = []
    spans = []

    def add_feature(start, end, strand, color, label):
        # Record the feature together with its (start, end) span.
        features.append(
            GraphicFeature(start=start,
                           end=end,
                           strand=strand,
                           color=color,
                           label=label))
        spans.append((start, end))

    if end_direction == "upward":
        first_start = contig_array["GeneStart"][0]
        add_feature(first_start - 1500, first_start, +1, "#0A090A",
                    "Contig_Ends")

    for i in contig_array.index:
        add_feature(contig_array["GeneStart"][i],
                    contig_array["GeneEnd"][i],
                    contig_array["Strand"][i],
                    contig_array["Genecolor"][i],
                    str(contig_array["GeneName"][i]))

    if end_direction == "downward":
        last_end = contig_array["GeneEnd"].iloc[-1]
        add_feature(last_end + 1500, last_end + 2500, +1, "#0A090A",
                    "Contig_Ends")

    # The record runs from the first collected start to the last collected end.
    first_index = spans[0][0]
    record = GraphicRecord(first_index=first_index,
                           sequence_length=spans[-1][1] - first_index,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    ax.figure.savefig(str(genome) + ".png")
示例#14
0
def create_dna_structure(file_name):
    """Plot the repeats and spacers from the request JSON and save a PNG.

    The request body provides ``spacerRepeats`` (entries with ``position``,
    ``repeat`` and, where a ``spacer`` key is present, ``lengths``) and
    ``length``. The record is cropped to 50 positions either side of the
    repeats and saved as ``static/logos/<file_name>.png``.
    """
    results = request.get_json()
    repeats = results['spacerRepeats']

    features = []
    for number, entry in enumerate(repeats, start=1):
        repeat_start = entry['position']
        repeat_end = repeat_start + len(entry['repeat'])
        features.append(
            GraphicFeature(start=repeat_start,
                           end=repeat_end,
                           strand=+1,
                           color="#cffccc",
                           label="Repeat_" + str(number)))
        if 'spacer' in entry:
            # The spacer starts one position after its repeat ends.
            features.append(
                GraphicFeature(start=repeat_end + 1,
                               end=repeat_end + entry['lengths'][1],
                               strand=+1,
                               color="#ccccff",
                               label="Spacer_" + str(number)))

    record = GraphicRecord(sequence_length=results['length'],
                           features=features)
    first, last = repeats[0], repeats[-1]
    window = (first['position'] - 50,
              last['position'] + len(last['repeat']) + 50)
    record = record.crop(window)
    ax, _ = record.plot(figure_width=10)
    ax.figure.savefig('static/logos/' + str(file_name) + '.png',
                      bbox_inches='tight')
    return jsonify('{"success":1}')
示例#15
0
def haplotype_blocks_fig(model, ref_seq):
    """Plot both aligned alleles over ``ref_seq`` and save ``haplotypes.png``.

    Each allele returned by ``model.align_alleles()`` becomes a forward
    feature starting at 0 and as long as that allele. The sequence text and
    the translation of positions 8-23 are drawn on top.
    """
    s1, s2 = model.align_alleles()
    ref_length = len(ref_seq)
    allele_features = [
        GraphicFeature(start=0, end=len(allele), strand=+1, color=shade)
        for allele, shade in ((s1, '#ffcccc'), (s2, '#cffccc'))
    ]
    record = GraphicRecord(sequence=ref_seq,
                           sequence_length=ref_length,
                           features=allele_features)
    ax, _ = record.plot(figure_width=5)
    record.plot_sequence(ax)
    record.plot_translation(ax, (8, 23), fontdict={'weight': 'bold'})
    ax.figure.savefig('haplotypes.png', bbox_inches='tight')
示例#16
0
    def visualize_mrna_strand(self, dpi=120, cmap='viridis'):
        """Plot the tag/protein layout of the transcript with probe sites overlaid.

        Args:
            dpi: resolution passed to ``plt.subplots``.
            cmap: colormap (name or object) used to colour the probe rows of
                ``self.probe_loc``.

        The figure is shown with ``fig.show()``; nothing is returned.
        """
        features = [
            GraphicFeature(start=0,
                           end=self.tag_length,
                           color=self._colors[0],
                           label='Tag'),
            GraphicFeature(start=self.tag_length,
                           end=self.total_length,
                           color=self._colors[1],
                           label='Protein'),
        ]

        probe = self.probe_loc
        # ``cm.get_cmap`` was removed in Matplotlib 3.9; ``plt.get_cmap``
        # accepts the same argument.
        cmap = plt.get_cmap(cmap)
        # Row index selects the colour, column index the position of each probe.
        hits = np.where(probe == 1)
        color = hits[0]
        location = hits[1]

        ncolors = probe.shape[0]
        colors = cmap(np.linspace(.01, .95, ncolors))
        features.extend(
            GraphicFeature(start=loc,
                           end=loc + 2,
                           color=colors[c],
                           linecolor=colors[c])
            for c, loc in zip(color, location))

        record = GraphicRecord(sequence_length=self.total_length,
                               features=features)

        fig, ax = plt.subplots(1, dpi=dpi)

        # Zero-length lines drawn first so the legend entries pick up the
        # probe colours.
        for c in range(ncolors):
            ax.plot([0, 0], [0, 0], color=colors[c])

        colorlabels = ['Color %d' % i for i in range(ncolors)]
        ax, _ = record.plot(figure_width=6, ax=ax)

        ax.axes.legend(colorlabels, loc=7)
        ax.text(0, 5, 'Transcript Name: %s' % self.name)
        ax.text(0, 4, 'Total Length: %d codons' % self.total_length)
        ax.text(0, 3, 'Seq: %s ...' % self.aa_seq[:10])
        fig.show()
示例#17
0
def test_sequence_and_translation_plotting():
    """Plot a short record together with its sequence text and a translation."""
    from dna_features_viewer import (
        GraphicFeature,
        GraphicRecord,
        CircularGraphicRecord,
    )

    # (start, end, color, label) for the two overlapping forward features.
    specs = ((5, 10, "#ffd700", "bbS-1"), (8, 15, "#ffcccc", "CrC"))
    features = [
        GraphicFeature(start=begin, end=stop, strand=+1, color=color,
                       label=label)
        for begin, stop, color, label in specs
    ]

    record = GraphicRecord(sequence="ATGC" * 7, features=features)
    ax, _ = record.plot(figure_width=5)
    record.plot_sequence(ax)
    record.plot_translation(ax, (8, 23), fontdict={"weight": "bold"})
        output_file_utr
    ]),
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    p.wait()
    features = []

    with open(output_file_utr) as fp:
        next(fp)
        for line in fp:
            print(line)
            content = line.split("\t")
            features.append(
                GraphicFeature(start=int(content[3]),
                               end=int(content[4]),
                               strand=+1,
                               color=get_color(content[8]),
                               label=re.sub(r'mmu-', '', content[1])))

    record = GraphicRecord(sequence_length=len(str(utr.seq)),
                           features=features)  # Circular
    record.plot(figure_width=12)
    plt.title(' '.join([utr.id, 'sequence']))
    patch1 = mpatches.Patch(color="#00ff99", label='6mer')
    patch2 = mpatches.Patch(color="#9999ff", label='7mer-1a')
    patch3 = mpatches.Patch(color="#ff66cc", label='7mer-m8')
    patch4 = mpatches.Patch(color="#ff0000", label='8mer-1a')
    plt.legend(handles=[patch1, patch2, patch3, patch4])
    plt.show()
def probe_graph(file_name, path):
    """Plot every probe pair from a results file and write the probe sets to text.

    The input is read line by line as ``:``-separated records keyed by the
    first field: ``Sequence``, ``Initiator``, ``Probes`` and ``Name``. For
    each probe pair a plot is saved under ``path`` and the assembled probe
    sequences are written to ``<path>/<name>_Probe_Set.txt``.

    Args:
        file_name: path of the results file to read.
        path: directory that receives the text file and the plots.

    Raises:
        NameError: if the file lacks a ``Name`` line, or lacks a
            ``Sequence``/``Initiator`` line while probes are present.
    """
    probes = []
    # The context manager closes the input handle even if parsing fails.
    with open(file_name, "r") as f:
        for line in f:
            print(line)
            line_info = line.split(':')
            if (line_info[0] == "Sequence"):
                sequence = line_info[1]
            elif (line_info[0] == "Initiator"):
                initiator1 = line_info[1].upper()
                initiator2 = line_info[2].upper()
            elif (line_info[0] == "Probes"):
                # Stored in groups of four: probe1, probe2, start, end.
                probes.append(line_info[1].upper())
                probes.append(line_info[3].upper())
                probes.append(line_info[2])
                probes.append(line_info[4])
            elif (line_info[0] == "Name"):
                name = line_info[1].rstrip()
    file_write = path + "/" + name + "_Probe_Set" ".txt"
    # The output handle is likewise closed if plotting raises part-way through.
    with open(file_write, "w+") as w:
        print(probes)
        print(name)
        for x, a in enumerate(range(0, len(probes), 4), start=1):
            gstart = 28
            gend = 3
            probe1 = str(Seq(probes[a]).complement())
            probe2 = str(Seq(probes[a + 1]).complement())
            start = int(probes[a + 2])
            start -= 4
            if start <= 0:
                # The window would begin before the sequence: clamp it to 0
                # and shift the feature coordinates accordingly.
                gend = 0 - start - 1
                gstart = gend + 25
                start = 0
            end = int(probes[a + 3])
            subseq = sequence[start:end].upper()
            record = GraphicRecord(
                sequence=subseq,
                features=[
                    GraphicFeature(start=gstart,
                                   end=gend,
                                   strand=+1,
                                   color='#ffcccc',
                                   label=probe1),
                    GraphicFeature(start=gstart + 28,
                                   end=gstart + 2,
                                   strand=+1,
                                   color='#ccccff',
                                   label=probe2),
                    GraphicFeature(start=gstart,
                                   end=gstart,
                                   strand=-1,
                                   color='m',
                                   label="space"),
                    GraphicFeature(start=gstart + 1,
                                   end=gstart + 1,
                                   strand=-1,
                                   color='m',
                                   label="space"),
                    GraphicFeature(start=gstart,
                                   end=(gstart - len(initiator1)),
                                   strand=-1,
                                   color='y',
                                   label=initiator1),
                    # NOTE(review): this feature is labelled initiator2 but
                    # sized with len(initiator1) -- confirm that is intended.
                    GraphicFeature(start=gstart + 2,
                                   end=(gstart + 2 + len(initiator1)),
                                   strand=+1,
                                   color='y',
                                   label=initiator2)
                ])
            ax, _ = record.plot(figure_width=10)
            record.plot_sequence(ax)
            # Each written probe is initiator + "TT" spacer + reversed complement.
            total1 = initiator1 + "TT" + probe1[::-1]
            total2 = probe2[::-1] + "TT" + initiator2
            w.write("PROBE SET" + str(x) + "\n")
            w.write("Probe1:" + total1 + "\n")
            w.write("Probe2:" + total2 + "\n")
            tosave = path + "/" + name + "Plots for Probes" + str(x)
            ax.figure.savefig(tosave, bbox_inches='tight')
示例#20
0
                                          strand=-1,
                                          color="#cffccc",
                                          label="Gene 2"),
                           GraphicFeature(start=600,
                                          end=900,
                                          strand=+1,
                                          color="#ccccff",
                                          label="Gene 3")
                       ])
# Plot the full record next to a cropped close-up and save both panels to
# 'overview_and_detail.png'. ``record`` is built by the statements above.
zoom_start, zoom_end = 398, 428  # coordinates of the "detail"
cropped_record = record.crop((zoom_start, zoom_end))

# One row, two panels: overview on the left, detail on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 3))

# PLOT THE WHOLE SEQUENCE

ax1.set_title("Whole sequence", loc='left', weight='bold')
record.plot(ax=ax1)
# Shade the zoomed span; the +/-1000 limits are meant to cover the whole
# panel height.
ax1.fill_between((zoom_start, zoom_end), +1000, -1000, alpha=0.15)

# PLOT THE SEQUENCE DETAILS

cropped_record.plot(ax=ax2)
cropped_record.plot_sequence(ax=ax2)
# The translated location lies inside the cropped (398, 428) window.
cropped_record.plot_translation(ax=ax2,
                                location=(408, 423),
                                fontdict={'weight': 'bold'})
ax2.set_title("Sequence detail", loc='left', weight='bold')

fig.savefig('overview_and_detail.png', bbox_inches='tight')
示例#21
0
def upload(request):
    """Render ``upload.html``; on POST, search the uploaded sequence for motifs.

    On POST the uploaded ``document`` is decoded as UTF-8 and truncated to
    the first ``position`` characters. Every sequence from
    ``myapp/Jaspar.txt`` (``;``-separated id, sequence, organism, database)
    is searched in it with ``re.finditer``, and each hit becomes a feature
    in a figure saved under ``myapp/static/``. The template receives the
    matches as a tuple of (id, sequence, organism, database, locations,
    running number).
    """
    posted = False
    gene_sequence = ''
    figure_name = ''
    zipped = {}
    if request.method == 'POST':
        posted = True
        uploaded_file = request.FILES['document']
        position = int(request.POST.get('position'))

        # One (id, sequence, organism, database) tuple per reference line.
        motifs = []
        with open('myapp/Jaspar.txt', "r") as file:
            for line in file:
                fields = line.split(';')
                motifs.append((fields[0], fields[1], fields[2],
                               fields[3].rstrip('\n')))

        gene_sequence = ''.join(
            chunk.decode('utf-8') for chunk in uploaded_file)
        gene_sequence = gene_sequence[0:position]

        matched_ids = []
        matched_sequences = []
        matched_organisms = []
        matched_database = []
        all_locations = []
        features = []
        for motif_id, sequence, organism, source in motifs:
            locations = [
                m.start() for m in re.finditer(sequence, gene_sequence)
            ]
            if not locations:
                continue
            matched_ids.append(motif_id)
            matched_sequences.append(sequence)
            matched_organisms.append(organism)
            matched_database.append(source)
            all_locations.append(locations)
            features.extend(
                GraphicFeature(start=location,
                               end=location + len(sequence),
                               strand=+1,
                               color="#ffd700",
                               label=sequence)
                for location in locations)

        record = GraphicRecord(sequence_length=len(gene_sequence),
                               features=features)
        ax, _ = record.plot(figure_width=30)
        figure_name = uploaded_file.name + str(position) + '.png'
        ax.figure.savefig('myapp/static/' + figure_name,
                          bbox_inches='tight')
        print(matched_sequences)
        print(matched_organisms)
        print(all_locations)
        print(matched_database)
        sequence_nos = list(range(len(matched_sequences)))
        zipped = tuple(
            zip(matched_ids, matched_sequences, matched_organisms,
                matched_database, all_locations, sequence_nos))
    context = {
        'posted': posted,
        'zipped': zipped,
        'figure_name': figure_name,
        'gene_sequence': gene_sequence
    }
    return render(request, 'upload.html', context)
示例#22
0
                   strand=+1,
                   color="#ffcccc",
                   label="Gene 1 with a very long name"),
    GraphicFeature(start=400,
                   end=700,
                   strand=-1,
                   color="#cffccc",
                   label="Gene 2"),
    GraphicFeature(start=600,
                   end=900,
                   strand=+1,
                   color="#ccccff",
                   label="Gene 3")
]
# Plot the hand-built features (the ``features`` list defined above) on a
# 1000-long record.
record = GraphicRecord(sequence_length=1000, features=features)
record.plot(figure_width=5)

# Load a GenBank file into a graphic record via BiopythonTranslator.
from dna_features_viewer import BiopythonTranslator
graphic_record = BiopythonTranslator().translate_record(
    "../Callithrix_Analysis/DATA/!CLEAN/YFV_polyprotein_AFH35044.gb")

ax, _ = graphic_record.plot(figure_width=10)

# Drop the first two features and the last one, then plot again.
graphic_record.features = graphic_record.features[2:-1]

ax, _ = graphic_record.plot(figure_width=10)
# Attribute listing of a feature; the result is discarded when run as a script.
dir(graphic_record.features[0])
# Relabel and recolour the remaining leading features by hand.
graphic_record.features[0].label = "capsid"
graphic_record.features[0].color = "red"

graphic_record.features[1].label = "propep"
示例#23
0
def plot_bgc_genes(query_id,
                   BGCs_dict,
                   BGC_measure,
                   num_candidates=10,
                   sharex=True,
                   labels=False,
                   dist_method="centroid",
                   spacing=1):
    """Plot the genes of the BGCs most similar to a query, one row per BGC.

    Args:
        query_id: Row index of the query BGC in the similarity tables.
        BGCs_dict: Mapping whose values contain a ``"genes"`` list. Each gene
            is indexed as ``gene[0]`` (label), ``gene[1]`` (a
            ``(start, end, strand)`` triple) and ``gene[2]`` (a note string,
            or ``[]`` when there is no note). Candidate indices refer to the
            iteration order of this mapping.
        BGC_measure: Object exposing ``list_similars_<m>`` and
            ``list_similars_<m>_idx`` tables indexable as
            ``[query_id, :num_candidates]``.
        num_candidates: Number of most-similar BGCs to draw.
        sharex: Share the x axis between rows and extend every row to the
            largest end coordinate.
        labels: Draw each gene's label when true.
        dist_method: One of "centroid", "pca", "autoencoder", "lda", "lsi"
            or "doc2vec".
        spacing: Figure height (in inches) allotted per candidate.

    Raises:
        ValueError: If ``dist_method`` is not a supported method name.
    """
    # Infix used in the BGC_measure attribute names for each method.
    method_infix = {
        "centroid": "ctr",
        "pca": "pca",
        "autoencoder": "ae",
        "lda": "lda",
        "lsi": "lsi",
        "doc2vec": "d2v",
    }
    if dist_method not in method_infix:
        # Fail early: continuing without candidates can only crash later.
        raise ValueError(
            "Chosen distance measuring method not found: %r (expected one of %s)"
            % (dist_method, ", ".join(sorted(method_infix))))

    table_name = "list_similars_" + method_infix[dist_method]
    candidates_idx = getattr(BGC_measure,
                             table_name + "_idx")[query_id, :num_candidates]
    candidates_dist = getattr(BGC_measure,
                              table_name)[query_id, :num_candidates]

    keys = list(BGCs_dict)
    BGC_genes = [
        BGCs_dict[keys[candidate_id]]["genes"]
        for candidate_id in candidates_idx
    ]

    # One colour per distinct note kind (second token of the note string).
    # Sorting keeps the colour assignment stable between runs.
    note_kinds = set()
    for genes in BGC_genes:
        for feature in genes:
            if feature[2] != []:
                note_kinds.add(feature[2].replace(":", " ").split()[1])
    notes_unique = sorted(note_kinds)
    selected_colors = get_spaced_colors(len(notes_unique) + 1)

    # squeeze=False always yields a 2-D array of axes, so a single candidate
    # can be indexed the same way as many.
    fig, ax_grid = plt.subplots(len(BGC_genes),
                                1,
                                figsize=(10, spacing * num_candidates),
                                sharex=sharex,
                                squeeze=False)
    ax0 = ax_grid[:, 0]
    fig.suptitle("Gene feature comparison (similarity measure: " +
                 dist_method + ")")
    # End coordinate of the last gene of the longest cluster.
    max_xlim = max([x[-1][1][1] for x in BGC_genes])

    for i, genes in enumerate(BGC_genes):
        features = []
        for feature in genes:
            if feature[2] != []:
                note_kind = feature[2].replace(":", " ").split()[1]
                color = selected_colors[notes_unique.index(note_kind)]
            else:
                color = "black"

            label = feature[0] if labels else None
            features.append(
                GraphicFeature(start=feature[1][0],
                               end=feature[1][1],
                               strand=feature[1][2],
                               color=color,
                               label=label,
                               thickness=9,
                               linewidth=0.5,
                               fontdict={"size": 9}))

        record = GraphicRecord(sequence_length=features[-1].end,
                               features=features)

        record.plot(ax=ax0[i], with_ruler=True)
        info1 = "BGC no. %d     " % candidates_idx[i]
        info2 = dist_method + " similarity = %.3f" % candidates_dist[i]
        ax0[i].text(0.02,
                    0.75,
                    info1 + info2,
                    size=10,
                    ha="left",
                    transform=ax0[i].transAxes)
        if sharex:
            ax0[i].set_xlim([ax0[i].get_xlim()[0], max_xlim])
示例#24
0
def mibig_viewer(mibig_dir,
                 mibig_transporters,
                 bgc,
                 to_label=[
                     'ABC_tran', 'BPD_transp_1', 'TonB_dep_Rec',
                     'ABC_membrane', 'ACR_tran', 'FecCD', 'ABC2_membrane',
                     'MatE', 'OEP', 'FtsX', 'MFS_3', 'MFS_1',
                     'ABC2_membrane_3', 'MacB_PCD', 'ABC2_membrane_4', 'MMPL',
                     'BPD_transp_2', 'Peripla_BP_2', 'SBP_bac_1',
                     'Peripla_BP_4', 'SBP_bac_5', 'SBP_bac_8'
                 ]):
    """Plot the CDS features of one MIBiG cluster, labelling transporters.

    Args:
        mibig_dir: Directory containing ``<bgc>.gbk``.
        mibig_transporters: Path to a whitespace-separated table; lines
            starting with ``#`` are ignored, the first column is a gene id of
            the form ``<bgc>_<n>`` and the second a transporter family name.
        bgc: Cluster name, used for the GenBank file name and the gene ids.
        to_label: Family names that trigger a label. The default list is only
            read, never modified.
    """
    from Bio import SeqIO
    from dna_features_viewer import GraphicFeature, GraphicRecord

    # gene id -> space-joined family names found for that gene
    transporters = {}
    with open(mibig_transporters) as handle:
        for line in handle:
            if line.startswith("#"):
                continue
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to record.
                continue
            gene_id, family = fields[0], fields[1]
            if gene_id in transporters:
                transporters[gene_id] += " " + family
            else:
                transporters[gene_id] = family

    colors = {
        'biosynthetic': "#850000",
        'biosynthetic-additional': "#ea8686",
        'other': "#dbdbdb",
        'regulatory': "#7cd369",
        'resistance': "#307321",
        'transport': "#3c85cd"
    }

    genes = []
    length = 0
    last_end = 0
    i = 0
    for record in SeqIO.parse(
            mibig_dir.rstrip("/") + "/" + bgc + ".gbk", "genbank"):
        for feature in record.features:
            if feature.type != 'CDS':
                continue
            # CDS features are numbered in file order to rebuild the gene id.
            i += 1
            feature_name = bgc + "_" + str(i)

            try:
                color = colors[feature.qualifiers['gene_kind'][0]]
            except (KeyError, IndexError):
                # Missing, empty or unknown gene_kind.
                color = colors['other']

            # Push the start past the previous CDS when the two overlap.
            if feature.location.start < last_end:
                start = last_end + 1
            else:
                start = feature.location.start

            # Label the gene only if one of the requested family names occurs
            # in its recorded families; the label is the full family string.
            label = None
            families = transporters.get(feature_name)
            if families is not None and any(name in families
                                            for name in to_label):
                label = families

            genes.append(
                GraphicFeature(start=start,
                               end=feature.location.end,
                               strand=feature.location.strand,
                               color=color,
                               label=label))

            if feature.location.end > length:
                length = feature.location.end
            last_end = feature.location.end
    record = GraphicRecord(sequence_length=length, features=genes)
    record.plot(figure_width=15)
示例#25
0
def show_crispr_grna_results(
    sequence: str,
    guides: List[dict],
    indexes: Optional[List[int]] = None,
    scoreField: str = "onTargetScore",
):
    """Shows guide RNA results for CRISPR.

    Args:
        sequence (str): A string containing the complete organism sequence.

        guides (list of dict): Guide records. Each one must provide `start`
            (int) and `end` (int), the limits of the guide in `sequence`
            coordinates, `forward` (truthy for the + strand) and the field
            named by `scoreField`.

        indexes (list): Start and end of the targeted sequence within the
            complete sequence. If not set, the targeted sequence is not shown
            and the plotted range is derived from the guides instead.

        scoreField (str): Which score of each guide to show in its label,
            e.g. "onTargetScore" (default) or "offTargetScore".

    Raises:
        ValueError: If `indexes` is not given and `guides` is empty, so no
            plotting range can be derived.
    """
    if indexes is not None:
        # Show the targeted region as its own feature.
        targeting_seq_feat = [
            GraphicFeature(
                start=indexes[0],
                end=indexes[1],
                color="#cffccc",
                label="Sequence",
                strand=+1,
            ),
        ]
    else:
        if not guides:
            raise ValueError(
                "`guides` must not be empty when `indexes` is not given.")
        targeting_seq_feat = []
        # No explicit target: span all guides so the crop below has a range.
        indexes = [
            min(x['start'] for x in guides),
            max(x['end'] for x in guides)
        ]

    # Plot records
    record = GraphicRecord(
        sequence=sequence,
        features=targeting_seq_feat + [
            GraphicFeature(
                start=x['start'],
                end=x['end'] + 1,
                color="#ffcccc",
                label=f"{scoreField}: {x[scoreField]}",
                strand=+1 if x['forward'] else -1,
            ) for x in guides
        ],
    )

    # Limit the plot to the region of interest plus a small margin, keeping
    # the window inside the sequence so a target near either end still works.
    crop_start = max(indexes[0] - 10, 0)
    crop_end = min(indexes[1] + 11, len(sequence))
    record = record.crop((crop_start, crop_end))

    # Plot and set to show sequence
    ax, _ = record.plot(figure_width=20)

    record.plot_sequence(ax)
示例#26
0
#!/usr/bin/env python36
# -*- coding: UTF-8 -*-

from dna_features_viewer import GraphicFeature, GraphicRecord

# Genomic coordinate subtracted from every exon position, so the record is
# drawn in local coordinates.
startP = 29909037
# Length of the plotted region in local coordinates.
seq_len = 4625
inputA = 'ENST00000396634_HLA-A_exons.txt'
seqFeat = []

# Tab-separated input: column 1 is read as the chromosome name, columns 4 and
# 5 as the start and end coordinates of one feature per line.
# The context manager closes the file even if a malformed line raises.
with open(inputA, 'r') as fileH:
    for line in fileH:
        x = line.rstrip().split('\t')
        chrName = x[0]
        s = int(x[3]) - startP
        e = int(x[4]) - startP

        # +1 on the end: presumably converts an inclusive end coordinate to
        # the exclusive end used for drawing -- confirm against the input.
        seqFeat.append(
            GraphicFeature(start=s, end=e + 1, strand=+1, color='#ffcccc'))
record = GraphicRecord(sequence_length=seq_len, features=seqFeat)

ax, _ = record.plot(figure_width=18)
# NOTE(review): this draws the same record a second time onto the same axes;
# it looks redundant but is kept so the saved figure is unchanged.
record.plot(ax)
ax.figure.savefig('Gene_sequence_hlaA_exons.png', bbox_inches='tight')
示例#27
0
import matplotlib.pyplot as plt
from dna_features_viewer import (GraphicFeature, GraphicRecord,
                                 CircularGraphicRecord)
# (start, end, strand, color, label) for each example gene.
gene_specs = [
    (20, 500, +1, "#ffcccc", "Gene 1 with a name"),
    (400, 700, -1, "#cffccc", "Gene 2"),
    (600, 900, +1, "#0000ff", "Gene 3"),
]
features = [
    GraphicFeature(start=gene_start,
                   end=gene_end,
                   strand=gene_strand,
                   color=gene_color,
                   label=gene_label)
    for gene_start, gene_end, gene_strand, gene_color, gene_label in gene_specs
]

record = GraphicRecord(sequence_length=1000, features=features)
# Label styling: no box colour, and a handwriting font family.
record.default_box_color = None
record.default_font_family = 'Walter Turncoat'

# Render inside matplotlib's xkcd context and save the result.
with plt.xkcd():
    plt.rcParams["font.family"] = 'Permanent Marker'  # ruler font
    plt.rcParams["xtick.labelsize"] = 'small'
    ax, _ = record.plot(figure_width=5, annotate_inline=False)
    cartoon_figure = ax.figure
    cartoon_figure.tight_layout()
    cartoon_figure.savefig("cartoon_style.png", dpi=200)
示例#28
0
def main():

        # NOTE(review): this call only builds a cache decorator and discards
        # it (there is no leading "@"), so Promoter_Selection is not cached.
        st.cache(persist=True)
        def Promoter_Selection(strength):
            '''Pick the Anderson promoter whose measured strength is closest to `strength`.

            Loads 'Andersonpromoters.csv', drops incomplete rows and walks the
            promoters from closest to farthest until one contains none of the
            prohibited sites in the enclosing `enzyme_set`.

            Returns a dict with keys 'id', 'seq' and 'Strength'.
            Raises ValueError when no promoter passes the check.
            '''
            promoter_df=pd.read_csv('Andersonpromoters.csv')
            promoter_df=promoter_df.dropna(axis=0,how='any')
            promoter_df['Distance']=abs(promoter_df['Measured Strengthb']-strength)
            while not promoter_df.empty:
                # A Series has a single axis, so idxmin takes no axis argument here.
                promoter_index=promoter_df['Distance'].idxmin()
                if test_standard(promoter_df.loc[promoter_index,'Sequencea'],enzyme_set):
                    return {'id':promoter_df.loc[promoter_index,'Identifier'],'seq':promoter_df.loc[promoter_index,'Sequencea'],'Strength':promoter_df.loc[promoter_index,'Measured Strengthb']}
                # Incompatible: discard it and try the next closest promoter.
                promoter_df=promoter_df.drop(promoter_index,axis=0)
            raise ValueError('No Anderson promoter is compatible with the selected standards')

        # NOTE(review): this call only builds a cache decorator and discards
        # it (there is no leading "@"), so RBS_Selection is not cached.
        st.cache(persist=True)
        def RBS_Selection(strength):
            '''Pick the BioBrick RBS whose reported strength is closest to `strength`.

            Loads 'RBS.csv', drops incomplete rows and walks the entries from
            closest to farthest until one contains none of the prohibited
            sites in the enclosing `enzyme_set`.

            Returns a dict with keys 'id', 'seq' and 'Strength'.
            Raises ValueError when no RBS passes the check.
            '''
            RBS_df=pd.read_csv('RBS.csv')
            RBS_df=RBS_df.dropna(axis=0,how='any')
            RBS_df['Distance']=abs(RBS_df['Strength']-strength)
            while not RBS_df.empty:
                # A Series has a single axis, so idxmin takes no axis argument here.
                RBS_index=RBS_df['Distance'].idxmin()
                if test_standard(RBS_df.loc[RBS_index,'Sequence'],enzyme_set):
                    return {'id':RBS_df.loc[RBS_index,'Identifier'],'seq':RBS_df.loc[RBS_index,'Sequence'],'Strength':RBS_df.loc[RBS_index,'Strength']}
                # Incompatible: discard it and try the next closest RBS.
                RBS_df=RBS_df.drop(RBS_index)
            raise ValueError('No RBS is compatible with the selected standards')

        def MFE_Toehold(seq):
            '''Return the MFE (kcal/mol) of the RNA secondary structure of a given sequence.

            Pipes `seq` to the RNAfold executable at a fixed Windows install
            path and parses the energy from the end of its output.
            '''
            # Raw string: the backslashes are path separators, not escapes.
            MFE_calc=Popen(r'C:\Program Files (x86)\ViennaRNA Package\RNAfold.exe', stdin=PIPE, stdout=PIPE)
            Result=MFE_calc.communicate(seq.encode())
            # The energy is read from a fixed window near the end of stdout;
            # presumably this skips a closing ")" and a "\r\n" -- TODO confirm.
            return float(Result[0][-9:-3])

        def MFE_Hybridization(seq_toehold,seq_target):
            '''Return the MFE (kcal/mol) of the hybridization of two given RNA sequences.

            Pipes both sequences, as two FASTA-style records, to the RNAcofold
            executable at a fixed Windows install path and parses the energy
            from the end of its output.
            '''
            # Raw string: the backslashes are path separators, not escapes.
            MFE_calc=Popen(r'C:\Program Files (x86)\ViennaRNA Package\RNAcofold.exe', stdin=PIPE, stdout=PIPE)
            Input='>Seq_toehold\n'+seq_toehold+'\n>Seq_target\n'+seq_target
            Result=MFE_calc.communicate(Input.encode())
            # The energy is read from a fixed window near the end of stdout;
            # presumably this skips a closing ")" and a "\r\n" -- TODO confirm.
            return float(Result[0][-9:-3])

        @st.cache(persist=True)
        def train_classifier(treshold):
            '''Fit a logistic regression that predicts whether a toehold reaches `treshold`.

            Rows of 'Toehold_Data_Processed.xlsx' whose 'On/Off ratio' is at
            least `treshold` are labelled 1, all others 0. The predictors are
            the columns from 'MFE Toehold' through 'MFE Hybridization'.
            Returns the fitted classifier.
            '''
            training_df=pd.read_excel('Toehold_Data_Processed.xlsx')
            reaches_treshold=training_df['On/Off ratio']>=treshold
            training_df['Class']=np.where(reaches_treshold,1,0)
            predictors=training_df.loc[:,'MFE Toehold':'MFE Hybridization']
            labels=training_df['Class']
            model=LogisticRegression()
            model.fit(predictors,labels)
            return model
        
        #@st.cache(persist=True)
        def ToeholdSequence_gen(seq,pool,rbs,treshold):
            '''Design a toehold switch for `seq` and return the best candidate.

            Draws `pool` random 30-nt windows from the target sequence. Each
            window is assembled into a switch (reverse complement + spacer +
            RBS + mutated target + linker) and kept only if it contains no
            prohibited restriction site and BLAST_test accepts the window for
            the selected organism. The MFE of each kept switch and of its
            hybridization with the target feed a classifier trained for
            `treshold`; the candidate with the highest predicted probability
            of reaching that On/Off ratio is returned.

            Returns a dict with the assembled switch ('Toehold_seq'), the
            target window ('seq_target') and the id/sequence of each part.
            Raises ValueError if the target is shorter than 30 nt or if no
            candidate in the pool passes the checks.
            '''
            seq=seq.lower()
            if len(seq)<30:
                raise ValueError('The target sequence must be at least 30 nt long')
            targets=[]
            toeholds=[]
            reversed_targets=[]
            mutated_targets=[]
            for _ in range(pool):
                n=randint(0,len(seq)-30)
                target=seq[n:n+30]
                target_reverse=str(Seq(target,generic_dna).reverse_complement())
                target_mutated=target[:6]+'ATG'+target[9:18]
                target_assembled=target_reverse+'CAAG'+rbs['seq']+target_mutated+'AACCTGGCGGCAGCGCAAAAG'
                # BLAST is the expensive check, so it only runs when the
                # restriction-site check has already passed.
                if test_standard(target_assembled,enzyme_set) and BLAST_test(target,organism):
                    targets.append(target)
                    toeholds.append(target_assembled)
                    reversed_targets.append(target_reverse)
                    mutated_targets.append(target_mutated)
            if not targets:
                raise ValueError('No toehold candidate in the pool passed the standard and specificity checks')
            toehold_df=pd.DataFrame({'Target':targets,'Toehold':toeholds,'Reversed':reversed_targets,'Mutated':mutated_targets})
            toehold_df['MFE Toehold']=[MFE_Toehold(switch) for switch in toehold_df['Toehold']]
            toehold_df['MFE Hybridization']=[MFE_Hybridization(switch,target) for switch,target in zip(toehold_df['Toehold'],toehold_df['Target'])]
            x=toehold_df.loc[:,'MFE Toehold':'MFE Hybridization']
            clf=train_classifier(treshold)
            # Column 1 of predict_proba is the probability of class 1,
            # i.e. of reaching the requested On/Off ratio.
            toehold_df['class']=clf.predict_proba(x)[:,1]
            # Best candidate = highest probability of reaching the ratio.
            toehold_index=toehold_df['class'].idxmax()
            best=toehold_df.loc[toehold_index]
            toehold={'Toehold_seq':best['Toehold'],'seq_target':best['Target'],'id_reversed':'NA','seq_reversed':best['Reversed'],'id_spacer':'NA','seq_spacer':'CAAG','id_mutated':'NA','seq_mutated':best['Mutated'],'id_linker':'NA','seq_linker':'AACCTGGCGGCAGCGCAAAAG'}
            return toehold
        
        @st.cache(persist=True)
        def get_standard(standard_list):
            '''Return the set of prohibited restriction sites for the given standards.

            Standards other than 'RFC10', 'RFC12' and 'RFC21' contribute
            nothing; the sites are lower-case subsequences.
            '''
            sites_by_standard={
                'RFC10':['gaattc','tctaga','actagt','ctgcag','gcggccgc'],
                'RFC12':['gaattc','actagt','gctagc','ctgcag','gcggccgc','cagctg','ctcgag','tctaga','gctcttc','gaagagc'],
                'RFC21':['gaattc','agatct','ggatcc','ctcgag'],
            }
            prohibited=set()
            for standard,sites in sites_by_standard.items():
                if standard in standard_list:
                    prohibited.update(sites)
            return prohibited

        def test_standard(seq,enzyme_set):
        	'''Given a DNA sequence and a set of prohibited restriction site, checks for the prohibited sites within the sequence, returns True if no prohibited sites where detected'''
        	seq=seq.lower()
        	for enz in enzyme_set:
        		if enz in seq:
        			return False
        	return True

        def BLAST_test(seq,organism):
            '''Return False when `seq` has a full-identity BLAST hit in `organism`.

            Runs a remote blastn search against 'nt'. A hit counts when the
            number of identities equals the query length and the lower-cased
            organism name occurs in the lower-cased alignment title.
            Returns True when no such hit exists.
            '''
            wanted=organism.lower()
            blast_record=NCBIXML.read(NCBIWWW.qblast('blastn','nt',seq))
            query_length=len(seq)
            for alignment in blast_record.alignments:
                for hsp in alignment.hsps:
                    if int(hsp.identities)!=query_length:
                        continue
                    if wanted in alignment.title.lower():
                        return False
            return True

        @st.cache(persist=True)
        def Check_DNA(seq):
            '''Return True when every character of `seq` is one of a, t, c or g (case-insensitive).

            An empty string is therefore reported as valid.
            '''
            return all(base in 'atcg' for base in seq.lower())

        @st.cache(persist=True)
        def load_reporters(standard_list):
            '''Load the reporter proteins that comply with every selected standard.

            Reads 'Reporters.xlsx', removes rows with a duplicated 'Sequence'
            and drops every reporter whose 'Standard' entry does not contain
            all names in `standard_list`. With no standard selected every
            reporter is kept; the result is empty when none complies.
            Returns the remaining rows as a pd.DataFrame.
            '''
            standard_list=list(standard_list)
            reporter_df=pd.read_excel('Reporters.xlsx')
            reporter_df=reporter_df.drop_duplicates('Sequence')
            # Collect the labels first, then drop them in one step and return
            # the filtered frame (drop() does not modify reporter_df in place).
            non_compliant=[
                ind for ind in reporter_df.index
                if any(standard not in reporter_df.loc[ind,'Standard'] for standard in standard_list)
            ]
            return reporter_df.drop(non_compliant)

        @st.cache(persist=True)
        def Assembler(promoter,rbs,toehold,output_df):
            '''Assemble the BioBrick from the given parts.

            The sequence is prefix + promoter + scar + toehold switch +
            output + scar + terminator + suffix. The annotation table has one
            row per part with 1-based, inclusive Start/End positions in that
            sequence; the two 8-nt scars are not annotated.

            Returns the assembled sequence and the annotation table
            (pd.DataFrame with columns Type, BioBrick Id, Start, End,
            More Information, Sequence).
            '''
            output_df=output_df.reset_index()
            prefix='GAATTCGCGGCCGCTTCTAGAG'
            scar='TACTAGAG'
            terminator='tcacactggctcaccttcgggtgggcctttctgcgtttatatactagagagagaatataaaaagccagattattaatccggcttttttattattt'
            suffix='TACTAGTAGCGGCCGCTGCAG'
            output_seq=output_df.loc[0,'Sequence']
            seq=prefix+promoter['seq']+scar+toehold['Toehold_seq']+output_seq+scar+terminator+suffix
            types=['BioBrick prefix','Promoter',"Trigger'",'Spacer','RBS','Toehold complement','Linker','Output','Terminator','BioBrick suffix']
            ids=['BioBrick prefix',promoter['id'],toehold['id_reversed'],toehold['id_spacer'],rbs['id'],toehold['id_mutated'],toehold['id_linker'],output_df.loc[0,'Id'],'BBa_B0014','BioBrick suffix']
            MoreInfo=['NA','Strength: '+str(promoter['Strength']),'NA','Green et al., 2014','Strength: '+str(rbs['Strength']),'NA','Green et al., 2014',output_df.loc[0,'Description'],'Double E.coli terminator, widely used','NA']
            Sequences=[prefix,promoter['seq'],toehold['seq_reversed'],toehold['seq_spacer'],rbs['seq'],toehold['seq_mutated'],toehold['seq_linker'],output_seq,terminator,suffix]
            # Unannotated bases that precede each part: a scar sits between the
            # promoter and the switch, and between the output and the terminator.
            gaps=[0,0,len(scar),0,0,0,0,0,len(scar),0]
            Starts=[]
            Ends=[]
            position=0  # number of bases of `seq` consumed so far
            for part,gap in zip(Sequences,gaps):
                position+=gap
                Starts.append(position+1)
                position+=len(part)
                Ends.append(position)
            assembled_df=pd.DataFrame({'Type':types,'BioBrick Id':ids,'Start':Starts,'End':Ends,'More Information':MoreInfo,'Sequence':Sequences})
            return seq, assembled_df

        @st.cache(persist=True)
        def to_excel(df):
            '''Serialize `df` to an in-memory .xlsx workbook and return its bytes.

            The frame is written to a sheet named 'Sheet1' with the
            xlsxwriter engine.
            '''
            output = BytesIO()
            # Leaving the context manager finalizes the workbook into `output`.
            with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                df.to_excel(writer, sheet_name='Sheet1')
            return output.getvalue()

        @st.cache(persist=True)
        def get_table_download_link(df):
            """Build an HTML link that downloads `df` as an Excel file.

            in:  dataframe
            out: href string embedding the workbook as a base64 data URI
            """
            encoded = base64.b64encode(to_excel(df)).decode()
            return f'<a href="data:application/octet-stream;base64,{encoded}" download="Toeholdswitch.xlsx">Download as Excel</a>'

        # ---- Sidebar: collect the design preferences ----
        st.title('BioBrick Builder: Toeholdswitch designer')
        st.sidebar.title('BioBrick Builder: Toeholdswitch designer')
        st.sidebar.markdown('Fill out the fields with your preferences for your toeholdswitch part')
        st.sidebar.subheader('For which organism is your part?')
        organism=st.sidebar.selectbox('Organism',('Escherichia coli','Saccharomyces cerevisiae','Insects','Arabidopsis thaliana'))
        st.sidebar.markdown('')
        st.sidebar.subheader('Standards')
        standards=st.sidebar.multiselect('Select the standards that you part should be compatible with',('RFC10','RFC21'))
        # organism and enzyme_set are read by the helper functions defined above.
        enzyme_set=get_standard(standards)
        st.sidebar.markdown('')
        st.sidebar.subheader('Promoter')
        promoter_strength=st.sidebar.slider('Desired promoter strength (relative to BBa_J23100)',0.0,1.0,key='promoter_strength')
        st.sidebar.markdown('')
        st.sidebar.subheader('RBS')
        RBS_strength=st.sidebar.slider('Desired RBS strength (relative to BBa_B0034)',0.0,1.0,key='RBS_strength')
        st.sidebar.markdown('')
        st.sidebar.subheader('Target Sequence')
        target_sequence=st.sidebar.text_area('Enter your target sequence')
        target_is_valid=Check_DNA(target_sequence)
        if not target_is_valid:
            st.sidebar.markdown('Please enter a valid target sequence')
        st.sidebar.markdown('Enter the size of the pool to generate the toeholdswitch')
        pool=st.sidebar.number_input('Size of the pool to generate',3,10000,step=10)
        st.sidebar.markdown('Enter the minimum acceptable On/Off ratio')
        treshold=st.sidebar.number_input('Minimum acceptable On/Off ratio',10.0,70.0,step=5.0)
        st.sidebar.markdown('')
        st.sidebar.subheader('Output fron the toholdswitch')
        output_type=st.sidebar.selectbox('Type of output',('Reported BioBrick reporter protein','Your own output protein'))
        # Stays None until a usable output has been chosen, so the assemble
        # step can tell "no valid output yet" apart from a real selection.
        output_df=None
        if output_type=='Your own output protein':
            output_sequence=st.sidebar.text_area('Enter your output sequence')
            if not Check_DNA(output_sequence):
                st.sidebar.markdown('Please enter a valid output sequence')
            elif not test_standard(output_sequence,enzyme_set):
                st.sidebar.markdown('Sequence not compatible with the selected standards')
            else:
                output_df=pd.DataFrame({'Id':'NA','Description':'User-defined output sequence','Sequence':output_sequence},index=[0])
        else:
            reporter_df=load_reporters(standards)
            reporters=tuple(reporter_df['Description'])
            st.sidebar.markdown('Select the desired reporter protein')
            output_name=st.sidebar.selectbox('Reporter protein',reporters)
            reporter_index=reporter_df.index[reporter_df['Description']==output_name].tolist()
            if reporter_index:
                # Select by index label: rows removed while loading mean a
                # label no longer equals the row's position.
                output_df=reporter_df.loc[[reporter_index[0]]].copy()
                output_df.columns=['Id','Description','Sequence','Standard']
            else:
                st.sidebar.markdown('No reporter protein is available for the selected standards')

        # ---- Main panel: assemble and report ----
        if st.sidebar.button('Assemble BioBrick',key='Assemble BioBrick'):
            if not target_is_valid:
                st.markdown('Please enter a valid target sequence')
            elif len(target_sequence)<30:
                # The toehold design samples 30-nt windows from the target.
                st.markdown('The target sequence must be at least 30 nt long')
            elif output_df is None:
                st.markdown('Please provide a valid output before assembling')
            else:
                promoter=Promoter_Selection(promoter_strength)
                rbs=RBS_Selection(RBS_strength)
                toehold=ToeholdSequence_gen(target_sequence,pool,rbs,treshold)
                final_seq,result_df=Assembler(promoter,rbs,toehold,output_df)
                st.subheader('Results')
                st.markdown('Plain Sequence')
                st.markdown('')
                # Show the sequence in two halves.
                midpoint=int(len(final_seq)/2)
                st.write("5'-"+final_seq[:midpoint])
                st.write(final_seq[midpoint:]+"-3'")
                st.markdown('')
                st.markdown('Annotation Table')
                st.write(result_df.set_index('Type'))
                st.markdown(get_table_download_link(result_df), unsafe_allow_html=True)
                st.markdown('Map')
                # One colour per row of the annotation table, in table order.
                colors=['#ADD8E6','#00FF00','#00FFFF','#FFFFFF','#008000','#0000FF','#FFA500','#800080','#FF0000','#ADD8E6']
                features=[]
                for ind in list(result_df.index.values):
                    features.append(GraphicFeature(start=result_df.loc[ind,'Start'], end=result_df.loc[ind,'End'], strand=+1,color=colors[ind],label=result_df.loc[ind,'Type']))
                record=GraphicRecord(sequence_length=len(final_seq), features=features)
                record.plot(figure_width=5)
                st.pyplot()
from dna_features_viewer import GraphicFeature, GraphicRecord

# Two overlapping forward-strand features on a short example sequence.
highlighted = [
    GraphicFeature(start=5, end=10, strand=+1, color='#ffcccc'),
    GraphicFeature(start=8, end=15, strand=+1, color='#ccccff'),
]
record = GraphicRecord(sequence="ATGCATGCATGCATGCATGCATGCATGC",
                       features=highlighted)

# Draw the record, then add the nucleotides and the translation of the
# (8, 23) window in bold on the same axes, and save the figure.
ax, _ = record.plot(figure_width=5)
record.plot_sequence(ax)
bold_font = {'weight': 'bold'}
record.plot_translation(ax, (8, 23), fontdict=bold_font)
ax.figure.savefig('sequence_and_translation.png', bbox_inches='tight')
示例#30
0
def get_map(phage_id, UPLOAD_FOLDER):
    """Creates and returns a map of the genome.

    Every annotation of the phage is drawn except those whose function is
    '@DELETED' or whose status is 'trnaDELETED'. tRNAs share one colour;
    other annotations are coloured by strand.

    Args:
        phage_id:
            Value used to filter the annotations that are drawn.
        UPLOAD_FOLDER:
            The folder containing all of the uploaded files. The rendered
            image is also written into it.

    Returns:
        A dictionary containing an image of the genome map.

    """
    trna_color = "#7570b3"
    forward_color = "#1b9e77"
    reverse_color = "#d95f02"

    annotations = db.session.query(Annotations).filter_by(
        phage_id=phage_id).order_by(Annotations.left)
    features = []
    for cds in annotations:
        if cds.function == '@DELETED' or cds.status == 'trnaDELETED':
            continue
        is_forward = cds.strand == '+'
        if cds.status == "tRNA":
            color = trna_color
        elif is_forward:
            color = forward_color
        else:
            color = reverse_color
        features.append(
            GraphicFeature(start=cds.left,
                           end=cds.right,
                           strand=+1 if is_forward else -1,
                           color=color,
                           label=cds.id))

    # The genome length comes from the uploaded FASTA file.
    fasta_file = helper.get_file_path("fasta", UPLOAD_FOLDER)
    sequence = str(SeqIO.read(fasta_file, "fasta").seq)
    record = GraphicRecord(sequence_length=len(sequence), features=features)
    # Figure width grows with the genome: one unit per 1000 bases.
    ax, _ = record.plot(figure_width=len(sequence) / 1000)

    # Save the image, then read it back to embed it in the response.
    image_path = os.path.join(UPLOAD_FOLDER, 'sequence_and_translation.png')
    ax.figure.savefig(image_path, bbox_inches='tight')
    with open(image_path, "rb") as image_file:
        image_byte_string = base64.b64encode(image_file.read())
    return {
        'status': "success",
        'image': str(image_byte_string),
    }
示例#31
0
def main(args=None):
    """Render one genomic-neighborhood image per annotation group.

    Parses the command line, reads the annotation table and the GFF3/GTF
    feature table, keeps the CDS features whose ``--field`` value appears in
    the annotation table, and saves one cropped ``GraphicRecord`` plot per
    annotation group into ``--output_directory``.

    Args:
        args: Optional list of command-line tokens.  ``None`` (the default)
            lets argparse read ``sys.argv[1:]``.

    Raises:
        AssertionError: If ``--field`` is not ``locus_tag``, is missing from
            the annotation table, or the feature format cannot be inferred.
        ValueError: If no CDS feature matches the annotation table.
    """
    # Path info
    script_directory = os.path.dirname(os.path.abspath(__file__))
    script_filename = sys.argv[0].split("/")[-1]
    description = """
    Running: {} v{} via Python v{} | {}\n
    Versatile command-line tool designed to create publication quality genomic neighborhood plots built on top of DnaFeaturesViewer

    [Example: --annotation_table ./Data/annotation_table.xlsx]
    # Note: Columns are not case sensitive
    Group	Locus_Tag	Color
    9	MASE_00925	#FFFFFF
    9	MASE_00930	#FFFFFF
    9	MASE_00935	#FF0000
    9	MASE_00940	#FFFFFF

    [Example: --feature_table ./Data/CP003841.1.gff3]
    ##sequence-region CP003841.1 1 4653851
    ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=529120
    CP003841.1	Genbank	region	1	4653851	.	+	.	ID=CP003841.1:1..4653851;Dbxref=taxon:529120;Is_circular=true;Name=ANONYMOUS;country=Pacific Ocean: near Hawaii;gbkey=Src;genome=chromosome;isolation-source=seawater surface;mol_type=genomic DNA;old-lineage=Bacteria%3B Proteobacteria%3B Gammaproteobacteria%3B Alteromonadales%3B Alteromonadaceae%3B Alteromonas;strain=ATCC 27126
    CP003841.1	Genbank	gene	473	2065	.	+	.	ID=gene-MASE_00005;Name=MASE_00005;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00005
    CP003841.1	Genbank	CDS	473	2065	.	+	0	ID=cds-AFS35578.1;Parent=gene-MASE_00005;Dbxref=NCBI_GP:AFS35578.1;Name=AFS35578.1;Note=COG0593 ATPase involved in DNA replication initiation;gbkey=CDS;locus_tag=MASE_00005;product=chromosomal replication initiator protein dnaA;protein_id=AFS35578.1;transl_table=11
    CP003841.1	Genbank	gene	2098	3198	.	+	.	ID=gene-MASE_00010;Name=MASE_00010;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00010
    CP003841.1	Genbank	CDS	2098	3198	.	+	0	ID=cds-AFS35579.1;Parent=gene-MASE_00010;Dbxref=NCBI_GP:AFS35579.1;Name=AFS35579.1;Note=COG0592 DNA polymerase sliding clamp subunit (PCNA homolog);gbkey=CDS;locus_tag=MASE_00010;product=DNA polymerase III subunit beta;protein_id=AFS35579.1;transl_table=11
    CP003841.1	Genbank	gene	3324	4412	.	+	.	ID=gene-MASE_00015;Name=MASE_00015;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00015
    CP003841.1	Genbank	CDS	3324	4412	.	+	0	ID=cds-AFS35580.1;Parent=gene-MASE_00015;Dbxref=NCBI_GP:AFS35580.1;Name=AFS35580.1;Note=COG1195 Recombinational DNA repair ATPase (RecF pathway);gbkey=CDS;locus_tag=MASE_00015;product=recombinational DNA repair ATPase;protein_id=AFS35580.1;transl_table=11
    CP003841.1	Genbank	gene	4421	6841	.	+	.	ID=gene-MASE_00020;Name=MASE_00020;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00020

    [Example commands:]
    ./genomic_neighborhood.py -f ./Data/CP003841.1.gff3 -a ./Data/annotation_table.xlsx -o genomic_neighborhood_output --sheet_name "Fe Responsive"
    """.format(__program__, __version__,
               sys.version.split(" ")[0], sys.executable)
    usage = "{} -f <feature_table> -a <annotation_table> -o <output_directory>".format(
        __program__)
    epilog = "Copyright 2020 Josh L. Espinoza ([email protected]) [BSD-3 License]"

    # Parser
    parser = argparse.ArgumentParser(
        description=description,
        usage=usage,
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter)

    # Features Table
    parser_features = parser.add_argument_group('Feature table arguments')
    parser_features.add_argument(
        "-f",
        "--feature_table",
        type=str,
        help=
        "path/to/feature_table.[gff3,gtf][.gz,.bz2,.zip] (e.g. feature_table.gff3[.gz]) {gff3,gtf}"
    )
    parser_features.add_argument(
        "--field",
        type=str,
        default="locus_tag",
        help=
        "Query feature.  Note: locus_tag is the only feature accepted with current version. [Default: locus_tag]"
    )
    parser_features.add_argument(
        "--feature_format",
        type=str,
        default="infer",
        help="Feature format. [Default: infer] {gff3,gtf}")

    # Annotation Table
    parser_annotations = parser.add_argument_group(
        'Annotation table arguments')
    parser_annotations.add_argument(
        "-a",
        "--annotation_table",
        type=str,
        help=
        "path/to/annotation_table.[ext][.compression] (e.g. annotation_table.tsv[.gz]).  Usable columns are [group, <--field>, color ].  Required column is [<--field>] (e.g. [locus_tag]) {tsv,csv,xlsx}"
    )
    parser_annotations.add_argument(
        "--excel",
        type=str,
        default="infer",
        help="Input table is excel format {true, false, infer} [Default: infer]"
    )
    parser_annotations.add_argument(
        "--sep",
        type=str,
        default="\t",
        help="Separator for input table [Default: '\\t']")
    parser_annotations.add_argument("--sheet_name",
                                    type=str,
                                    help="Sheetname if using excel")

    # Image
    parser_images = parser.add_argument_group('Image arguments')
    # The help text advertises the current directory as the default, so it is
    # set explicitly; otherwise os.makedirs() below would receive None.
    parser_images.add_argument("-o",
                               "--output_directory",
                               type=str,
                               default=os.getcwd(),
                               help="Output direcotyr [Default: {}]".format(
                                   os.getcwd()))
    parser_images.add_argument(
        "--feature_color",
        type=str,
        default="gray",
        help=
        "Feature color as a hexcode (#929591) or named color (gray).  Note this is the default color and will be overrided if a 'Color' column is provided for `--anotation_table`\nReference: https://matplotlib.org/3.1.0/gallery/color/named_colors.html"
    )
    # Parsed as float because the value becomes the alpha channel of an RGBA
    # tuple further down.
    parser_images.add_argument(
        "--feature_opacity",
        type=float,
        default=0.85,
        help="Feature color opacity. [Default: 0.85] [0.0,..,1.0]")
    parser_images.add_argument(
        "--image_format",
        type=str,
        default="svg",
        help="Image format [Default: svg] {svg,png,pdf}")
    parser_images.add_argument(
        "--show_sequence_record",
        type=str,
        default="f",
        help="Add sequence record title. [Default: false]")
    parser_images.add_argument("--figure_width",
                               type=float,
                               default=20.0,
                               help="Width of figures [Default: 20]")
    parser_images.add_argument(
        "--draw_reference_line",
        type=str,
        default="t",
        help="Draw reference line for features [Default: true]")

    parser_utility = parser.add_argument_group('Utility arguments')
    parser_utility.add_argument("-v",
                                "--version",
                                action='version',
                                version="{} v{}".format(
                                    __program__, __version__))
    parser_utility.add_argument(
        "--citation",
        action='store_true',
        help="If you use this software, please cite the following sources:\n{}"
        .format(__cite__))

    # Options (args=None makes argparse fall back to sys.argv[1:])
    opts = parser.parse_args(args)
    opts.script_directory = script_directory
    opts.script_filename = script_filename
    if opts.citation:
        print(__cite__, file=sys.stderr)
        sys.exit(0)

    print(format_header(__program__), file=sys.stdout)

    # Read in annotations: translate separator aliases to the real character
    if opts.sep in {"comma", "csv"}:
        opts.sep = ","
    if opts.sep in {"tab", "tsv", "t"}:
        opts.sep = "\t"
    if opts.sep in {"\\s", "space"}:
        opts.sep = " "

    opts.field = opts.field.lower()
    assert opts.field == "locus_tag", "Currently only `locus_tag` is supported for --field"

    df_annotations = read_dataframe(
        path=opts.annotation_table,
        sep=opts.sep,
        excel=opts.excel,
        sheet_name=opts.sheet_name,
        index_col=None,
    )
    # Column names are matched case-insensitively
    df_annotations.columns = df_annotations.columns.map(
        lambda x: x.strip().lower())
    print("Reading annotation table: {} | {}".format(opts.annotation_table,
                                                     df_annotations.shape),
          file=sys.stderr)

    assert opts.field in df_annotations.columns, "--field ({}) not in --annotation_table columns".format(
        opts.field)
    if "group" not in df_annotations.columns:
        df_annotations["group"] = None
    if "color" not in df_annotations.columns:
        df_annotations["color"] = opts.feature_color

    # Read in feature table
    opts.feature_format = opts.feature_format.lower()
    if opts.feature_format == "infer":
        if any(x in opts.feature_table for x in {".gff", ".gff3"}):
            opts.feature_format = "gff3"
        if ".gtf" in opts.feature_table:
            opts.feature_format = "gtf"
    assert opts.feature_format != "infer", "Could not infer `feature_format`.  Please specify either {gff3, gtf}"
    assert_acceptable_arguments(opts.feature_format, {"gff", "gff3", "gtf"})
    if opts.feature_format in {"gff", "gff3"}:
        df_features = read_gff3(opts.feature_table)
    if opts.feature_format in {"gtf"}:
        df_features = read_gtf(opts.feature_table)
    print("Reading features table: {} | {}".format(opts.feature_table,
                                                   df_features.shape),
          file=sys.stderr)

    # 'region' rows give the total length of each sequence record
    df = df_features.query("seq_type == 'region'")
    d_seq_length = dict(zip(df["seq_record"], df["pos_end"].astype(int)))
    df_features = df_features.loc[
        df_features["seq_type"][lambda x: x == "CDS"].index, :].reset_index()

    # Output directory
    os.makedirs(opts.output_directory, exist_ok=True)

    # Genomic neighborhoods
    d_id_group = dict(zip(df_annotations[opts.field], df_annotations["group"]))
    d_id_color = dict(zip(df_annotations[opts.field], df_annotations["color"]))

    d_group_features = defaultdict(list)
    d_group_positions = defaultdict(list)
    # Sequence record of each group, so titles, file names and sequence
    # lengths refer to the group's own record rather than the last row read.
    d_group_seq_record = dict()

    for _, data in df_features.iterrows():
        label = data[opts.field]
        if label not in d_id_color:
            continue
        start = int(data["pos_start"])
        end = int(data["pos_end"])
        strand = {"+": +1, "-": -1}[data["sense"]]
        group = d_id_group[label]
        feature = GraphicFeature(start=start,
                                 end=end,
                                 strand=strand,
                                 label=label,
                                 color=(*to_rgb(d_id_color[label]),
                                        opts.feature_opacity))
        d_group_features[group].append(feature)
        d_group_positions[group] += [start, end]
        d_group_seq_record.setdefault(group, data["seq_record"])

    if not d_group_positions:
        raise ValueError(
            "No CDS features in --feature_table matched the `{}` values in --annotation_table"
            .format(opts.field))

    limits = {
        group: (min(positions), max(positions))
        for group, positions in d_group_positions.items()
    }

    # Every plot uses the same window width (widest group plus a margin) so
    # that the images are drawn at a comparable scale.
    max_length = max(upper - lower for lower, upper in limits.values())
    pad = max_length // 2 + 10
    for group, features in d_group_features.items():
        seq_record = d_group_seq_record[group]
        sequence_length = d_seq_length[seq_record]
        midpoint = np.mean(limits[group])
        record = GraphicRecord(sequence_length=sequence_length,
                               features=features).crop(
                                   (max(midpoint - pad, 0), midpoint + pad))

        ax, _ = record.plot(figure_width=opts.figure_width,
                            draw_line=boolean(opts.draw_reference_line))
        if boolean(opts.show_sequence_record):
            ax.set_title("{} [{}..{}]".format(seq_record, limits[group][0],
                                              limits[group][1]),
                         fontsize=15,
                         fontweight="bold")
        if group is not None:
            output_filepath = os.path.join(
                opts.output_directory,
                "{}__{}.{}".format(seq_record, group, opts.image_format))
        else:
            output_filepath = os.path.join(
                opts.output_directory, "{}.{}".format(seq_record,
                                                      opts.image_format))

        ax.figure.savefig(output_filepath, dpi=300, bbox_inches="tight")