def plot(self, ax, gr: GenomeRange, **kwargs):
    """Draw the features that fall inside ``gr`` as arrows on ``ax``.

    Rows come from ``self.fetch_plot_data(gr)``. They are narrowed first by
    the optional ``row_filter`` property (``;``-separated expressions of the
    form column-operator-value) and then by ``length_ratio_thresh``, which
    drops features that are short relative to the plotted region.

    Args:
        ax: Axes handed to ``GraphicRecord.plot``; also stored on ``self.ax``.
        gr: Region to draw; only ``gr.start`` and ``gr.end`` are read here.
        **kwargs: Accepted but not used by this method.
    """
    self.ax = ax
    df = self.fetch_plot_data(gr)

    if self.has_prop("row_filter"):
        for filter_ in self.properties["row_filter"].split(";"):
            # The first comparison character splits "column" from "op value".
            op_match = re.search("[=><!]", filter_)
            if op_match is None:
                log.warning(f"row filter {filter_} is not valid.")
                continue
            column = filter_[:op_match.start()].strip()
            condition = filter_[op_match.start():]
            try:
                # NOTE(review): eval() runs whatever the row_filter property
                # contains; only safe while track configuration is trusted.
                df = eval(f'df[df["{column}"]{condition}]')
            except IndexError:
                log.warning(f"row filter {filter_} is not valid.")

    region_length = gr.end - gr.start
    min_length = region_length * self.properties["length_ratio_thresh"]
    df = df[(df["end"] - df["start"]) > min_length]

    # One arrow per remaining row; anything other than '+' is drawn reverse.
    features = [
        GraphicFeature(
            start=row['start'],
            end=row['end'],
            strand=(1 if row['strand'] == '+' else -1),
            label=row['gene_name'],
            color=random.choice(self.colors),
        )
        for _, row in df.iterrows()
    ]
    record = GraphicRecord(sequence_length=region_length,
                           features=features,
                           first_index=gr.start)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
    self.plot_label()
def vis_pegRNA2(df, genome_fasta=None, **kwargs):
    """Draw the components of one easy-prime prediction (rawX format).

    Args:
        df: Data frame describing a single pegRNA. Per the original notes it
            holds four rows (RTT, PBS, sgRNA, ngRNA). The index holds the
            pegRNA id; the columns read here are ``CHROM``, ``start``,
            ``end``, ``POS``, ``REF``, ``ALT``, ``predicted_efficiency``,
            ``strand`` and ``type``.
        genome_fasta: Passed through to ``get_fasta_single``.
        **kwargs: Accepted but unused.

    Returns:
        Currently always ``0`` (see the review note at the early return).
    """
    pegRNA_id = df.index.tolist()[0]
    variant_id = pegRNA_id.split("_")[0]
    # The index holds pegRNA id strings, so ``series[0]`` only worked through
    # pandas' deprecated integer-key positional fallback; ask for the first
    # row explicitly instead.
    chrom = df['CHROM'].iloc[0]

    # Window start: rounded down to a multiple of 10, minus one.
    start = df['start'].min()
    start -= start % 10
    start -= 1
    # Window end: rounded down to a multiple of 10, plus ten.
    end = df['end'].max()
    end -= end % 10
    end += 10

    variant_pos = df.POS.min()
    ref = df.REF.iloc[0]
    alt = df.ALT.iloc[0]
    predicted_efficiency = df.predicted_efficiency.iloc[0] * 100
    pos = variant_pos - start
    sequence = get_fasta_single(chrom, start, end, genome_fasta).upper()

    fig, ax = plt.subplots()
    feature_list = []
    for _, r in df.iterrows():
        # Feature coordinates are made relative to the window start.
        r_start = r.start - start
        r_end = r_start + (r.end - r.start)
        feature_list.append(
            GraphicFeature(start=r_start, end=r_end,
                           strand=get_strand(r.strand),
                           color=my_colors[r.type], label=r.type))
    record = GraphicRecord(sequence=sequence, features=feature_list)
    record.plot(ax=ax, figure_width=int(len(sequence) / 5))
    # NOTE(review): this early return makes everything below unreachable, so
    # the figure is neither annotated nor encoded. It looks like a debugging
    # leftover, but the return value is kept as-is because callers may rely
    # on it -- confirm and either drop this line or the code below.
    return 0

    record.plot_sequence(ax)
    ax.fill_between((pos - 1.5, pos - 0.5), +1000, -1000, alpha=0.5,
                    color=my_colors['variant'])
    # Relabel ticks with genomic coordinates; only the first tick carries the
    # chromosome name.
    locs, labels = plt.xticks()
    new_labels = []
    flag = True
    for i in locs:
        if flag:
            new_labels.append("%s %s" % (chrom, int(start + i + 1)))
            flag = False
        else:
            new_labels.append(int(start + i + 1))
    plt.xticks(locs, new_labels)
    plt.title("ID: %s, CHR: %s, POS: %s, REF: %s, ALT: %s \n Predicted efficiency: %.1f" % (variant_id, chrom, variant_pos, ref, alt, predicted_efficiency) + "%")
    # Encode the PNG as a base64 data URI.
    my_stringIObytes = io.BytesIO()
    ax.figure.savefig(my_stringIObytes, format='png', bbox_inches='tight')
    my_stringIObytes.seek(0)
    img_string = base64.b64encode(my_stringIObytes.read())
    return "data:image/png;base64,%s" % (img_string.decode("utf-8"))
def demo_dna_features_viewer():
    """Plot a fixed four-feature demo record and return the ``plt`` object."""
    # (start, end, strand, color, label) for each demo feature.
    feature_specs = (
        (0, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    )
    features = [
        GraphicFeature(start=begin, end=stop, strand=strand, color=color,
                       label=label)
        for begin, stop, strand, color, label in feature_specs
    ]
    demo_record = GraphicRecord(sequence_length=1000, features=features)
    demo_record.plot(figure_width=5)
    return plt
def Visualiser_sekvens(gen):
    """Plot a sequence view for the selected gene.

    ``gen`` selects what is drawn: ``'alle'`` plots the whole GenBank record
    ``Artemisia%20annua.gb`` through ``ChangeFeatures``; ``"aldh1"`` and
    ``'CYP71AV1'`` plot a hard-coded sequence with a promoter feature, a gene
    feature, the nucleotides and the translation of the gene span. Any other
    value plots nothing. Always returns ``None``.
    """
    import warnings
    from Bio import BiopythonParserWarning

    warnings.simplefilter('ignore', BiopythonParserWarning)

    def draw_gene(sequence, promoter, gene, figure_width):
        # promoter is (start, end, color); gene is (start, end, color, label).
        gene_span = (gene[0], gene[1])
        gene_record = GraphicRecord(sequence=sequence, features=[
            GraphicFeature(start=promoter[0], end=promoter[1], strand=+1,
                           color=promoter[2], label="Promotor"),
            GraphicFeature(start=gene[0], end=gene[1], strand=+1,
                           color=gene[2], label=gene[3]),
        ])
        axis, _ = gene_record.plot(figure_width=figure_width)
        gene_record.plot_sequence(axis)
        gene_record.plot_translation(axis, gene_span,
                                     fontdict={'weight': 'bold'})

    if gen == 'alle':
        graphic_record = ChangeFeatures().translate_record(
            'Artemisia%20annua.gb')
        axis, _ = graphic_record.plot(figure_width=20)
        axis.figure.tight_layout()
    elif gen == "aldh1":
        draw_gene(
            "CTGTGTCTAGATTTACGGTTTTGTTGAGTATGGAGTATTTATCCCTGTGTCTAGATTTACGGTTTGAAGACTCAGGAAACTCTCATTAAGCGATCAACGTAGCATGATCATCAAAAGCATGGTTTTGTAAACTCGACATGTCAATGTACCAGCCGATCCAAGTATCCAAGCAATTGGTTCACCACACCAAAAGAGTTTTACACTTAAAAACAACAATTAATTCTAAATAGTCTATGTAATGAAATATGTTTTGTGTGGGTTAGTTTAGTTCATAGTTGCGCCATAAGTATTTACAGCAA",
            promoter=(0, 28, '#ffd700'),
            gene=(29, 299, "#ffcccc", "aldh1"),
            figure_width=50,
        )
    elif gen == 'CYP71AV1':
        draw_gene(
            "ATTTTTGGGGGCCCCCCCCCATTTTTTGGGGGGCGCGCGATGAAGTTGGTCATTCGAAATATACTTCCAAAATATGAAGTTGGTCATTCGAAATATACTTCCAAACAACCGAGCTGGTCAGGTAGATTTTGTTTCAGATGAAGATGCAATCCACCGTTGGGGGAGTTTCATGAATAACAATCGCAAATAAGATATATTGTTGATTCTTGATGATGTTTGGTCTGATACCATCATCACCGACCTCCAATTCAGGTCACGTGGATACAAGATCCTCGTGACCTCTGAAACAACCTTTAAGAGATTCGATACATATAAAGTGAGACCTCTCAGTGTTCAAGATGCCATCAATCTGTTATGCTATTCAACACTTTCGGAGCGTGCAAGTCAAGCCACAAATGACATACAGACCTTGTTGACAAGGTGAAATTTCAAATTATTCCAAGATTCATGTTTCATACCTTTATAAGAAAGTAATATCTAAACCATATTAACAAATACTAACAATTAACTTTCAAATGTTTTTGTAGTTAACCAAATGTTGCAAGAAGAATCCGCTCGCCTTAAGTGTCATTGGTGGTCGCCTAAAGGGGACACAAATGGAAAGTTGGCATCATACACTGAAAAAGCTATCTCAAGCCACACACCCTCTTATCGACCTTCCTTTGGATGAGGCAAACAGATTTCATCTCGCAAGAGCTCTCGGTTTACTCAAAGATGATGAACGCAACAGCCCCAGAAGTTCAACCTCGAAATTGACCCGATCTTACCAAGTCA",
            promoter=(1, 38, '#cffccc'),
            gene=(39, 774, "#cff77d", "CYP71AV1"),
            figure_width=100,
        )
    return
def contig_visualization_onefile(contig_array, genome, drug, rgi):
    """Plot every gene row of ``contig_array`` and save the figure as a PNG.

    The frame's index is reset in place. Each row becomes one feature built
    from the ``GeneStart``, ``GeneEnd``, ``Strand``, ``Genecolor`` and
    ``GeneName`` columns. The record spans from the first row's start to the
    last row's end. The image is titled ``drug_rgi`` and written to
    ``contigend_visualizations_single_genome/drug_rgi.png``.
    ``genome`` is accepted but not used.
    """
    output_dir = "contigend_visualizations_single_genome/"
    features = []
    spans = []  # (start, end) per feature, used to size the record
    contig_array.reset_index(drop=True, inplace=True)

    for row in range(len(contig_array)):
        gene_start = contig_array["GeneStart"][row]
        gene_end = contig_array["GeneEnd"][row]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][row],
                           color=contig_array["Genecolor"][row],
                           label=str(contig_array["GeneName"][row])))
        spans.append((gene_start, gene_end))

    first_index = spans[0][0]
    record = GraphicRecord(first_index=first_index,
                           sequence_length=spans[-1][1] - first_index,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)

    title = drug + "_" + str(rgi)
    ax.set_title(title)
    ax.figure.savefig(os.path.join(output_dir, title + ".png"))
def contigend_visualization(contig_array, end_direction, genome, reverse_term, drug, rgi_gene):
    """Plot the genes next to a contig end and save the figure as a PNG.

    The frame's index is reset in place. Each row becomes one feature built
    from ``GeneStart``, ``GeneEnd``, ``Strand``, ``Genecolor`` and
    ``GeneName``. A black "Contig_Ends" marker is added before the first gene
    when ``end_direction`` is ``"upward"`` and after the last gene when it is
    ``"downward"``. The x axis is inverted when ``reverse_term`` equals the
    string ``"-1"``. The figure is titled ``drug_genome`` and written to
    ``contigend_visualizations/genome.png``. ``rgi_gene`` is not used.
    """
    features = []
    spans = []  # (start, end) per feature, used to size the record
    contig_array.reset_index(drop=True, inplace=True)

    if end_direction == "upward":
        first_start = contig_array["GeneStart"][0]
        marker = (first_start - 1500, first_start)
        features.append(
            GraphicFeature(start=marker[0], end=marker[1], strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append(marker)

    for row in range(len(contig_array)):
        gene_start = contig_array["GeneStart"][row]
        gene_end = contig_array["GeneEnd"][row]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][row],
                           color=contig_array["Genecolor"][row],
                           label=str(contig_array["GeneName"][row])))
        spans.append((gene_start, gene_end))

    if end_direction == "downward":
        # NOTE(review): start and end are both last-gene-end + 1500, so this
        # marker has zero width -- confirm that is intended.
        marker_pos = contig_array["GeneEnd"].iloc[-1] + 1500
        features.append(
            GraphicFeature(start=marker_pos, end=marker_pos, strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append((marker_pos, marker_pos))

    first_index = spans[0][0]
    record = GraphicRecord(first_index=first_index,
                           sequence_length=spans[-1][1] - first_index,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    if reverse_term == "-1":
        ax.invert_xaxis()
    ax.set_title(drug + "_" + genome)

    output_path = os.path.join("contigend_visualizations/",
                               str(genome) + ".png")
    ax.figure.savefig(output_path)
def show_feature(self, figure_width=8, xlabel=""):
    """Plot ``self._features`` on a record of length ``self._max_length``.

    Prints a message and returns ``None`` when there are no features;
    otherwise returns the axes produced by ``GraphicRecord.plot`` with
    ``xlabel`` set as a bold, size-16 x-axis label.
    """
    if len(self._features) == 0:
        print("No feautres to show")
        return None

    feature_record = GraphicRecord(sequence_length=self._max_length,
                                   features=self._features)
    axes, _ = feature_record.plot(figure_width=figure_width)
    axes.set_xlabel(xlabel, fontweight="bold", fontsize=16)
    return axes
def test_by_hand(tmpdir):
    """Build a record from hand-written features and export both views.

    Writes ``by_hand.png`` (linear) and ``by_hand_circular.png`` (circular)
    into ``tmpdir``.
    """
    # (start, end, strand, color, label) for each feature.
    specs = [
        (5, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    ]
    features = [
        GraphicFeature(start=begin, end=stop, strand=strand, color=color,
                       label=label)
        for begin, stop, strand, color, label in specs
    ]
    out_dir = str(tmpdir)

    # Linear view. The ruler-less call exists only to exercise that option.
    linear = GraphicRecord(sequence_length=1000, features=features)
    linear.plot(figure_width=5, with_ruler=False)
    linear_ax, _ = linear.plot(figure_width=5)
    linear_ax.figure.savefig(os.path.join(out_dir, "by_hand.png"))

    # Circular view of the same features.
    circular = CircularGraphicRecord(sequence_length=1000, features=features)
    circular_ax, _ = circular.plot(figure_width=4)
    circular_ax.figure.tight_layout()
    circular_ax.figure.savefig(os.path.join(out_dir, "by_hand_circular.png"),
                               bbox_inches="tight")
def Protein_structure(ID,exons,domains,path,trID,exons_in_interface):
    """Save a two-panel image of a transcript's exons and domains.

    ``Visualize_transciript`` supplies two feature lists and the record
    length. The first list is drawn without a ruler in the upper panel
    ("Coding Exons"), the second with a ruler and inline annotations in the
    lower panel ("Pfam Domains"). The figure is written to ``path + trID``.
    ``ID`` is not used. Returns ``None``.
    """
    exon_features, domain_features, record_length = Visualize_transciript(
        exons, domains, exons_in_interface)
    fig, (exon_ax, domain_ax) = plt.subplots(2, 1, figsize=(16, 3.5))

    exon_record = GraphicRecord(sequence_length=record_length,
                                features=exon_features)
    exon_record.plot(ax=exon_ax, figure_width=23, with_ruler=False)

    domain_record = GraphicRecord(sequence_length=record_length,
                                  features=domain_features)
    domain_record.plot(ax=domain_ax, figure_width=23, with_ruler=True,
                       annotate_inline=True)

    # Titles are positioned below each panel (negative y offsets).
    for axis, title, position in (
            (exon_ax, 'Coding Exons', [.5, -0.4]),
            (domain_ax, 'Pfam Domains', [.5, -0.5]),
    ):
        axis.title.set_text(title)
        axis.title.set_position(position)

    fig.savefig(path + trID, bbox_inches='tight')
    return
def plot_align(self, ax, genome_range):
    """Draw one arrow per alignment returned for ``genome_range``.

    Rows from ``self.fetch_intervals`` are dropped when bit ``0b100`` of
    ``flag`` is set, or when the ``seq`` length is not greater than
    ``genome_range.length`` times the ``length_ratio_thresh`` property
    (default 0.005). Bit ``0b10000`` of ``flag`` selects strand -1.
    (Presumably these are SAM flags -- unmapped / reverse strand -- TODO
    confirm.) Nothing is drawn when no row survives the filters.

    Args:
        ax: Axes passed to ``GraphicRecord.plot``.
        genome_range: Region object; ``start`` and ``length`` are read.
    """
    gr = genome_range
    df = self.fetch_intervals(gr)
    kept = df[np.bitwise_and(df['flag'], 0b100) == 0]
    len_thresh = self.properties.get("length_ratio_thresh", 0.005)
    kept = kept[kept['seq'].str.len() > (gr.length * len_thresh)]
    if kept.shape[0] <= 0:
        return

    # Compute the strand bit on the filtered rows and walk the columns in
    # lockstep. The previous code looked the flag up with ``.iloc[label]`` on
    # the unfiltered frame, which is only correct for a default RangeIndex.
    is_reverse = np.bitwise_and(kept['flag'], 0b10000) != 0
    color = self.properties['color']
    features = [
        GraphicFeature(
            start=pos - gr.start,
            end=pos + len(seq) - gr.start,
            strand=-1 if reverse else 1,
            color=color,
        )
        for pos, seq, reverse in zip(kept['pos'], kept['seq'], is_reverse)
    ]
    record = GraphicRecord(sequence_length=gr.length, features=features)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
def vis_sccmec(faa_file_sccmec, annotation_file, length_sccmec, core_proteins, blastp):
    """Plot an annotated SCCmec element and save it as ``SCCmec_<id>.png``.

    The annotation rows are refreshed with ``update_annotation`` and each row
    becomes a feature. Rows whose gene is ``'core-proteins'`` are orange and
    unlabelled; other genes take their color and label from the module-level
    ``colors`` and ``labels`` mappings, falling back to grey and no label.
    The id in the file name is the text after the last ``_`` and before the
    first following ``.`` of ``annotation_file``.
    """
    # Protein sequences from the annotation FASTA of the element.
    proteins = fasta2dict(faa_file_sccmec)
    # Re-annotate rows using the core proteins of the cluster.
    annotated_rows = update_annotation(annotation_data(annotation_file),
                                       blastp, proteins, core_proteins)

    features = []
    for _row_id, sense, start, end, _size, _length, gene in annotated_rows:
        if gene == 'core-proteins':
            color, label = '#ff8848', None
        else:
            try:
                color = colors[gene]
            except KeyError:
                color = 'grey'
            try:
                label = labels[gene]
            except KeyError:
                label = None

        # A sense string containing both signs yields two features.
        for sign, strand in (('-', -1), ('+', +1)):
            if sign in sense:
                features.append(
                    GraphicFeature(start=int(start), end=int(end),
                                   strand=strand, color=color, label=label))

    record = GraphicRecord(sequence_length=length_sccmec, features=features)
    ax, _ = record.plot(figure_width=20)
    element_id = annotation_file.split('_')[-1].split('.')[0]
    ax.figure.savefig('SCCmec_{}.png'.format(element_id), dpi=300)
def plot_align(self, ax, gr: GenomeRange):
    """Draw one arrow per alignment returned for ``gr``.

    Rows from ``self.fetch_plot_data`` are dropped when bit ``0b100`` of
    ``flag`` is set, or when the ``seq`` length is not greater than
    ``gr.length`` times the ``length_ratio_thresh`` property. Bit ``0b10000``
    of ``flag`` selects strand -1. (Presumably these are SAM flags --
    unmapped / reverse strand -- TODO confirm.) Nothing is drawn when no row
    survives the filters.

    Args:
        ax: Axes passed to ``GraphicRecord.plot``.
        gr: Region to draw; ``start`` and ``length`` are read.

    Raises:
        AssertionError: If ``gr`` is not a ``GenomeRange``.
    """
    assert isinstance(
        gr, GenomeRange), "The input gr should be type GenomeRange"
    df = self.fetch_plot_data(gr)
    kept = df[np.bitwise_and(df['flag'], 0b100) == 0]
    len_thresh = self.properties["length_ratio_thresh"]
    kept = kept[kept['seq'].str.len() > (gr.length * len_thresh)]
    if kept.shape[0] <= 0:
        return

    # Compute the strand bit on the filtered rows and walk the columns in
    # lockstep. The previous code looked the flag up with ``.iloc[label]`` on
    # the unfiltered frame, which is only correct for a default RangeIndex.
    is_reverse = np.bitwise_and(kept['flag'], 0b10000) != 0
    color = self.properties['color']
    features = [
        GraphicFeature(
            start=pos - gr.start,
            end=pos + len(seq) - gr.start,
            strand=-1 if reverse else 1,
            color=color,
        )
        for pos, seq, reverse in zip(kept['pos'], kept['seq'], is_reverse)
    ]
    record = GraphicRecord(sequence_length=gr.length, features=features)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
def contigend_visualization(contig_array, end_direction, genome):
    """Plot the genes next to a contig end and save ``<genome>.png``.

    The frame's index is reset in place. Each row becomes one feature built
    from ``GeneStart``, ``GeneEnd``, ``Strand``, ``Genecolor`` and
    ``GeneName``. A black "Contig_Ends" marker is added: for ``"upward"`` it
    covers the 1500 positions before the first gene, for ``"downward"`` it
    runs from 1500 to 2500 positions past the last gene end.
    """
    features = []
    spans = []  # (start, end) per feature, used to size the record
    contig_array.reset_index(drop=True, inplace=True)

    if end_direction == "upward":
        first_start = contig_array["GeneStart"][0]
        marker = (first_start - 1500, first_start)
        features.append(
            GraphicFeature(start=marker[0], end=marker[1], strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append(marker)

    for row in range(len(contig_array)):
        gene_start = contig_array["GeneStart"][row]
        gene_end = contig_array["GeneEnd"][row]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][row],
                           color=contig_array["Genecolor"][row],
                           label=str(contig_array["GeneName"][row])))
        spans.append((gene_start, gene_end))

    if end_direction == "downward":
        last_end = contig_array["GeneEnd"].iloc[-1]
        marker = (last_end + 1500, last_end + 2500)
        features.append(
            GraphicFeature(start=marker[0], end=marker[1], strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append(marker)

    first_index = spans[0][0]
    record = GraphicRecord(first_index=first_index,
                           sequence_length=spans[-1][1] - first_index,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    ax.figure.savefig(str(genome) + ".png")
def create_dna_structure(file_name):
    """Render the repeats and spacers from the request JSON into a PNG.

    The JSON body must provide ``length`` and a ``spacerRepeats`` list whose
    items carry ``position`` and ``repeat``, plus ``lengths`` when a
    ``spacer`` key is present. The plot is cropped to 50 positions either
    side of the repeats and saved as ``static/logos/<file_name>.png``.
    Returns the ``jsonify`` response for the string ``'{"success":1}'``.
    """
    results = request.get_json()
    features = []
    for number, spacer_repeat in enumerate(results['spacerRepeats'], start=1):
        repeat_start = spacer_repeat['position']
        repeat_end = repeat_start + len(spacer_repeat['repeat'])
        features.append(
            GraphicFeature(start=repeat_start, end=repeat_end, strand=+1,
                           color="#cffccc", label="Repeat_" + str(number)))
        if 'spacer' in spacer_repeat:
            # The spacer begins one position after its repeat.
            features.append(
                GraphicFeature(start=repeat_end + 1,
                               end=repeat_end + spacer_repeat['lengths'][1],
                               strand=+1, color="#ccccff",
                               label="Spacer_" + str(number)))

    record = GraphicRecord(sequence_length=results['length'],
                           features=features)
    first_repeat = results['spacerRepeats'][0]
    last_repeat = results['spacerRepeats'][-1]
    window = (first_repeat['position'] - 50,
              last_repeat['position'] + len(last_repeat['repeat']) + 50)
    record = record.crop(window)

    ax, _ = record.plot(figure_width=10)
    ax.figure.savefig('static/logos/' + str(file_name) + '.png',
                      bbox_inches='tight')
    return jsonify('{"success":1}')
def haplotype_blocks_fig(model, ref_seq):
    """Plot both aligned alleles over ``ref_seq`` and save ``haplotypes.png``.

    Each allele returned by ``model.align_alleles()`` is drawn as a forward
    feature starting at 0 and as long as the allele. The nucleotides and the
    translation of positions 8-23 are drawn on the same axes.
    """
    allele_a, allele_b = model.align_alleles()
    allele_features = [
        GraphicFeature(start=0, end=len(allele), strand=+1, color=color)
        for allele, color in ((allele_a, '#ffcccc'), (allele_b, '#cffccc'))
    ]
    record = GraphicRecord(sequence=ref_seq,
                           sequence_length=len(ref_seq),
                           features=allele_features)
    axes, _ = record.plot(figure_width=5)
    record.plot_sequence(axes)
    record.plot_translation(axes, (8, 23), fontdict={'weight': 'bold'})
    axes.figure.savefig('haplotypes.png', bbox_inches='tight')
def visualize_mrna_strand(self, dpi=120, cmap='viridis'):
    """Show the transcript with its tag, protein and probe locations.

    ``self.probe_loc`` is read as a 2-D array. Every entry equal to 1 becomes
    a two-position feature at its column index, colored by its row index.

    Args:
        dpi: Resolution of the created figure.
        cmap: Colormap (name or object) used to pick one color per row of
            ``self.probe_loc``.
    """
    features = [
        GraphicFeature(start=0, end=self.tag_length,
                       color=self._colors[0], label='Tag'),
        GraphicFeature(start=self.tag_length, end=self.total_length,
                       color=self._colors[1], label='Protein'),
    ]
    probe = self.probe_loc
    # plt.get_cmap is used instead of cm.get_cmap, which was deprecated and
    # then removed from matplotlib.
    colormap = plt.get_cmap(cmap)
    hits = np.where(probe == 1)
    ncolors = probe.shape[0]
    colors = colormap(np.linspace(.01, .95, ncolors))
    colorlabels = ['Color %d' % i for i in range(ncolors)]

    # hits[0] holds row (color) indices, hits[1] the matching column indices.
    features.extend(
        GraphicFeature(start=loc, end=loc + 2,
                       color=colors[c], linecolor=colors[c])
        for c, loc in zip(hits[0], hits[1]))
    record = GraphicRecord(sequence_length=self.total_length,
                           features=features)

    fig, ax = plt.subplots(1, dpi=dpi)
    # Degenerate lines drawn first so the legend entries get the right colors.
    for c in range(ncolors):
        ax.plot([0, 0], [0, 0], color=colors[c])
    ax, _ = record.plot(figure_width=6, ax=ax)
    ax.axes.legend(colorlabels, loc=7)
    ax.text(0, 5, 'Transcript Name: %s' % self.name)
    ax.text(0, 4, 'Total Length: %d codons' % self.total_length)
    ax.text(0, 3, 'Seq: %s ...' % self.aa_seq[:10])
    fig.show()
def test_sequence_and_translation_plotting():
    """Plot a 28-nt record, then its nucleotides and a translated span."""
    from dna_features_viewer import (
        GraphicFeature,
        GraphicRecord,
        CircularGraphicRecord,
    )

    # (start, end, color, label); both features are on the forward strand.
    specs = (
        (5, 10, "#ffd700", "bbS-1"),
        (8, 15, "#ffcccc", "CrC"),
    )
    features = [
        GraphicFeature(start=begin, end=stop, strand=+1, color=color,
                       label=label)
        for begin, stop, color, label in specs
    ]
    record = GraphicRecord(sequence=7 * "ATGC", features=features)
    axes, _ = record.plot(figure_width=5)
    record.plot_sequence(axes)
    record.plot_translation(axes, (8, 23), fontdict={"weight": "bold"})
output_file_utr ]), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p.wait() features = [] with open(output_file_utr) as fp: next(fp) for line in fp: print(line) content = line.split("\t") features.append( GraphicFeature(start=int(content[3]), end=int(content[4]), strand=+1, color=get_color(content[8]), label=re.sub(r'mmu-', '', content[1]))) record = GraphicRecord(sequence_length=len(str(utr.seq)), features=features) # Circular record.plot(figure_width=12) plt.title(' '.join([utr.id, 'sequence'])) patch1 = mpatches.Patch(color="#00ff99", label='6mer') patch2 = mpatches.Patch(color="#9999ff", label='7mer-1a') patch3 = mpatches.Patch(color="#ff66cc", label='7mer-m8') patch4 = mpatches.Patch(color="#ff0000", label='8mer-1a') plt.legend(handles=[patch1, patch2, patch3, patch4]) plt.show()
def probe_graph(file_name, path):
    """Write the probe-set text file and one plot per probe pair.

    ``file_name`` is read as ``:``-separated lines keyed by the first field:
    ``Sequence`` (field 1), ``Initiator`` (fields 1 and 2), ``Name``
    (field 1) and ``Probes`` (probe1, start, probe2, end in fields 1-4).
    For every ``Probes`` line a figure is saved to
    ``<path>/<name>Plots for Probes<k>`` and three lines are appended to
    ``<path>/<name>_Probe_Set.txt``.

    Args:
        file_name: Path of the results file to read.
        path: Directory that receives the text file and the figures.
    """
    probes = []
    # Context managers guarantee both handles are closed, including when a
    # malformed line raises part-way through.
    with open(file_name, "r") as results_file:
        for line in results_file:
            print(line)
            line_info = line.split(':')
            tag = line_info[0]
            if tag == "Sequence":
                sequence = line_info[1]
            elif tag == "Initiator":
                initiator1 = line_info[1].upper()
                initiator2 = line_info[2].upper()
            elif tag == "Probes":
                # Stored in groups of four: probe1, probe2, start, end.
                probes.append(line_info[1].upper())
                probes.append(line_info[3].upper())
                probes.append(line_info[2])
                probes.append(line_info[4])
            elif tag == "Name":
                name = line_info[1].rstrip()

    file_write = path + "/" + name + "_Probe_Set" ".txt"
    with open(file_write, "w+") as w:
        x = 1
        print(probes)
        print(name)
        for a in range(0, len(probes), 4):
            gstart = 28
            gend = 3
            probe1 = str(Seq(probes[a]).complement())
            probe2 = str(Seq(probes[a + 1]).complement())
            start = int(probes[a + 2]) - 4
            if start <= 0:
                # The window would begin before the sequence start: clamp it
                # to 0 and shift the feature anchors accordingly.
                gend = 0 - start - 1
                gstart = gend + 25
                start = 0
            end = int(probes[a + 3])
            subseq = sequence[start:end].upper()

            record = GraphicRecord(
                sequence=subseq,
                features=[
                    GraphicFeature(start=gstart, end=gend, strand=+1,
                                   color='#ffcccc', label=probe1),
                    GraphicFeature(start=gstart + 28, end=gstart + 2,
                                   strand=+1, color='#ccccff', label=probe2),
                    GraphicFeature(start=gstart, end=gstart, strand=-1,
                                   color='m', label="space"),
                    GraphicFeature(start=gstart + 1, end=gstart + 1,
                                   strand=-1, color='m', label="space"),
                    GraphicFeature(start=gstart,
                                   end=(gstart - len(initiator1)),
                                   strand=-1, color='y', label=initiator1),
                    # NOTE(review): the width uses len(initiator1) although
                    # the label is initiator2 -- confirm both are always the
                    # same length.
                    GraphicFeature(start=gstart + 2,
                                   end=(gstart + 2 + len(initiator1)),
                                   strand=+1, color='y', label=initiator2),
                ])
            ax, _ = record.plot(figure_width=10)
            record.plot_sequence(ax)

            # Full probes: initiator, "TT" spacer, reversed complement.
            total1 = initiator1 + "TT" + probe1[::-1]
            total2 = probe2[::-1] + "TT" + initiator2
            w.write("PROBE SET" + str(x) + "\n")
            w.write("Probe1:" + total1 + "\n")
            w.write("Probe2:" + total2 + "\n")

            tosave = path + "/" + name + "Plots for Probes" + str(x)
            x += 1
            ax.figure.savefig(tosave, bbox_inches='tight')
strand=-1, color="#cffccc", label="Gene 2"), GraphicFeature(start=600, end=900, strand=+1, color="#ccccff", label="Gene 3") ]) zoom_start, zoom_end = 398, 428 # coordinates of the "detail" cropped_record = record.crop((zoom_start, zoom_end)) fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 3)) # PLOT THE WHOLE SEQUENCE ax1.set_title("Whole sequence", loc='left', weight='bold') record.plot(ax=ax1) ax1.fill_between((zoom_start, zoom_end), +1000, -1000, alpha=0.15) # PLOT THE SEQUENCE DETAILS cropped_record.plot(ax=ax2) cropped_record.plot_sequence(ax=ax2) cropped_record.plot_translation(ax=ax2, location=(408, 423), fontdict={'weight': 'bold'}) ax2.set_title("Sequence detail", loc='left', weight='bold') fig.savefig('overview_and_detail.png', bbox_inches='tight')
def upload(request):
    """Handle the upload page: scan the uploaded sequence for known motifs.

    On POST the uploaded ``document`` is decoded as UTF-8 and truncated to
    ``position`` characters. Every sequence listed in ``myapp/Jaspar.txt``
    (``;``-separated id, sequence, organism, database) is searched in it as a
    regular expression. Matches are drawn and saved under ``myapp/static/``
    and passed to the ``upload.html`` template together with the figure name.
    On any other method the template is rendered with empty defaults.
    """
    posted = False
    gene_sequence = ''
    figure_name = ''
    zipped = {}

    if request.method == 'POST':
        posted = True
        uploaded_file = request.FILES['document']
        position = int(request.POST.get('position'))

        # One (id, sequence, organism, database) tuple per reference line.
        motifs = []
        with open('myapp/Jaspar.txt', "r") as file:
            for line in file:
                fields = line.split(';')
                motifs.append((fields[0], fields[1], fields[2],
                               fields[3].rstrip('\n')))

        gene_sequence = ''.join(
            chunk.decode('utf-8') for chunk in uploaded_file)[0:position]

        matched_ids = []
        matched_sequences = []
        matched_organisms = []
        matched_database = []
        all_locations = []
        features = []
        for motif_id, sequence, organism, source_db in motifs:
            locations = [
                m.start() for m in re.finditer(sequence, gene_sequence)
            ]
            if not locations:
                continue
            matched_sequences.append(sequence)
            all_locations.append(locations)
            matched_organisms.append(organism)
            matched_database.append(source_db)
            matched_ids.append(motif_id)
            for location in locations:
                features.append(
                    GraphicFeature(start=location,
                                   end=location + len(sequence),
                                   strand=+1,
                                   color="#ffd700",
                                   label=sequence))

        record = GraphicRecord(sequence_length=len(gene_sequence),
                               features=features)
        ax, _ = record.plot(figure_width=30)
        figure_name = uploaded_file.name + str(position) + '.png'
        ax.figure.savefig('myapp/static/' + figure_name, bbox_inches='tight')

        print(matched_sequences)
        print(matched_organisms)
        print(all_locations)
        print(matched_database)

        sequence_nos = list(range(len(matched_sequences)))
        zipped = tuple(
            zip(matched_ids, matched_sequences, matched_organisms,
                matched_database, all_locations, sequence_nos))

    context = {
        'posted': posted,
        'zipped': zipped,
        'figure_name': figure_name,
        'gene_sequence': gene_sequence
    }
    return render(request, 'upload.html', context)
strand=+1, color="#ffcccc", label="Gene 1 with a very long name"), GraphicFeature(start=400, end=700, strand=-1, color="#cffccc", label="Gene 2"), GraphicFeature(start=600, end=900, strand=+1, color="#ccccff", label="Gene 3") ] record = GraphicRecord(sequence_length=1000, features=features) record.plot(figure_width=5) from dna_features_viewer import BiopythonTranslator graphic_record = BiopythonTranslator().translate_record( "../Callithrix_Analysis/DATA/!CLEAN/YFV_polyprotein_AFH35044.gb") ax, _ = graphic_record.plot(figure_width=10) graphic_record.features = graphic_record.features[2:-1] ax, _ = graphic_record.plot(figure_width=10) dir(graphic_record.features[0]) graphic_record.features[0].label = "capsid" graphic_record.features[0].color = "red" graphic_record.features[1].label = "propep"
def plot_bgc_genes(query_id, BGCs_dict, BGC_measure, num_candidates=10, sharex=True, labels=False, dist_method="centroid", spacing=1):
    """Plot the genes of the BGCs most similar to ``query_id``, one per row.

    Args:
        query_id: Row index into the similarity tables of ``BGC_measure``.
        BGCs_dict: Mapping of BGC key to a dict with a ``"genes"`` list. Each
            gene is read as ``(name, (start, end, strand), note, type)``,
            where ``note`` is either ``[]`` or a string.
        BGC_measure: Object exposing ``list_similars_<m>_idx`` and
            ``list_similars_<m>`` tables for each supported method.
        num_candidates: How many similar BGCs to draw.
        sharex: Share the x axis and extend each row to the longest BGC.
        labels: Draw gene names on the features.
        dist_method: One of "centroid", "pca", "autoencoder", "lda", "lsi"
            or "doc2vec". Any other value prints a message and returns
            without plotting.
        spacing: Figure height per candidate row.
    """
    # Attribute-name suffix used by BGC_measure for each distance method.
    method_suffix = {
        "centroid": "ctr",
        "pca": "pca",
        "autoencoder": "ae",
        "lda": "lda",
        "lsi": "lsi",
        "doc2vec": "d2v",
    }
    suffix = method_suffix.get(dist_method)
    if suffix is None:
        # Previously execution carried on and crashed on an unbound name.
        print("Chosen distance measuring method not found.")
        return
    candidates_idx = getattr(
        BGC_measure, "list_similars_" + suffix + "_idx")[query_id, :num_candidates]
    candidates_dist = getattr(
        BGC_measure, "list_similars_" + suffix)[query_id, :num_candidates]

    keys = list(BGCs_dict)
    BGC_genes = [BGCs_dict[keys[candidate_id]]["genes"]
                 for candidate_id in candidates_idx]

    # Collect the second token of every note; one color per distinct token.
    note_names = []
    for genes in BGC_genes:
        for feature in genes:
            if feature[2] != []:
                note_names.append(feature[2].replace(":", " ").split()[1])
    notes_unique = list(set(note_names))
    selected_colors = get_spaced_colors(len(notes_unique) + 1)
    color_by_note = {note: selected_colors[k]
                     for k, note in enumerate(notes_unique)}

    # squeeze=False keeps a 2-D axes array even for a single candidate, so
    # indexing by row below always works.
    fig, ax0 = plt.subplots(len(BGC_genes),
                            1,
                            figsize=(10, spacing * num_candidates),
                            sharex=sharex,
                            squeeze=False)
    axes = ax0[:, 0]
    fig.suptitle("Gene feature comparison (similarity measure: " +
                 dist_method + ")")
    max_xlim = max(genes[-1][1][1] for genes in BGC_genes)

    for i, genes in enumerate(BGC_genes):
        features = []
        for feature in genes:
            if feature[2] != []:
                color = color_by_note[
                    feature[2].replace(":", " ").split()[1]]
            else:
                color = "black"
            features.append(
                GraphicFeature(start=feature[1][0],
                               end=feature[1][1],
                               strand=feature[1][2],
                               color=color,
                               label=feature[0] if labels else None,
                               thickness=9,
                               linewidth=0.5,
                               fontdict={"size": 9}))
        record = GraphicRecord(sequence_length=features[-1].end,
                               features=features)
        record.plot(ax=axes[i], with_ruler=True)

        info1 = "BGC no. %d " % candidates_idx[i]
        info2 = dist_method + " similarity = %.3f" % candidates_dist[i]
        axes[i].text(0.02,
                     0.75,
                     info1 + info2,
                     size=10,
                     ha="left",
                     transform=axes[i].transAxes)
        if sharex:
            axes[i].set_xlim([axes[i].get_xlim()[0], max_xlim])
def mibig_viewer(mibig_dir, mibig_transporters, bgc, to_label=[
        'ABC_tran', 'BPD_transp_1', 'TonB_dep_Rec', 'ABC_membrane',
        'ACR_tran', 'FecCD', 'ABC2_membrane', 'MatE', 'OEP', 'FtsX',
        'MFS_3', 'MFS_1', 'ABC2_membrane_3', 'MacB_PCD', 'ABC2_membrane_4',
        'MMPL', 'BPD_transp_2', 'Peripla_BP_2', 'SBP_bac_1', 'Peripla_BP_4',
        'SBP_bac_5', 'SBP_bac_8'
]):
    """Plot the CDS features of one MIBiG cluster, labelling transporters.

    Args:
        mibig_dir: Directory containing ``<bgc>.gbk``.
        mibig_transporters: Whitespace-separated table; the first column is a
            gene id of the form ``<bgc>_<n>`` and the second a family name.
            Lines starting with ``#`` are skipped.
        bgc: Cluster accession; used for the file name and the gene ids.
        to_label: Family names that earn a gene a label. The default list is
            only read, never mutated.
    """
    from Bio import SeqIO
    from dna_features_viewer import GraphicFeature, GraphicRecord

    # gene id -> space-joined family names
    transporters = {}
    with open(mibig_transporters) as table:
        for line in table:
            if line.startswith("#"):
                continue
            fields = line.split()
            if not fields:
                # Blank lines used to raise IndexError.
                continue
            gene_id, family = fields[0], fields[1]
            if gene_id in transporters:
                transporters[gene_id] = transporters[gene_id] + " " + family
            else:
                transporters[gene_id] = family

    colors = {
        'biosynthetic': "#850000",
        'biosynthetic-additional': "#ea8686",
        'other': "#dbdbdb",
        'regulatory': "#7cd369",
        'resistance': "#307321",
        'transport': "#3c85cd"
    }
    genes = []
    length = 0
    last_end = 0
    i = 0
    for record in SeqIO.parse(
            mibig_dir.rstrip("/") + "/" + bgc + ".gbk", "genbank"):
        for feature in record.features:
            if feature.type != 'CDS':
                continue
            i += 1
            feature_name = bgc + "_" + str(i)
            try:
                color = colors[feature.qualifiers['gene_kind'][0]]
            except (KeyError, IndexError):
                # No gene_kind qualifier, or a kind without a color.
                color = colors['other']

            # Overlapping genes are shifted to start just after the previous
            # one ends.
            if feature.location.start < last_end:
                start = last_end + 1
            else:
                start = feature.location.start

            # Only genes whose families include a requested name get a label.
            extra = {}
            families = transporters.get(feature_name)
            if families is not None and any(
                    transporter in families for transporter in to_label):
                extra["label"] = families
            genes.append(
                GraphicFeature(start=start,
                               end=feature.location.end,
                               strand=feature.location.strand,
                               color=color,
                               **extra))

            if feature.location.end > length:
                length = feature.location.end
            last_end = feature.location.end

    graphic_record = GraphicRecord(sequence_length=length, features=genes)
    graphic_record.plot(figure_width=15)
def show_crispr_grna_results(
    sequence: str,
    guides: List[dict],
    indexes: Optional[List[int]] = None,
    scoreField: str = "onTargetScore",
):
    """Plot CRISPR guide RNA results on top of the organism sequence.

    Args:
        sequence (str): The complete organism sequence.
        guides (list): Guide records. Each needs ``start`` and ``end`` (int,
            limits of the guide in ``sequence``), ``forward`` (truthy for the
            forward strand) and the field named by ``scoreField``.
        indexes (list): Start and end of the targeted sequence within the
            complete sequence. When omitted the target is not drawn and the
            plot range is derived from the guides.
        scoreField (str): Which score to show in each guide label, e.g.
            "onTargetScore" (default) or "offTargetScore".
    """
    if indexes is None:
        # No target given: it is not drawn, and the crop window comes from
        # the guides' own extent.
        target_features = []
        indexes = [
            min(guide['start'] for guide in guides),
            max(guide['end'] for guide in guides),
        ]
    else:
        target_features = [
            GraphicFeature(
                start=indexes[0],
                end=indexes[1],
                color="#cffccc",
                label="Sequence",
                strand=+1,
            ),
        ]

    guide_features = [
        GraphicFeature(
            start=guide['start'],
            end=guide['end'] + 1,
            color="#ffcccc",
            label=f"{scoreField}: {guide[scoreField]}",
            strand=+1 if guide['forward'] else -1,
        ) for guide in guides
    ]
    record = GraphicRecord(
        sequence=sequence,
        features=target_features + guide_features,
    )

    # Keep a small margin around the region of interest.
    record = record.crop((indexes[0] - 10, indexes[1] + 11))

    ax, _ = record.plot(figure_width=20)
    record.plot_sequence(ax)
#!/usr/bin/env python36
# -*- coding: UTF-8 -*-
"""Plot the exons listed in a tab-separated file and save them as a PNG.

Columns 4 and 5 (1-based) of each input line are read as exon start and end;
both are shifted by ``startP`` so they become offsets into the plotted
region.
"""
from dna_features_viewer import GraphicFeature, GraphicRecord

startP = 29909037  # subtracted from every start/end read from the file
seq_len = 4625  # length of the plotted region
inputA = 'ENST00000396634_HLA-A_exons.txt'

seqFeat = []
# The context manager closes the file even when a line fails to parse.
with open(inputA, 'r') as fileH:
    for line in fileH:
        x = line.rstrip().split('\t')
        chrName = x[0]
        s = int(x[3]) - startP
        e = int(x[4]) - startP
        seqFeat.append(
            GraphicFeature(start=s, end=e + 1, strand=+1, color='#ffcccc'))

record = GraphicRecord(sequence_length=seq_len, features=seqFeat)
ax, _ = record.plot(figure_width=18)
# NOTE(review): this draws the same record a second time on the same axes;
# it looks redundant but is kept so the output is unchanged -- confirm.
record.plot(ax)
ax.figure.savefig('Gene_sequence_hlaA_exons.png', bbox_inches='tight')
import matplotlib.pyplot as plt
from dna_features_viewer import (GraphicFeature, GraphicRecord,
                                 CircularGraphicRecord)

# Three overlapping demo genes on a 1000-position record.
features = [
    GraphicFeature(
        start=20,
        end=500,
        strand=+1,
        color="#ffcccc",
        label="Gene 1 with a name",
    ),
    GraphicFeature(
        start=400,
        end=700,
        strand=-1,
        color="#cffccc",
        label="Gene 2",
    ),
    GraphicFeature(
        start=600,
        end=900,
        strand=+1,
        color="#0000ff",
        label="Gene 3",
    ),
]

record = GraphicRecord(sequence_length=1000, features=features)
# Label styling: no label box, hand-written font.
record.default_box_color = None
record.default_font_family = 'Walter Turncoat'

# Plot and save inside the xkcd context so the sketch style applies.
with plt.xkcd():
    plt.rcParams["font.family"] = 'Permanent Marker'  # ruler font
    plt.rcParams["xtick.labelsize"] = 'small'
    ax, _ = record.plot(figure_width=5, annotate_inline=False)
    ax.figure.tight_layout()
    ax.figure.savefig("cartoon_style.png", dpi=200)
def main():
    '''Streamlit app that designs a toehold-switch BioBrick.

    Collects the user's preferences in the sidebar (organism, assembly
    standards, promoter/RBS strength, target sequence, candidate pool size,
    minimum On/Off ratio and output protein). When the "Assemble BioBrick"
    button is pressed it selects the parts, assembles the sequence and shows
    the plain sequence, the annotation table, a download link and a map.'''

    # NOTE: the original had a bare `st.cache(persist=True)` statement here
    # (no `@`), which was a no-op. The function stays uncached because its
    # result also depends on the enclosing `enzyme_set`.
    def Promoter_Selection(strength):
        '''Loads the Anderson promoters' data as a pd.DataFrame, computes the
        distance between the requested and the reported strength and returns
        the Id, Sequence and relative strength (as dict) of the closest
        promoter that is compatible with the selected standards.
        Raises ValueError when no promoter is compatible.'''
        promoter_df=pd.read_csv('Andersonpromoters.csv')
        promoter_df=promoter_df.dropna(axis=0,how='any')
        promoter_df['Distance']=abs(promoter_df['Measured Strengthb']-strength)
        while not promoter_df.empty:
            # Series.idxmin takes no axis=1; the default works on the column
            promoter_index=promoter_df['Distance'].idxmin()
            if test_standard(promoter_df.loc[promoter_index,'Sequencea'],enzyme_set):
                promoter_data={'id':promoter_df.loc[promoter_index,'Identifier'],
                               'seq':promoter_df.loc[promoter_index,'Sequencea'],
                               'Strength':promoter_df.loc[promoter_index,'Measured Strengthb']}
                return promoter_data
            # Not compatible: discard it and try the next closest one
            promoter_df=promoter_df.drop(promoter_index,axis=0)
        raise ValueError('No Anderson promoter is compatible with the selected standards')

    # NOTE: same as above, the original bare `st.cache(persist=True)` was a no-op.
    def RBS_Selection(strength):
        '''Loads a data set of selected BioBrick RBS as a pd.DataFrame,
        computes the distance between the requested and the reported strength
        and returns the Id, Sequence and relative strength (as dict) of the
        closest RBS that is compatible with the selected standards.
        Raises ValueError when no RBS is compatible.'''
        RBS_df=pd.read_csv('RBS.csv')
        RBS_df=RBS_df.dropna(axis=0,how='any')
        RBS_df['Distance']=abs(RBS_df['Strength']-strength)
        while not RBS_df.empty:
            RBS_index=RBS_df['Distance'].idxmin()
            if test_standard(RBS_df.loc[RBS_index,'Sequence'],enzyme_set):
                RBS_data={'id':RBS_df.loc[RBS_index,'Identifier'],
                          'seq':RBS_df.loc[RBS_index,'Sequence'],
                          'Strength':RBS_df.loc[RBS_index,'Strength']}
                return RBS_data
            RBS_df=RBS_df.drop(RBS_index)
        raise ValueError('No RBS is compatible with the selected standards')

    def _parse_mfe(output):
        '''Return the free energy (kcal/mol) printed in the trailing
        "(energy)" field of the raw (bytes) RNAfold/RNAcofold output.
        Raises ValueError when no such field is found.'''
        import re
        # A regex instead of a fixed slice: the slice [-9:-3] silently dropped
        # the sign of energies with three integer digits (<= -100).
        match=re.search(rb'\(\s*(-?\d+(?:\.\d+)?)\s*\)\s*$',output)
        if match is None:
            raise ValueError('Could not read a free energy from the ViennaRNA output')
        return float(match.group(1))

    def MFE_Toehold(seq):
        '''Return the MFE (kcal/mol) of the RNA secondary structure of a given sequence'''
        # Raw string keeps the backslashes literal; the list form lets
        # subprocess quote the spaces in the path.
        MFE_calc=Popen([r'C:\Program Files (x86)\ViennaRNA Package\RNAfold.exe'], stdin=PIPE, stdout=PIPE)
        Result=MFE_calc.communicate(seq.encode())
        return _parse_mfe(Result[0])

    def MFE_Hybridization(seq_toehold,seq_target):
        '''Return the MFE (kcal/mol) of the hybridization of two given RNA sequences'''
        MFE_calc=Popen([r'C:\Program Files (x86)\ViennaRNA Package\RNAcofold.exe'], stdin=PIPE, stdout=PIPE)
        # NOTE(review): RNAcofold usually takes both strands joined by '&';
        # confirm this FASTA-style input really yields the dimer energy.
        Input='>Seq_toehold\n'+seq_toehold+'\n>Seq_target\n'+seq_target
        Result=MFE_calc.communicate(Input.encode())
        return _parse_mfe(Result[0])

    @st.cache(persist=True)
    def train_classifier(treshold):
        '''Fit a logistic regression that predicts, from the two MFE values,
        whether a toehold reaches the given minimum On/Off ratio (class 1)'''
        toehold_df=pd.read_excel('Toehold_Data_Processed.xlsx')
        toehold_df['Class']=np.where(toehold_df['On/Off ratio']>=treshold, 1,0)
        X_values=toehold_df.loc[:,'MFE Toehold':'MFE Hybridization']
        y_values=toehold_df['Class']
        clf=LogisticRegression().fit(X_values, y_values)
        return clf

    def ToeholdSequence_gen(seq,pool,rbs,treshold):
        '''From a given DNA sequence generates a pool of n random 30 nt
        subsequences, checks them for standard compatibility and specificity,
        assembles the toehold switch according to the specification by Green
        et al., 2014, calculates the MFE of the secondary structure and of the
        hybridization, predicts the probability of reaching the minimum
        (treshold) On/Off ratio and returns the best candidate as dict.
        Raises ValueError when the sequence is shorter than 30 nt or when no
        candidate passes the checks.'''
        seq=seq.lower()
        if len(seq)<30:
            raise ValueError('The target sequence must be at least 30 nt long')
        targets=[]
        toeholds=[]
        reversed_targets=[]
        mutated_targets=[]
        for _ in range(pool):
            n=randint(0,len(seq)-30)
            target=seq[n:n+30]
            target_dna=Seq(target,generic_dna)
            target_reverse=str(target_dna.reverse_complement())
            target_mutated=target[:6]+'ATG'+target[9:18]
            target_assembled=target_reverse+'CAAG'+rbs['seq']+target_mutated+'AACCTGGCGGCAGCGCAAAAG'
            # The cheap local check goes first, BLAST runs only if it passes
            if test_standard(target_assembled,enzyme_set) and BLAST_test(target,organism):
                targets.append(target)
                toeholds.append(target_assembled)
                reversed_targets.append(target_reverse)
                mutated_targets.append(target_mutated)
        toehold_df=pd.DataFrame({'Target':targets,'Toehold':toeholds,'Reversed':reversed_targets,'Mutated':mutated_targets})
        if toehold_df.empty:
            raise ValueError('No toehold candidate passed the standard and specificity checks')
        toehold_df['MFE Toehold']=[MFE_Toehold(candidate) for candidate in toehold_df['Toehold']]
        toehold_df['MFE Hybridization']=[MFE_Hybridization(candidate,target)
                                         for candidate,target in zip(toehold_df['Toehold'],toehold_df['Target'])]
        x=toehold_df.loc[:,'MFE Toehold':'MFE Hybridization']
        clf=train_classifier(treshold)
        # Column 1 of predict_proba is the probability of class 1,
        # i.e. On/Off ratio >= treshold
        toehold_df['class']=clf.predict_proba(x)[:,1]
        # Best candidate = highest probability (the original took the lowest)
        toehold_index=toehold_df['class'].idxmax()
        toehold={'Toehold_seq':toehold_df.loc[toehold_index,'Toehold'],
                 'seq_target':toehold_df.loc[toehold_index,'Target'],
                 'id_reversed':'NA','seq_reversed':toehold_df.loc[toehold_index,'Reversed'],
                 'id_spacer':'NA','seq_spacer':'CAAG',
                 'id_mutated':'NA','seq_mutated':toehold_df.loc[toehold_index,'Mutated'],
                 'id_linker':'NA','seq_linker':'AACCTGGCGGCAGCGCAAAAG'}
        return toehold

    @st.cache(persist=True)
    def get_standard(standard_list):
        '''Create a set containing the prohibited restriction sites (subsequences) from a given standard list'''
        enzyme_set=set()
        if 'RFC10' in standard_list:
            enzyme_set.update(['gaattc','tctaga','actagt','ctgcag','gcggccgc'])
        if 'RFC12' in standard_list:
            enzyme_set.update(['gaattc','actagt','gctagc','ctgcag','gcggccgc','cagctg','ctcgag','tctaga','gctcttc','gaagagc'])
        if 'RFC21' in standard_list:
            enzyme_set.update(['gaattc','agatct','ggatcc','ctcgag'])
        return enzyme_set

    def test_standard(seq,enzyme_set):
        '''Given a DNA sequence and a set of prohibited restriction sites,
        returns True if none of the prohibited sites is found in the sequence'''
        seq=seq.lower()
        return not any(enz in seq for enz in enzyme_set)

    def BLAST_test(seq,organism):
        '''Runs a BLAST to test whether a given sequence is present in a given
        organism, returns False if it is present'''
        organism=organism.lower()
        result_handle=NCBIWWW.qblast('blastn','nt',seq)
        blast_record=NCBIXML.read(result_handle)
        for alignment in blast_record.alignments:
            for hsp in alignment.hsps:
                # Only full-length identical hits count as "present"
                if int(hsp.identities)==len(seq):
                    if organism in alignment.title.lower():
                        return False
        return True

    @st.cache(persist=True)
    def Check_DNA(seq):
        '''Check if a given sequence is a valid DNA sequence (only a, t, c, g;
        case-insensitive), returns True if it is'''
        return all(base in 'atcg' for base in seq.lower())

    @st.cache(persist=True)
    def load_reporters(standard_list):
        '''Loads the reporter protein dataset as pd.DataFrame and returns only
        the instances whose 'Standard' field lists every selected standard'''
        standard_list=list(standard_list)
        reporter_df=pd.read_excel('Reporters.xlsx')
        reporter_df=reporter_df.drop_duplicates('Sequence')
        non_compliant=[ind for ind in reporter_df.index
                       if any(standard not in reporter_df.loc[ind,'Standard'] for standard in standard_list)]
        # DataFrame.drop returns a new frame; the original discarded it, so
        # non-compliant reporters were never removed
        return reporter_df.drop(index=non_compliant)

    @st.cache(persist=True)
    def Assembler(promoter,rbs,toehold,output_df):
        '''Assemble the BioBrick from the given parts, returns the assembled
        sequence and the annotation table. Start/End are 1-based inclusive
        coordinates of every part within the assembled sequence.'''
        output_df=output_df.reset_index()
        prefix='GAATTCGCGGCCGCTTCTAGAG'
        scar='TACTAGAG'
        terminator='tcacactggctcaccttcgggtgggcctttctgcgtttatatactagagagagaatataaaaagccagattattaatccggcttttttattattt'
        suffix='TACTAGTAGCGGCCGCTGCAG'
        output_seq=output_df.loc[0,'Sequence']
        seq=prefix+promoter['seq']+scar+toehold['Toehold_seq']+output_seq+scar+terminator+suffix
        types=['BioBrick prefix','Promoter',"Trigger'",'Spacer','RBS','Toehold complement','Linker','Output','Terminator','BioBrick suffix']
        ids=['BioBrick prefix',promoter['id'],toehold['id_reversed'],toehold['id_spacer'],rbs['id'],toehold['id_mutated'],toehold['id_linker'],output_df.loc[0,'Id'],'BBa_B0014','BioBrick suffix']
        MoreInfo=['NA','Strength: '+str(promoter['Strength']),'NA','Green et al., 2014','Strength: '+str(rbs['Strength']),'NA','Green et al., 2014',output_df.loc[0,'Description'],'Double E.coli terminator, widely used','NA']
        Sequences=[prefix,promoter['seq'],toehold['seq_reversed'],toehold['seq_spacer'],rbs['seq'],toehold['seq_mutated'],toehold['seq_linker'],output_seq,terminator,suffix]
        # Walk through the parts in assembly order so the coordinates follow
        # the real part lengths. An unannotated scar precedes the toehold
        # (part 2) and the terminator (part 8).
        parts_after_scar={2,8}
        Starts=[]
        Ends=[]
        position=0
        for part_index,part in enumerate(Sequences):
            if part_index in parts_after_scar:
                position+=len(scar)
            Starts.append(position+1)
            position+=len(part)
            Ends.append(position)
        assembled_df=pd.DataFrame({'Type':types,'BioBrick Id':ids,'Start':Starts,'End':Ends,'More Information':MoreInfo,'Sequence':Sequences})
        return seq, assembled_df

    @st.cache(persist=True)
    def to_excel(df):
        '''Return the given dataframe serialized as the bytes of an xlsx file'''
        output = BytesIO()
        # The context manager closes (and thereby writes) the workbook;
        # ExcelWriter.save() no longer exists in recent pandas.
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            df.to_excel(writer, sheet_name='Sheet1')
        return output.getvalue()

    @st.cache(persist=True)
    def get_table_download_link(df):
        """Generates a link allowing the data in a given panda dataframe to be downloaded
        in:  dataframe
        out: href string
        """
        val = to_excel(df)
        b64 = base64.b64encode(val)  # val looks like b'...'
        return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="Toeholdswitch.xlsx">Download as Excel</a>'

    # ---------------------------- Sidebar inputs ----------------------------
    st.title('BioBrick Builder: Toeholdswitch designer')
    st.sidebar.title('BioBrick Builder: Toeholdswitch designer')
    st.sidebar.markdown('Fill out the fields with your preferences for your toeholdswitch part')
    st.sidebar.subheader('For which organism is your part?')
    organism=st.sidebar.selectbox('Organism',('Escherichia coli','Saccharomyces cerevisiae','Insects','Arabidopsis thaliana'))
    st.sidebar.markdown('')
    st.sidebar.subheader('Standards')
    standards=st.sidebar.multiselect('Select the standards that you part should be compatible with',('RFC10','RFC21'))
    enzyme_set=get_standard(standards)
    st.sidebar.markdown('')
    st.sidebar.subheader('Promoter')
    promoter_strength=st.sidebar.slider('Desired promoter strength (relative to BBa_J23100)',0.0,1.0,key='promoter_strength')
    st.sidebar.markdown('')
    st.sidebar.subheader('RBS')
    RBS_strength=st.sidebar.slider('Desired RBS strength (relative to BBa_B0034)',0.0,1.0,key='RBS_strength')
    st.sidebar.markdown('')
    st.sidebar.subheader('Target Sequence')
    target_sequence=st.sidebar.text_area('Enter your target sequence')
    if not Check_DNA(target_sequence):
        st.sidebar.markdown('Please enter a valid target sequence')
    st.sidebar.markdown('Enter the size of the pool to generate the toeholdswitch')
    pool=st.sidebar.number_input('Size of the pool to generate',3,10000,step=10)
    st.sidebar.markdown('Enter the minimum acceptable On/Off ratio')
    treshold=st.sidebar.number_input('Minimum acceptable On/Off ratio',10.0,70.0,step=5.0)
    st.sidebar.markdown('')
    st.sidebar.subheader('Output fron the toholdswitch')
    output_type=st.sidebar.selectbox('Type of output',('Reported BioBrick reporter protein','Your own output protein'))

    # Stays None until a usable output part is available, so the assemble
    # button can report the problem instead of failing with a NameError.
    output_df=None
    if output_type=='Your own output protein':
        output_sequence=st.sidebar.text_area('Enter your output sequence')
        if not Check_DNA(output_sequence):
            st.sidebar.markdown('Please enter a valid output sequence')
        elif not test_standard(output_sequence,enzyme_set):
            st.sidebar.markdown('Sequence not compatible with the selected standards')
        else:
            output_df=pd.DataFrame({'Id':'NA','Description':'User-defined output sequence','Sequence':output_sequence},index=[0])
    else:
        reporter_df=load_reporters(standards)
        reporters=tuple(reporter_df['Description'])
        st.sidebar.markdown('Select the desired reporter protein')
        output_name=st.sidebar.selectbox('Reporter protein',reporters)
        reporter_index=reporter_df.index[reporter_df['Description']==output_name].tolist()
        if reporter_index:
            # Select by index label: the labels are no longer contiguous once
            # rows were dropped, so a positional slice would pick another row.
            # Copy so the cached frame is not modified.
            output_df=reporter_df.loc[[reporter_index[0]]].copy()
            output_df.columns=['Id','Description','Sequence','Standard']

    # ------------------------------- Assembly -------------------------------
    if st.sidebar.button('Assemble BioBrick',key='Assemble BioBrick'):
        if output_df is None:
            st.error('Please provide a valid output sequence before assembling')
            return
        if not Check_DNA(target_sequence):
            st.error('Please enter a valid target sequence before assembling')
            return
        try:
            promoter=Promoter_Selection(promoter_strength)
            rbs=RBS_Selection(RBS_strength)
            toehold=ToeholdSequence_gen(target_sequence,pool,rbs,treshold)
        except ValueError as error:
            # Raised when no compatible part or toehold candidate exists
            st.error(str(error))
            return
        final_seq,result_df=Assembler(promoter,rbs,toehold,output_df)
        st.subheader('Results')
        st.markdown('Plain Sequence')
        st.markdown('')
        half=len(final_seq)//2
        st.write("5'-"+final_seq[:half])
        st.write(final_seq[half:]+"-3'")
        st.markdown('')
        st.markdown('Annotation Table')
        st.write(result_df.set_index('Type'))
        st.markdown(get_table_download_link(result_df), unsafe_allow_html=True)
        st.markdown('Map')
        colors=['#ADD8E6','#00FF00','#00FFFF','#FFFFFF','#008000','#0000FF','#FFA500','#800080','#FF0000','#ADD8E6']
        features=[]
        for ind in list(result_df.index.values):
            # The table is 1-based inclusive, GraphicFeature starts are 0-based
            features.append(GraphicFeature(start=result_df.loc[ind,'Start']-1,
                                           end=result_df.loc[ind,'End'],
                                           strand=+1,color=colors[ind],label=result_df.loc[ind,'Type']))
        record=GraphicRecord(sequence_length=len(final_seq), features=features)
        ax,_=record.plot(figure_width=5)
        # Hand the figure over explicitly instead of relying on the global one
        st.pyplot(ax.figure)
from dna_features_viewer import GraphicFeature, GraphicRecord

# Two overlapping forward-strand features on a short record.
_annotations = [
    GraphicFeature(start=5, end=10, strand=+1, color='#ffcccc'),
    GraphicFeature(start=8, end=15, strand=+1, color='#ccccff'),
]
record = GraphicRecord(
    sequence="ATGCATGCATGCATGCATGCATGCATGC",
    features=_annotations,
)

# Draw the features first, then overlay the nucleotides and the translation
# of the (8, 23) window on the same axes.
ax, _ = record.plot(figure_width=5)
record.plot_sequence(ax)
_bold = {'weight': 'bold'}
record.plot_translation(ax, (8, 23), fontdict=_bold)

_figure = ax.figure
_figure.savefig('sequence_and_translation.png', bbox_inches='tight')
def get_map(phage_id, UPLOAD_FOLDER):
    """Creates and returns a map of the genome.

    Args:
        phage_id:
            Value used to select the `Annotations` rows that are drawn.
        UPLOAD_FOLDER:
            The folder containing all of the uploaded files. The rendered
            map is also written there as 'sequence_and_translation.png'.

    Returns:
        A dictionary with 'status' ("success") and 'image', the `str()` of
        the base64-encoded PNG bytes (i.e. including the b'...' wrapper).
    """
    # Local import: only needed to release the figure after saving it.
    import matplotlib.pyplot as plt

    features = []
    annotations = db.session.query(Annotations).filter_by(
        phage_id=phage_id).order_by(Annotations.left)
    for cds in annotations:
        # Deleted genes and deleted tRNAs are not drawn.
        if cds.function == '@DELETED' or cds.status == 'trnaDELETED':
            continue
        forward = cds.strand == '+'
        if cds.status == "tRNA":
            color = "#7570b3"
        else:
            # Coding features are coloured by strand.
            color = "#1b9e77" if forward else "#d95f02"
        features.append(
            GraphicFeature(start=cds.left,
                           end=cds.right,
                           strand=+1 if forward else -1,
                           color=color,
                           label=cds.id))

    fasta_file = helper.get_file_path("fasta", UPLOAD_FOLDER)
    genome_length = len(SeqIO.read(fasta_file, "fasta").seq)
    record = GraphicRecord(sequence_length=genome_length, features=features)

    image_path = os.path.join(UPLOAD_FOLDER, 'sequence_and_translation.png')
    ax, _ = record.plot(figure_width=genome_length / 1000)
    try:
        ax.figure.savefig(image_path, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leak one figure in a long-running process.
        plt.close(ax.figure)

    with open(image_path, "rb") as image_file:
        image_byte_string = base64.b64encode(image_file.read())

    response_object = {}
    response_object['status'] = "success"
    # Kept as str(bytes) so existing consumers see the same payload.
    response_object['image'] = str(image_byte_string)
    return response_object
def main(args=None):
    """Command-line entry point: draw one genomic neighborhood plot per group.

    Reads an annotation table (locus tags with optional group and color) and
    a GFF3/GTF feature table, collects the CDS features whose locus tag is
    listed in the annotation table, and saves one figure per group into the
    output directory.

    Args:
        args: Optional list of command-line arguments. When None, argparse
            falls back to `sys.argv[1:]`.
    """
    # Local import: only needed to release each figure after saving it.
    import matplotlib.pyplot as plt

    # Path info
    script_directory = os.path.dirname(os.path.abspath(__file__))
    script_filename = sys.argv[0].split("/")[-1]
    description = """
    Running: {} v{} via Python v{} | {}\n
    Versatile command-line tool designed to create publication quality genomic neighborhood plots built on top of DnaFeaturesViewer

    [Example: --annotation_table ./Data/annotation_table.xlsx]
    # Note: Columns are not case sensitive
    Group Locus_Tag Color
    9 MASE_00925 #FFFFFF
    9 MASE_00930 #FFFFFF
    9 MASE_00935 #FF0000
    9 MASE_00940 #FFFFFF

    [Example: --feature_table ./Data/CP003841.1.gff3]
    ##sequence-region CP003841.1 1 4653851
    ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=529120
    CP003841.1 Genbank region 1 4653851 . + . ID=CP003841.1:1..4653851;Dbxref=taxon:529120;Is_circular=true;Name=ANONYMOUS;country=Pacific Ocean: near Hawaii;gbkey=Src;genome=chromosome;isolation-source=seawater surface;mol_type=genomic DNA;old-lineage=Bacteria%3B Proteobacteria%3B Gammaproteobacteria%3B Alteromonadales%3B Alteromonadaceae%3B Alteromonas;strain=ATCC 27126
    CP003841.1 Genbank gene 473 2065 . + . ID=gene-MASE_00005;Name=MASE_00005;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00005
    CP003841.1 Genbank CDS 473 2065 . + 0 ID=cds-AFS35578.1;Parent=gene-MASE_00005;Dbxref=NCBI_GP:AFS35578.1;Name=AFS35578.1;Note=COG0593 ATPase involved in DNA replication initiation;gbkey=CDS;locus_tag=MASE_00005;product=chromosomal replication initiator protein dnaA;protein_id=AFS35578.1;transl_table=11
    CP003841.1 Genbank gene 2098 3198 . + . ID=gene-MASE_00010;Name=MASE_00010;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00010
    CP003841.1 Genbank CDS 2098 3198 . + 0 ID=cds-AFS35579.1;Parent=gene-MASE_00010;Dbxref=NCBI_GP:AFS35579.1;Name=AFS35579.1;Note=COG0592 DNA polymerase sliding clamp subunit (PCNA homolog);gbkey=CDS;locus_tag=MASE_00010;product=DNA polymerase III subunit beta;protein_id=AFS35579.1;transl_table=11
    CP003841.1 Genbank gene 3324 4412 . + . ID=gene-MASE_00015;Name=MASE_00015;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00015
    CP003841.1 Genbank CDS 3324 4412 . + 0 ID=cds-AFS35580.1;Parent=gene-MASE_00015;Dbxref=NCBI_GP:AFS35580.1;Name=AFS35580.1;Note=COG1195 Recombinational DNA repair ATPase (RecF pathway);gbkey=CDS;locus_tag=MASE_00015;product=recombinational DNA repair ATPase;protein_id=AFS35580.1;transl_table=11
    CP003841.1 Genbank gene 4421 6841 . + . ID=gene-MASE_00020;Name=MASE_00020;gbkey=Gene;gene_biotype=protein_coding;locus_tag=MASE_00020

    [Example commands:]
    ./genomic_neighborhood.py -f ./Data/CP003841.1.gff3 -a ./Data/annotation_table.xlsx -o genomic_neighborhood_output --sheet_name "Fe Responsive"
    """.format(__program__, __version__,
               sys.version.split(" ")[0], sys.executable)
    usage = "{} -f <feature_table> -a <annotation_table> -o <output_directory>".format(
        __program__)
    epilog = "Copyright 2020 Josh L. Espinoza ([email protected]) [BSD-3 License]"

    # Parser
    parser = argparse.ArgumentParser(
        description=description,
        usage=usage,
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter)

    # Features Table
    parser_features = parser.add_argument_group('Feature table arguments')
    parser_features.add_argument(
        "-f",
        "--feature_table",
        type=str,
        help=
        "path/to/feature_table.[gff3,gtf][.gz,.bz2,.zip] (e.g. feature_table.gff3[.gz]) {gff3,gtf}"
    )
    parser_features.add_argument(
        "--field",
        type=str,
        default="locus_tag",
        help=
        "Query feature. Note: locus_tag is the only feature accepted with current version. [Default: locus_tag]"
    )
    parser_features.add_argument(
        "--feature_format",
        type=str,
        default="infer",
        help="Feature format. [Default: infer] {gff3,gtf}")

    # Annotation Table
    parser_annotations = parser.add_argument_group(
        'Annotation table arguments')
    parser_annotations.add_argument(
        "-a",
        "--annotation_table",
        type=str,
        help=
        "path/to/annotation_table.[ext][.compression] (e.g. annotation_table.tsv[.gz]). Usable columns are [group, <--field>, color ]. Required column is [<--field>] (e.g. [locus_tag]) {tsv,csv,xlsx}"
    )
    parser_annotations.add_argument(
        "--excel",
        type=str,
        default="infer",
        help="Input table is excel format {true, false, infer} [Default: infer]"
    )
    parser_annotations.add_argument(
        "--sep",
        type=str,
        default="\t",
        help="Separator for input table [Default: '\\t']")
    parser_annotations.add_argument("--sheet_name",
                                    type=str,
                                    help="Sheetname if using excel")

    # Image
    parser_images = parser.add_argument_group('Image arguments')
    parser_images.add_argument("-o",
                               "--output_directory",
                               type=str,
                               # The help text promises the working directory;
                               # without a default os.makedirs(None) fails.
                               default=os.getcwd(),
                               help="Output direcotyr [Default: {}]".format(
                                   os.getcwd()))
    parser_images.add_argument(
        "--feature_color",
        type=str,
        default="gray",
        help=
        "Feature color as a hexcode (#929591) or named color (gray). Note this is the default color and will be overrided if a 'Color' column is provided for `--anotation_table`\nReference: https://matplotlib.org/3.1.0/gallery/color/named_colors.html"
    )
    parser_images.add_argument(
        "--feature_opacity",
        # float, not str: the value becomes the alpha channel of an RGBA tuple
        type=float,
        default=0.85,
        help="Feature color opacity. [Default: 0.85] [0.0,..,1.0]")
    parser_images.add_argument(
        "--image_format",
        type=str,
        default="svg",
        help="Image format [Default: svg] {svg,png,pdf}")
    parser_images.add_argument(
        "--show_sequence_record",
        type=str,
        default="f",
        help="Add sequence record title. [Default: false]")
    parser_images.add_argument("--figure_width",
                               type=float,
                               default=20.0,
                               help="Width of figures [Default: 20]")
    parser_images.add_argument(
        "--draw_reference_line",
        type=str,
        default="t",
        help="Draw reference line for features [Default: true]")

    parser_utility = parser.add_argument_group('Utility arguments')
    parser_utility.add_argument("-v",
                                "--version",
                                action='version',
                                version="{} v{}".format(
                                    __program__, __version__))
    parser_utility.add_argument(
        "--citation",
        action='store_true',
        help="If you use this software, please cite the following sources:\n{}"
        .format(__cite__))

    # Options (honour an explicit argument list; None means sys.argv[1:])
    opts = parser.parse_args(args)
    opts.script_directory = script_directory
    opts.script_filename = script_filename

    if opts.citation:
        print(__cite__, file=sys.stderr)
        sys.exit(0)

    print(format_header(__program__), file=sys.stdout)

    # Read in annotations
    if opts.sep in {"comma", "csv"}:
        opts.sep = ","
    if opts.sep in {"tab", "tsv", "t"}:
        opts.sep = "\t"
    if opts.sep in {"\\s", "space"}:
        opts.sep = " "

    opts.field = opts.field.lower()
    assert opts.field == "locus_tag", "Currently only `locus_tag` is supported for --field"

    df_annotations = read_dataframe(
        path=opts.annotation_table,
        sep=opts.sep,
        excel=opts.excel,
        sheet_name=opts.sheet_name,
        index_col=None,
    )
    df_annotations.columns = df_annotations.columns.map(
        lambda x: x.strip().lower())
    print("Reading annotation table: {} | {}".format(opts.annotation_table,
                                                     df_annotations.shape),
          file=sys.stderr)
    assert opts.field in df_annotations.columns, "--field ({}) not in --annotation_table columns".format(
        opts.field)
    if "group" not in df_annotations.columns:
        df_annotations["group"] = None
    if "color" not in df_annotations.columns:
        df_annotations["color"] = opts.feature_color

    # Read in feature table
    opts.feature_format = opts.feature_format.lower()
    if opts.feature_format == "infer":
        if any(x in opts.feature_table for x in {".gff", ".gff3"}):
            opts.feature_format = "gff3"
        if ".gtf" in opts.feature_table:
            opts.feature_format = "gtf"
    assert opts.feature_format != "infer", "Could not infer `feature_format`.  Please specify either {gff3, gtf}"
    assert_acceptable_arguments(opts.feature_format, {"gff", "gff3", "gtf"})

    if opts.feature_format in {"gff", "gff3"}:
        df_features = read_gff3(opts.feature_table)
    if opts.feature_format in {"gtf"}:
        df_features = read_gtf(opts.feature_table)
    print("Reading features table: {} | {}".format(opts.feature_table,
                                                   df_features.shape),
          file=sys.stderr)

    # Sequence lengths come from the 'region' rows; only CDS rows are plotted.
    df = df_features.query("seq_type == 'region'")
    d_seq_length = dict(zip(df["seq_record"], df["pos_end"].astype(int)))
    df_features = df_features.loc[
        df_features["seq_type"][lambda x: x == "CDS"].index, :].reset_index()

    # Output directory
    os.makedirs(opts.output_directory, exist_ok=True)

    # Genomic neighborhoods
    d_id_group = dict(zip(df_annotations[opts.field], df_annotations["group"]))
    d_id_color = dict(zip(df_annotations[opts.field], df_annotations["color"]))

    d_group_features = defaultdict(list)
    d_group_positions = defaultdict(list)
    # Sequence record (name, length) of each group, taken from the group's
    # first matching feature. Previously the values left over from the last
    # row of the whole feature table were applied to every group.
    d_group_record = dict()

    for i, data in df_features.iterrows():
        label = data[opts.field]
        if label not in d_id_color:
            continue
        seq_record = data["seq_record"]
        start = int(data["pos_start"])
        end = int(data["pos_end"])
        strand = {"+": +1, "-": -1}[data["sense"]]
        group = d_id_group[label]
        feature = GraphicFeature(start=start,
                                 end=end,
                                 strand=strand,
                                 label=label,
                                 color=(*to_rgb(d_id_color[label]),
                                        opts.feature_opacity))
        d_group_features[group].append(feature)
        d_group_positions[group] += [start, end]
        d_group_record.setdefault(group,
                                  (seq_record, d_seq_length[seq_record]))

    if not d_group_features:
        print(
            "No CDS feature of the feature table matches a `{}` of the annotation table"
            .format(opts.field),
            file=sys.stderr)
        sys.exit(1)

    lengths = dict()
    limits = dict()
    for group, group_positions in d_group_positions.items():
        lower, upper = min(group_positions), max(group_positions)
        lengths[group] = upper - lower
        limits[group] = (lower, upper)

    # Every plot spans the widest neighborhood (plus a margin), so all groups
    # are drawn with the same window size.
    max_length = max(lengths.values())
    pad = max_length // 2 + 10

    for group, features in d_group_features.items():
        seq_record, sequence_length = d_group_record[group]
        lower, upper = limits[group]
        midpoint = np.mean(limits[group])
        # Clamp both ends: the window must stay inside the record to be
        # accepted by GraphicRecord.crop.
        window = (max(midpoint - pad, 0),
                  min(midpoint + pad, sequence_length))
        record = GraphicRecord(sequence_length=sequence_length,
                               features=features).crop(window)
        ax, results = record.plot(figure_width=opts.figure_width,
                                  draw_line=boolean(opts.draw_reference_line))
        if boolean(opts.show_sequence_record):
            ax.set_title("{} [{}..{}]".format(seq_record, lower, upper),
                         fontsize=15,
                         fontweight="bold")

        if group is not None:
            output_filepath = os.path.join(
                opts.output_directory,
                "{}__{}.{}".format(seq_record, group, opts.image_format))
        else:
            output_filepath = os.path.join(
                opts.output_directory,
                "{}.{}".format(seq_record, opts.image_format))
        ax.figure.savefig(output_filepath, dpi=300, bbox_inches="tight")
        # Release the figure so many groups do not pile up open figures.
        plt.close(ax.figure)