def test_cropping():
    """Crop a 1000-bp record to the window (425, 650) and count what remains."""
    # (start, end, strand, color, label) for each feature of the construct.
    specs = [
        (5, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    ]
    features = [
        GraphicFeature(start=start, end=end, strand=strand, color=color,
                       label=label)
        for start, end, strand, color, label in specs
    ]

    record = GraphicRecord(sequence_length=1000, features=features)
    window = record.crop((425, 650))

    # Three features are expected to remain in the cropped record.
    assert len(window.features) == 3
def contigend_visualization(contig_array, end_direction, genome, reverse_term, drug, rgi_gene):
    """Plot the genes of a contig with a "Contig_Ends" marker and save a PNG.

    Args:
        contig_array: DataFrame providing the columns ``GeneStart``,
            ``GeneEnd``, ``Strand``, ``Genecolor`` and ``GeneName``. Its
            index is reset in place.
        end_direction: ``"upward"`` puts a 1500-wide marker before the first
            gene; ``"downward"`` puts a marker 1500 past the last gene end.
        genome: Used in the plot title and as the PNG file stem.
        reverse_term: The x axis is inverted when this equals the string
            ``"-1"``.
        drug: Prefix of the plot title.
        rgi_gene: Not used by this function; kept so existing callers keep
            working.

    The figure is written to ``contigend_visualizations/<genome>.png``; the
    directory is created when it does not exist yet.
    """
    save_path = "contigend_visualizations/"

    contig_array.reset_index(drop=True, inplace=True)
    gene_starts = contig_array["GeneStart"]
    gene_ends = contig_array["GeneEnd"]

    Features = []
    # (start, end) of every drawn feature; the first start and the last end
    # define the plotted window.
    spans = []

    if end_direction == "upward":
        first_start = gene_starts[0]
        Features.append(
            GraphicFeature(start=first_start - 1500, end=first_start,
                           strand=+1, color="#0A090A", label="Contig_Ends"))
        spans.append((first_start - 1500, first_start))

    for i in contig_array.index:
        Features.append(
            GraphicFeature(start=gene_starts[i], end=gene_ends[i],
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append((gene_starts[i], gene_ends[i]))

    if end_direction == "downward":
        # NOTE(review): start and end are the same coordinate here, so this
        # marker has zero width (the "upward" marker spans 1500). Kept as-is;
        # confirm whether a wider marker was intended.
        marker_pos = gene_ends.iloc[-1] + 1500
        Features.append(
            GraphicFeature(start=marker_pos, end=marker_pos, strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append((marker_pos, marker_pos))

    length = spans[-1][1] - spans[0][0]
    record = GraphicRecord(first_index=spans[0][0], sequence_length=length,
                           features=Features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    if reverse_term == "-1":
        ax.invert_xaxis()
    ax.set_title(drug + "_" + genome)

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    ax.figure.savefig(os.path.join(save_path, str(genome) + ".png"))
def test_to_biopython_record():
    """Convert a GraphicRecord to Biopython and compare feature coordinates."""
    expected = [(5, 20, "a"), (20, 500, "b"), (400, 700, "c")]
    strand_of = {"a": +1, "b": +1, "c": -1}

    graphic_features = [
        GraphicFeature(start=start, end=end, strand=strand_of[label],
                       label=label)
        for start, end, label in expected
    ]
    record = GraphicRecord(sequence_length=50, features=graphic_features)

    biopython_record = record.to_biopython_record(sequence=50 * "A")
    found = [
        (feat.location.start, feat.location.end, feat.qualifiers["label"])
        for feat in biopython_record.features
    ]
    found.sort()
    assert found == [(5, 20, "a"), (20, 500, "b"), (400, 700, "c")]
def vis_sccmec(faa_file_sccmec, annotation_file, length_sccmec, core_proteins, blastp):
    """Render the annotated SCCmec element to ``SCCmec_<id>.png``.

    The annotation is loaded with ``annotation_data``, refined with
    ``update_annotation`` (using the protein FASTA, the blastp result and the
    core proteins), and every annotated row becomes a coloured arrow. The id
    in the file name is the text between the last ``_`` and the following
    ``.`` of ``annotation_file``.
    """
    # Protein FASTA (from the prokka annotation of the SCCmec) as a dict.
    faa_dict_sccmec = fasta2dict(faa_file_sccmec)

    # Refresh the annotation with the core proteins found in the cluster.
    datafile = annotation_data(annotation_file)
    update_datafile = update_annotation(datafile, blastp, faa_dict_sccmec,
                                        core_proteins)

    features = []
    for line in update_datafile:
        id_, sense, start, end, size, length, gene = line

        if gene == 'core-proteins':
            color, label = '#ff8848', None
        else:
            # Genes missing from the lookup tables are drawn grey / unlabelled.
            try:
                color = colors[gene]
            except KeyError:
                color = 'grey'
            try:
                label = labels[gene]
            except KeyError:
                label = None

        # A sense string holding both signs yields two arrows, '-' first.
        for sign, strand in (('-', -1), ('+', +1)):
            if sign in sense:
                features.append(
                    GraphicFeature(start=int(start), end=int(end),
                                   strand=strand, color=color, label=label))

    record = GraphicRecord(sequence_length=length_sccmec, features=features)
    ax, _ = record.plot(figure_width=20)

    id_ = annotation_file.split('_')[-1].split('.')[0]
    ax.figure.savefig('SCCmec_{}.png'.format(id_), dpi=300)
def Visualiser_sekvens(gen):
    """Plot the sequence view selected by ``gen``.

    * ``'alle'``: translate the GenBank file ``Artemisia%20annua.gb`` with
      ``ChangeFeatures`` and plot the whole record.
    * ``"aldh1"`` / ``'CYP71AV1'``: plot a hard-coded sequence with a promoter
      and a gene feature, its nucleotides, and the translation of the gene
      span.

    Any other value plots nothing. Always returns ``None``.
    """
    import warnings
    from Bio import BiopythonParserWarning

    warnings.simplefilter('ignore', BiopythonParserWarning)

    def _plot_gene(sequence, promoter, gene, figure_width):
        # promoter is (start, end, color); gene is (start, end, color, label).
        p_start, p_end, p_color = promoter
        g_start, g_end, g_color, g_label = gene
        record = GraphicRecord(sequence=sequence, features=[
            GraphicFeature(start=p_start, end=p_end, strand=+1,
                           color=p_color, label="Promotor"),
            GraphicFeature(start=g_start, end=g_end, strand=+1,
                           color=g_color, label=g_label),
        ])
        ax, _ = record.plot(figure_width=figure_width)
        record.plot_sequence(ax)
        record.plot_translation(ax, (g_start, g_end),
                                fontdict={'weight': 'bold'})

    if gen == 'alle':
        fil = 'Artemisia%20annua.gb'
        graphic_record = ChangeFeatures().translate_record(fil)
        ax, _ = graphic_record.plot(figure_width=20)
        ax.figure.tight_layout()
    elif gen == "aldh1":
        _plot_gene(
            "CTGTGTCTAGATTTACGGTTTTGTTGAGTATGGAGTATTTATCCCTGTGTCTAGATTTACGGTTTGAAGACTCAGGAAACTCTCATTAAGCGATCAACGTAGCATGATCATCAAAAGCATGGTTTTGTAAACTCGACATGTCAATGTACCAGCCGATCCAAGTATCCAAGCAATTGGTTCACCACACCAAAAGAGTTTTACACTTAAAAACAACAATTAATTCTAAATAGTCTATGTAATGAAATATGTTTTGTGTGGGTTAGTTTAGTTCATAGTTGCGCCATAAGTATTTACAGCAA",
            promoter=(0, 28, '#ffd700'),
            gene=(29, 299, "#ffcccc", "aldh1"),
            figure_width=50,
        )
    elif gen == 'CYP71AV1':
        _plot_gene(
            "ATTTTTGGGGGCCCCCCCCCATTTTTTGGGGGGCGCGCGATGAAGTTGGTCATTCGAAATATACTTCCAAAATATGAAGTTGGTCATTCGAAATATACTTCCAAACAACCGAGCTGGTCAGGTAGATTTTGTTTCAGATGAAGATGCAATCCACCGTTGGGGGAGTTTCATGAATAACAATCGCAAATAAGATATATTGTTGATTCTTGATGATGTTTGGTCTGATACCATCATCACCGACCTCCAATTCAGGTCACGTGGATACAAGATCCTCGTGACCTCTGAAACAACCTTTAAGAGATTCGATACATATAAAGTGAGACCTCTCAGTGTTCAAGATGCCATCAATCTGTTATGCTATTCAACACTTTCGGAGCGTGCAAGTCAAGCCACAAATGACATACAGACCTTGTTGACAAGGTGAAATTTCAAATTATTCCAAGATTCATGTTTCATACCTTTATAAGAAAGTAATATCTAAACCATATTAACAAATACTAACAATTAACTTTCAAATGTTTTTGTAGTTAACCAAATGTTGCAAGAAGAATCCGCTCGCCTTAAGTGTCATTGGTGGTCGCCTAAAGGGGACACAAATGGAAAGTTGGCATCATACACTGAAAAAGCTATCTCAAGCCACACACCCTCTTATCGACCTTCCTTTGGATGAGGCAAACAGATTTCATCTCGCAAGAGCTCTCGGTTTACTCAAAGATGATGAACGCAACAGCCCCAGAAGTTCAACCTCGAAATTGACCCGATCTTACCAAGTCA",
            promoter=(1, 38, '#cffccc'),
            gene=(39, 774, "#cff77d", "CYP71AV1"),
            figure_width=100,
        )
    return
def print(self, ax=None, with_ruler=True, gfeatures=None, add_contig=True, figure_width=15, file_path=None):
    """Plot the region ``self.start()``..``self.end()`` of the sequence record.

    Args:
        ax: Axes to draw on; a new one is created when falsy.
        with_ruler: Forwarded to the record's ``plot``.
        gfeatures: Optional extra graphic features appended to the record.
        add_contig: When true, a yellow feature spanning the whole record and
            labelled with its id is added.
        figure_width: Forwarded to the record's ``plot``.
        file_path: When given, the figure is saved to this path.
    """
    record = self.translator.translate_record(self.seq_record)

    extra_features = []
    if add_contig:
        extra_features.append(
            GraphicFeature(start=0, end=len(self.seq_record), strand=+1,
                           color="yellow", label=self.seq_record.id))
    if gfeatures:
        extra_features.extend(gfeatures)
    for extra in extra_features:
        record.features.append(extra)

    cropped_record = record.crop((self.start(), self.end()))
    plot_options = dict(figure_width=figure_width, with_ruler=with_ruler)
    if ax:
        cropped_record.plot(ax=ax, **plot_options)
    else:
        ax, _ = cropped_record.plot(**plot_options)

    if file_path:
        ax.figure.savefig(file_path)
def plot(self, ax, gr: GenomeRange, **kwargs):
    """Draw the rows fetched for ``gr`` as labelled arrows on ``ax``.

    Rows are optionally narrowed by the ``row_filter`` property (a
    ``;``-separated list of ``<column><operator><value>`` expressions) and by
    a minimum length of ``length_ratio_thresh`` times the region length. Each
    remaining row gets a colour picked at random from ``self.colors``.
    """
    self.ax = ax
    df = self.fetch_plot_data(gr)

    if self.has_prop("row_filter"):
        for filter_ in self.properties["row_filter"].split(";"):
            try:
                # Split at the first comparison character: column name on the
                # left, operator and value on the right.
                op_idx = list(re.finditer("[=><!]", filter_))[0].start()
                l_ = filter_[:op_idx].strip()
                r_ = filter_[op_idx:]
                # NOTE(review): eval runs text taken from the track
                # properties; only safe while that configuration is trusted.
                # The expression refers to the local name `df`.
                df = eval(f'df[df["{l_}"]{r_}]')
            except IndexError:
                log.warning(f"row filter {filter_} is not valid.")

    region_length = gr.end - gr.start
    min_length = region_length * self.properties["length_ratio_thresh"]
    df = df[(df["end"] - df["start"]) > min_length]

    features = [
        GraphicFeature(
            start=row['start'],
            end=row['end'],
            strand=(1 if row['strand'] == '+' else -1),
            label=row['gene_name'],
            color=random.choice(self.colors),
        )
        for _, row in df.iterrows()
    ]

    record = GraphicRecord(sequence_length=gr.end - gr.start,
                           features=features, first_index=gr.start)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
    self.plot_label()
def contig_visualization_onefile(contig_array, genome, drug, rgi):
    """Plot all genes of one contig and save the figure as a PNG.

    Args:
        contig_array: DataFrame providing the columns ``GeneStart``,
            ``GeneEnd``, ``Strand``, ``Genecolor`` and ``GeneName``. Its
            index is reset in place.
        genome: Not used by this function; kept so existing callers keep
            working.
        drug: First part of the title and of the file name.
        rgi: Second part of the title and of the file name.

    The figure is written to
    ``contigend_visualizations_single_genome/<drug>_<rgi>.png``; the directory
    is created when it does not exist yet.
    """
    save_path = "contigend_visualizations_single_genome/"

    contig_array.reset_index(drop=True, inplace=True)
    gene_starts = contig_array["GeneStart"]
    gene_ends = contig_array["GeneEnd"]

    Features = []
    # (start, end) of every gene; the first start and the last end define the
    # plotted window.
    spans = []
    for i in contig_array.index:
        Features.append(
            GraphicFeature(start=gene_starts[i], end=gene_ends[i],
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append((gene_starts[i], gene_ends[i]))

    length = spans[-1][1] - spans[0][0]
    record = GraphicRecord(first_index=spans[0][0], sequence_length=length,
                           features=Features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)

    title = drug + "_" + str(rgi)
    ax.set_title(title)

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    ax.figure.savefig(os.path.join(save_path, title + ".png"))
def contigend_visualization(contig_array, end_direction, genome):
    """Plot the genes of a contig with a "Contig_Ends" marker; save ``<genome>.png``.

    ``end_direction == "upward"`` adds a marker covering the 1500 positions
    before the first gene start; ``"downward"`` adds one from 1500 to 2500
    past the last gene end. ``contig_array`` has its index reset in place.
    """
    contig_array.reset_index(drop=True, inplace=True)
    gene_starts = contig_array["GeneStart"]
    gene_ends = contig_array["GeneEnd"]

    drawn = []
    # (start, end) of each drawn feature, in drawing order.
    spans = []

    if end_direction == "upward":
        upstream = (gene_starts[0] - 1500, gene_starts[0])
        drawn.append(
            GraphicFeature(start=upstream[0], end=upstream[1], strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append(upstream)

    for i in contig_array.index:
        gene_span = (gene_starts[i], gene_ends[i])
        drawn.append(
            GraphicFeature(start=gene_span[0], end=gene_span[1],
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append(gene_span)

    if end_direction == "downward":
        last_end = gene_ends.iloc[-1]
        downstream = (last_end + 1500, last_end + 2500)
        drawn.append(
            GraphicFeature(start=downstream[0], end=downstream[1], strand=+1,
                           color="#0A090A", label="Contig_Ends"))
        spans.append(downstream)

    # The window runs from the first recorded start to the last recorded end.
    window_start = spans[0][0]
    window_length = spans[-1][1] - window_start
    record = GraphicRecord(first_index=window_start,
                           sequence_length=window_length, features=drawn)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    ax.figure.savefig(str(genome) + ".png")
def add_mutation_feature(self, start, end, label, color="#FF1700"):
    """Register a forward-strand mutation feature and extend the tracked length.

    The feature is appended to ``self._features`` and ``self._max_length`` is
    raised to ``end`` when ``end`` is larger.
    """
    mutation = GraphicFeature(start=start, end=end, strand=+1, color=color,
                              label=label)
    self._features.append(mutation)

    longest_so_far = self._max_length
    self._max_length = max(longest_so_far, end)
def create_dna_structure(file_name):
    """Draw the repeats and spacers from the request JSON and save a PNG.

    Reads ``spacerRepeats`` and ``length`` from the JSON body, draws one
    "Repeat_<n>" feature per entry (plus a "Spacer_<n>" feature when the entry
    has a ``spacer`` key), crops the record to 50 positions around the first
    and last repeat, and writes ``static/logos/<file_name>.png``.
    """
    results = request.get_json()
    repeats = results['spacerRepeats']

    features = []
    for number, entry in enumerate(repeats, start=1):
        repeat_start = entry['position']
        repeat_end = repeat_start + len(entry['repeat'])
        features.append(
            GraphicFeature(start=repeat_start, end=repeat_end, strand=+1,
                           color="#cffccc", label="Repeat_"+str(number)))
        if 'spacer' in entry:
            features.append(
                GraphicFeature(start=repeat_end + 1,
                               end=repeat_end + entry['lengths'][1],
                               strand=+1, color="#ccccff",
                               label="Spacer_"+str(number)))

    record = GraphicRecord(sequence_length=results['length'],
                           features=features)

    first, last = repeats[0], repeats[-1]
    crop_start = first['position'] - 50
    crop_end = last['position'] + len(last['repeat']) + 50
    record = record.crop((crop_start, crop_end))

    ax, _ = record.plot(figure_width=10)
    ax.figure.savefig('static/logos/'+str(file_name)+'.png',
                      bbox_inches='tight')
    return jsonify('{"success":1}')
def draw_by_dfv():
    """Build a circular record from ``names``/``intervals`` and stop in ipdb.

    Each interval becomes a forward-strand feature labelled with its name; the
    record length is ``phase_1 + phase_2``. Nothing is plotted or returned.
    """
    from dna_features_viewer import GraphicFeature, CircularGraphicRecord
    import matplotlib.pyplot as plt

    # Positional arguments are (start, end, strand, label).
    features = [
        GraphicFeature(interval[0], interval[1], +1, name)
        for name, interval in zip(names, intervals)
    ]
    record = CircularGraphicRecord(phase_1 + phase_2, features)

    # NOTE(review): interactive breakpoint; execution halts here.
    import ipdb
    ipdb.set_trace()
def vis_pegRNA2(df,genome_fasta=None,**kwargs):
    """Draw one easy-prime prediction (rawX format) as a DNA feature plot.

    Input
    --------
    the data frame contains 4 rows: RTT, PBS, sgRNA, ngRNA

    The features are plotted on a fresh matplotlib axes and the function then
    returns ``0``.
    """
    pegRNA_id = df.index.tolist()[0]
    variant_id = pegRNA_id.split("_")[0]
    chrom = df['CHROM'][0]

    # Window start: round down to a multiple of 10, then step back by one.
    start = df['start'].min()
    start = start - start % 10 - 1
    # Window end: round down to a multiple of 10, then add 10.
    end = df['end'].max()
    end = end - end % 10 + 10

    variant_pos = df.POS.min()
    ref = df.REF[0]
    alt = df.ALT[0]
    predicted_efficiency = df.predicted_efficiency[0]*100
    pos = variant_pos-start

    sequence = get_fasta_single(chrom,start,end,genome_fasta).upper()
    fig,ax = plt.subplots()

    feature_list = []
    for _, row in df.iterrows():
        offset = row.start-start
        feature_list.append(
            GraphicFeature(start=offset, end=offset+(row.end-row.start),
                           strand=get_strand(row.strand),
                           color=my_colors[row.type], label=row.type))

    record = GraphicRecord(sequence=sequence, features=feature_list)
    record.plot(ax=ax,figure_width=int(len(sequence)/5))
    return 0

    # NOTE(review): everything below is unreachable because of the return
    # above. It is kept unchanged in behaviour; confirm whether the early
    # return is intentional.
    record.plot_sequence(ax)
    ax.fill_between((pos-1.5, pos-0.5), +1000, -1000, alpha=0.5,color=my_colors['variant'])
    locs, labels = plt.xticks()
    new_labels = []
    for tick_number, loc in enumerate(locs):
        if tick_number == 0:
            # Only the first tick carries the chromosome name.
            new_labels.append("%s %s"%(chrom,int(start+loc+1)))
        else:
            new_labels.append(int(start+loc+1))
    plt.xticks(locs,new_labels)
    plt.title("ID: %s, CHR: %s, POS: %s, REF: %s, ALT: %s \n Predicted efficiency: %.1f"%(variant_id,chrom,variant_pos,ref,alt,predicted_efficiency)+"%")
    png_buffer = io.BytesIO()
    ax.figure.savefig(png_buffer, format='png',bbox_inches='tight')
    png_buffer.seek(0)
    img_string = base64.b64encode(png_buffer.read())
    return "data:image/png;base64,%s"%(img_string.decode("utf-8"))
def haplotype_blocks_fig(model, ref_seq):
    """Plot the two aligned alleles of ``model`` over ``ref_seq``.

    Each allele is drawn as a forward-strand block starting at 0 and as long
    as its aligned string. The nucleotides and the translation of positions
    (8, 23) are added, and the figure is saved as ``haplotypes.png``.
    """
    s1, s2 = model.align_alleles()

    allele_blocks = [
        GraphicFeature(start=0, end=len(allele), strand=+1, color=color)
        for allele, color in ((s1, '#ffcccc'), (s2, '#cffccc'))
    ]
    record = GraphicRecord(sequence=ref_seq, sequence_length=len(ref_seq),
                           features=allele_blocks)

    ax, _ = record.plot(figure_width=5)
    record.plot_sequence(ax)
    record.plot_translation(ax, (8, 23), fontdict={'weight': 'bold'})
    ax.figure.savefig('haplotypes.png', bbox_inches='tight')
def visualize_mrna_strand(self, dpi=120, cmap='viridis'):
    """Show the transcript as Tag + Protein blocks with coloured probe marks.

    Every entry of ``self.probe_loc`` equal to 1 becomes a 2-wide mark at its
    second-axis index, coloured by its first-axis index. A legend with one
    entry per colour and three text lines (name, length, first 10 residues)
    are added before the figure is shown.
    """
    features = [
        GraphicFeature(start=0, end=self.tag_length, color=self._colors[0],
                       label='Tag'),
        GraphicFeature(start=self.tag_length, end=self.total_length,
                       color=self._colors[1], label='Protein'),
    ]

    probe = self.probe_loc
    cmap = cm.get_cmap(cmap)
    hits = np.where(probe == 1)
    ncolors = probe.shape[0]
    colors = cmap(np.linspace(.01, .95, ncolors))
    colorlabels = ['Color %d' % i for i in range(ncolors)]

    features.extend(
        GraphicFeature(start=loc, end=loc + 2, color=colors[c],
                       linecolor=colors[c])
        for c, loc in zip(hits[0], hits[1])
    )
    record = GraphicRecord(sequence_length=self.total_length,
                           features=features)

    fig, ax = plt.subplots(1, dpi=dpi)
    # Degenerate lines, one per colour, so the legend picks up the colours.
    for c in range(ncolors):
        ax.plot([0, 0], [0, 0], color=colors[c])

    ax, _ = record.plot(figure_width=6, ax=ax)
    ax.axes.legend(colorlabels, loc=7)
    ax.text(0, 5, 'Transcript Name: %s' % self.name)
    ax.text(0, 4, 'Total Length: %d codons' % self.total_length)
    ax.text(0, 3, 'Seq: %s ...' % self.aa_seq[:10])
    fig.show()
def test_split_overflowing_features():
    """Features overflowing a 50-bp record are split circularly into two parts."""
    # (start, end, label); "b" runs past the end, "c" starts before zero.
    layout = [(10, 20, "a"), (40, 55, "b"), (-20, 2, "c")]
    features = [
        GraphicFeature(start=start, end=end, strand=+1, label=label)
        for start, end, label in layout
    ]

    record = GraphicRecord(sequence_length=50, features=features)
    record.split_overflowing_features_circularly()

    new_features_locations_and_labels = [
        (feat.start, feat.end, feat.label) for feat in record.features
    ]
    new_features_locations_and_labels.sort()
    assert new_features_locations_and_labels == [
        (0, 2, "c"),
        (0, 5, "b"),
        (10, 20, "a"),
        (30, 49, "c"),
        (40, 49, "b"),
    ]
def test_sequence_and_translation_plotting():
    """Smoke test: plot a 28-nt record with its sequence and a translation."""
    from dna_features_viewer import (
        GraphicFeature,
        GraphicRecord,
        CircularGraphicRecord,
    )

    first = GraphicFeature(start=5, end=10, strand=+1, color="#ffd700",
                           label="bbS-1")
    second = GraphicFeature(start=8, end=15, strand=+1, color="#ffcccc",
                            label="CrC")
    record = GraphicRecord(sequence=7 * "ATGC", features=[first, second])

    ax, _ = record.plot(figure_width=5)
    record.plot_sequence(ax)
    record.plot_translation(ax, (8, 23), fontdict={"weight": "bold"})
def Visualize_transciript(exon_table,domain_table,exons_in_interface):
    """Build the exon and Pfam-domain graphic features of a transcript.

    Args:
        exon_table: Table with the columns "CDS start", "CDS end",
            "Exon rank in transcript" and "Exon stable ID". Rows whose CDS
            start is NaN are skipped.
        domain_table: Table with the columns "Pfam ID", "Pfam start",
            "Pfam end" and "Interactions mediated by the domain"; duplicate
            rows are dropped. Rows whose Pfam start is NaN are skipped.
        exons_in_interface: Container of exon ids to highlight.

    Returns:
        ``(features1, features2, fend)``: the exon features (coordinates
        divided by 3), the domain features, and the divided end coordinate of
        the last exon drawn. ``fend`` is ``None`` when no exon was drawn.
    """
    features1 = []
    features2 = []
    # Defined up front so the return below works even when every exon row is
    # skipped (previously an UnboundLocalError).
    fend = None

    exon_rows = zip(exon_table["CDS start"], exon_table["CDS end"],
                    exon_table["Exon rank in transcript"],
                    exon_table["Exon stable ID"])
    for st, e, rank, idd in exon_rows:
        if np.isnan(st):
            continue
        # Exons listed in exons_in_interface get the highlight colour.
        color = "#FF9200" if idd in exons_in_interface else "#ffd700"
        features1.append(
            GraphicFeature(ax=1, start=st/3, end=e/3, color=color,
                           label=str(rank)))
        fend = e/3

    domain_table = domain_table[["Pfam ID", "Pfam start", "Pfam end",
                                 "Interactions mediated by the domain"]]
    domain_table = domain_table.drop_duplicates()

    domain_rows = zip(domain_table["Pfam start"], domain_table["Pfam end"],
                      domain_table["Pfam ID"])
    for st, e, i in domain_rows:
        if not np.isnan(st):
            features2.append(
                GraphicFeature(ax=2, start=st, end=e, color="#ffcccc",
                               label=i))

    return features1, features2, fend
def test_by_hand(tmpdir):
    """Test building a GraphicRecord "by hand" """
    # (start, end, strand, color, label) for each feature of the construct.
    specs = [
        (5, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    ]
    features = [
        GraphicFeature(start=start, end=end, strand=strand, color=color,
                       label=label)
        for start, end, strand, color, label in specs
    ]
    out_dir = str(tmpdir)

    # Linear view: one ruler-less plot for coverage, then the exported one.
    record = GraphicRecord(sequence_length=1000, features=features)
    record.plot(figure_width=5, with_ruler=False)
    ax, _ = record.plot(figure_width=5)
    ax.figure.savefig(os.path.join(out_dir, "by_hand.png"))

    # Circular view of the same features.
    circular_rec = CircularGraphicRecord(sequence_length=1000,
                                         features=features)
    ax2, _ = circular_rec.plot(figure_width=4)
    ax2.figure.tight_layout()
    ax2.figure.savefig(os.path.join(out_dir, "by_hand_circular.png"),
                       bbox_inches="tight")
def plot_plasmid_features(
    plasmid_length: int,
    features: List[Dict[str, Any]],
    figure_width: int = 5,
    palette: Optional[Palette] = None,
) -> Tuple[SubplotBase, Tuple[Any, Any]]:
    """Plots features in a circular dna sequence.

    Args:
        plasmid_length (int): Number of nucleotide bases of the plasmid sequence
        features (List[Dict[str, Any]]): Features as obtained from TeselaGen
            DNA Sequence object. Each needs 'start', 'end', 'name' and either
            'strand' or 'forward'. An empty list gives a plot with no features.
        figure_width (int, optional): Width size of figure. Defaults to 5.
        palette (Optional[Palette], optional): A SecretColors color palette. \
            Defaults to None, meaning `Palette("material")` will be used.

    Returns:
        Tuple[AxesSubplot, Tuple[Any, Any]]: Axes and a tuple with Graphic
        features data, exactly as returned by the record's `plot`.
    """
    # Colors are taken from the palette in cycling order, one per feature.
    if palette is None:
        palette = Palette("material")
    colors = palette.cycle()

    # Derive 'strand' from 'forward' on a copy so the caller's dicts are not
    # modified. The first feature decides for all; an empty list is skipped.
    if features and 'strand' not in features[0]:
        features = deepcopy(features)
        for feat in features:
            feat.update({'strand': 1 * feat['forward']})

    plot_feats = [
        GraphicFeature(
            start=feat['start'],
            end=feat['end'],
            strand=feat['strand'],
            label=feat['name'],
            color=next(colors),
        )
        for feat in features
    ]

    record = CircularGraphicRecord(sequence_length=plasmid_length,
                                   features=plot_feats)
    # Plot once and return that result; plotting a second time on the same
    # axes would draw every feature twice.
    return record.plot(figure_width=figure_width)
def add_feature(self, palette='tab10'):
    """Rebuild ``self._features`` from the ``self.features`` table.

    Each distinct value of the ``type`` column is given one colour of the
    seaborn ``palette`` (stored in a new ``color`` column), and each row
    becomes a forward-strand feature labelled with its ``group``. When the
    table is empty a message is printed and nothing else changes.
    """
    if self.features.empty:
        print("No found genes. do search again")
        return

    self._features = []

    ft = self.features['type'].unique()
    hex_colors = sns.color_palette(palette=palette,
                                   n_colors=len(ft)).as_hex()
    color_of_type = dict(zip(ft, hex_colors))
    self.features['color'] = self.features['type'].map(color_of_type)

    for _, row in self.features.iterrows():
        self._features.append(
            GraphicFeature(start=row.start, end=row.end, strand=+1,
                           color=row.color, label=row.group))
def visualize_markup(self, index=1):
    """Show the gene features of one annotated sequence with bokeh.

    For every sequence in ``self.seq`` that has a GFF record with the same id,
    the record's ``gene`` features are collected. The collection at position
    ``index`` is drawn on a 1000-long record and written to ``test.html``.
    Sequences without a GFF record add nothing, so ``index`` counts matched
    sequences only.
    """
    sequences = []
    for seq in self.seq:
        seq_id = str(seq.id)
        gff_record = next(
            (rec for rec in self.gff if str(rec.id) == seq_id), None)
        if not gff_record:
            continue

        # Only features of type 'gene' are drawn.
        gene_features = [
            GraphicFeature(gene.location.start, gene.location.end,
                           gene.location.strand,
                           label=gene.qualifiers['Name'], color="#cffccc")
            for gene in gff_record.features
            if gene.type == 'gene'
        ]
        sequences.append(gene_features)

    output_file("test.html")
    record = GraphicRecord(sequence_length=1000, features=sequences[index])
    show(record.plot_with_bokeh(figure_width=5))
def plot_align(self, ax, genome_range):
    """Draw the aligned reads overlapping ``genome_range`` as arrows on ``ax``.

    Rows with flag bit ``0b100`` set are dropped, as are reads whose sequence
    is not longer than ``length_ratio_thresh`` (default 0.005) times the
    region length. Flag bit ``0b10000`` selects strand -1, otherwise +1.
    Returns ``None``; nothing is drawn when no read remains.
    """
    gr = genome_range
    df = self.fetch_intervals(gr)

    df_ = df[np.bitwise_and(df['flag'], 0b100) == 0]
    len_thresh = self.properties.get("length_ratio_thresh", 0.005)
    df_ = df_[df_['seq'].str.len() > (gr.length * len_thresh)]
    if df_.shape[0] <= 0:
        return

    # Take the reverse-strand bit from the filtered frame itself so it lines
    # up row by row. Looking it up in the unfiltered frame by position with
    # an index label is only correct for a default RangeIndex.
    is_reverse = (np.bitwise_and(df_['flag'], 0b10000) != 0).to_numpy()

    features = []
    for reverse, (_, row) in zip(is_reverse, df_.iterrows()):
        start = row['pos'] - gr.start
        end = row['pos'] + len(row['seq']) - gr.start
        features.append(
            GraphicFeature(
                start=start,
                end=end,
                strand=-1 if reverse else 1,
                color=self.properties['color'],
            ))

    record = GraphicRecord(sequence_length=gr.length, features=features)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
def plot_align(self, ax, gr: GenomeRange):
    """Draw the aligned reads overlapping ``gr`` as arrows on ``ax``.

    Rows with flag bit ``0b100`` set are dropped, as are reads whose sequence
    is not longer than ``length_ratio_thresh`` times the region length. Flag
    bit ``0b10000`` selects strand -1, otherwise +1. Returns ``None``; nothing
    is drawn when no read remains.
    """
    assert isinstance(
        gr, GenomeRange), "The input gr should be type GenomeRange"
    df = self.fetch_plot_data(gr)

    df_ = df[np.bitwise_and(df['flag'], 0b100) == 0]
    len_thresh = self.properties["length_ratio_thresh"]
    df_ = df_[df_['seq'].str.len() > (gr.length * len_thresh)]
    if df_.shape[0] <= 0:
        return

    # Take the reverse-strand bit from the filtered frame itself so it lines
    # up row by row. Looking it up in the unfiltered frame by position with
    # an index label is only correct for a default RangeIndex.
    is_reverse = (np.bitwise_and(df_['flag'], 0b10000) != 0).to_numpy()

    features = []
    for reverse, (_, row) in zip(is_reverse, df_.iterrows()):
        start = row['pos'] - gr.start
        end = row['pos'] + len(row['seq']) - gr.start
        features.append(
            GraphicFeature(
                start=start,
                end=end,
                strand=-1 if reverse else 1,
                color=self.properties['color'],
            ))

    record = GraphicRecord(sequence_length=gr.length, features=features)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
def get_map(phage_id, UPLOAD_FOLDER):
    """Creates and returns a map of the genome.

    Args:
        phage_id:
            Id used to select the phage's rows in the Annotations table.
        UPLOAD_FOLDER:
            The folder containing all of the uploaded files.

    Returns:
        A dictionary with 'status' set to "success" and 'image' holding
        ``str()`` of the base64-encoded PNG bytes of the genome map.
    """
    annotations = db.session.query(Annotations).filter_by(
        phage_id=phage_id).order_by(Annotations.left)

    features = []
    for cds in annotations:
        if cds.function == '@DELETED' or cds.status == 'trnaDELETED':
            continue

        forward = cds.strand == '+'
        # tRNAs share one colour; other genes are coloured by strand.
        if cds.status == "tRNA":
            color = "#7570b3"
        elif forward:
            color = "#1b9e77"
        else:
            color = "#d95f02"
        features.append(
            GraphicFeature(start=cds.left, end=cds.right,
                           strand=+1 if forward else -1,
                           color=color, label=cds.id))

    fasta_file = helper.get_file_path("fasta", UPLOAD_FOLDER)
    sequence = str(SeqIO.read(fasta_file, "fasta").seq)

    record = GraphicRecord(sequence_length=len(sequence), features=features)
    ax, _ = record.plot(figure_width=len(sequence) / 1000)

    # The figure goes through a PNG on disk and is read back as base64.
    image_path = os.path.join(UPLOAD_FOLDER, 'sequence_and_translation.png')
    ax.figure.savefig(image_path, bbox_inches='tight')
    with open(image_path, "rb") as image_file:
        image_byte_string = base64.b64encode(image_file.read())

    return {'status': "success", 'image': str(image_byte_string)}
# Example script: plot a short sequence with two overlapping features, its
# nucleotides and a translation, then save the figure as a PNG.
from dna_features_viewer import GraphicFeature, GraphicRecord

# 28-nt sequence carrying two forward-strand features that overlap on 8-10.
record = GraphicRecord(sequence="ATGCATGCATGCATGCATGCATGCATGC", features=[
    GraphicFeature(start=5, end=10, strand=+1, color='#ffcccc'),
    GraphicFeature(start=8, end=15, strand=+1, color='#ccccff')
])

ax, _ = record.plot(figure_width=5)
# Add the nucleotide letters and the translation of positions (8, 23).
record.plot_sequence(ax)
record.plot_translation(ax, (8, 23), fontdict={'weight': 'bold'})
ax.figure.savefig('sequence_and_translation.png', bbox_inches='tight')
def plot(self, ax=None, plot_coverage=True, plot_reference=False,
         reference_ax=None, figsize="auto", features_filters=(),
         features_properties=None, reference_reads_shares="auto"):
    """Plot the sequencing matches.

    Useful to get a general overview of the sequencing (coverage, mutations
    etc.)

    Parameters
    ----------

    ax
      Matplotlib ax on which to plot the alignments. If None, one will be
      automatically created.

    plot_coverage
      If True, the plots will display in the background a filled blue line
      indicating how many times each nucleotide of the sequence is covered
      by the successful alignments.

    plot_reference
      If True, a schema of the reference record will be plotted, by default
      above the reads plot.

    reference_ax
      If provided and plot_reference is True, the reference record will be
      plotted on this ax.

    figsize
      Size of the final figure. Leave it to 'auto' for a figure of width 12
      and automatically chosen height. Or e.g. (16, 'auto') for a figure of
      width 16 and automatically chosen height.

    features_filters
      List of functions (feature=>True/False). Features for which at least
      one test is False will not appear in the reference record plot.

    features_properties
      DNA Features Viewer property functions that can be used to change the
      appearance of the reference record.

    reference_reads_shares
      Relative shares of the pictures that should be occupied by the
      reference and by the reads. It is an experimental parameter so leave it
      to 'auto' for now.

    Returns
    -------

    ax
      The Matplotlib ax on which the reads were plotted.
    """

    class AnnotationsGraphicTranslator(BiopythonTranslator):
        # Translator used for the reference plot: one fixed color, labels
        # cut to 20 characters, 'cover'/'no_primer' features hidden.

        def compute_feature_color(self, f):
            return "#f9d277"

        def compute_feature_label(self, f):
            # NOTE(review): the base method is called on the class with only
            # `f`; this presumably relies on it being a static method in the
            # library version in use -- confirm.
            return BiopythonTranslator.compute_feature_label(f)[:20]

        def compute_filtered_features(self, features):
            def is_not_parameter(f):
                label = "".join(f.qualifiers.get('label', ''))
                return label not in ('cover', 'no_primer')
            return [f for f in features if is_not_parameter(f)]

    if plot_reference:
        translator = AnnotationsGraphicTranslator(
            features_filters=features_filters,
            features_properties=features_properties)
        grecord = translator.translate_record(self.reference)
        if not self.linear:
            grecord.split_overflowing_features_circularly()

    if figsize == "auto":
        figsize = (12, "auto")
    if figsize[1] == "auto":
        # Height of the reads panel grows with the number of reads.
        sequencing_ax_height = 2 + 0.35 * len(self.read_reference_matches)
        if not plot_reference:
            figure_height = sequencing_ax_height
        else:
            # Plot the reference once on a throwaway figure only to measure
            # its height; that figure is closed right after.
            ref_ax, _ = grecord.plot(with_ruler=False,
                                     figure_width=figsize[0])
            ref_fig_height = ref_ax.figure.get_size_inches()[1]
            figure_height = sequencing_ax_height + ref_fig_height
            if reference_reads_shares == "auto":
                reference_reads_shares = (int(100 * ref_fig_height),
                                          int(100 * sequencing_ax_height))
            plt.close(ref_ax.figure)
        figsize = (figsize[0], figure_height)
    elif reference_reads_shares == "auto":
        reference_reads_shares = (1, 2)

    if plot_reference:
        if reference_ax is None:
            # Split one figure vertically: reference on top, reads below.
            gs = gridspec.GridSpec(sum(reference_reads_shares), 1)
            fig = plt.figure(figsize=figsize, facecolor="w")
            reference_ax = fig.add_subplot(gs[:reference_reads_shares[0]])
            ax = fig.add_subplot(gs[reference_reads_shares[0]:])

        grecord.plot(reference_ax, with_ruler=False, annotate_inline=True)
        # The reads are drawn by a second call without the reference, then
        # both panels are given the same x limits.
        self.plot(ax=ax, plot_coverage=plot_coverage, plot_reference=False)
        ax.set_xlim(reference_ax.get_xlim())
        return ax

    # so the first read in the list gets displayed on top
    read_reference_matches = OrderedDict(
        [item for item in list(self.read_reference_matches.items())[::-1]])
    L = len(self.reference)
    if ax is None:
        fig, ax = plt.subplots(1, figsize=figsize)
    ax.set_xlim(-2, L)
    ax.set_ylim(0, len(read_reference_matches) + 2)
    ax.set_yticks(range(1, len(read_reference_matches) + 1))
    ax.set_yticklabels([name for name in read_reference_matches])
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    # One record is reused for every match: its feature list is replaced,
    # overflowing features are split, and each piece is drawn at row y.
    gr_record = GraphicRecord(sequence_length=L, features=[])

    for i, (read_name, matches) in enumerate(read_reference_matches.items()):
        y = i + 1
        ax.axhline(y, ls=":", lw=0.5, color="#aaaaaa", zorder=-1000)
        # Reads whose primer metadata has a truthy 'available' entry get a
        # different color from the others.
        if matches.primer.metadata.get('available', False):
            color = '#f7a3f6'
        else:
            color = "#a3c3f7"
        for match in matches.read_matches:
            gr_record.features = [
                GraphicFeature(start=match.start, end=match.end,
                               strand=match.strand, color=color)
            ]
            gr_record.split_overflowing_features_circularly()
            for feature in gr_record.features:
                gr_record.plot_feature(ax, feature, y, linewidth=0.2)
        for match in matches.primer_matches:
            feature = GraphicFeature(start=match.start, end=match.end,
                                     strand=match.strand, color="#e85558")
            gr_record.plot_feature(ax, feature, y, linewidth=0.2)

    if plot_coverage:
        ax.fill_between(range(len(self.coverage)), self.coverage,
                        zorder=-2000, alpha=0.2, facecolor="#a3c3f7")
    return ax
def probe_graph(file_name, path):
    """Write the probe set listed in a results file and plot each probe pair.

    The results file is read line by line; each line is split on ``:`` and
    its first field selects what is stored: ``Sequence``, ``Initiator`` (two
    initiators), ``Probes`` (two probe sequences plus a start and an end) or
    ``Name``.

    For every probe pair a plot of the covered sub-sequence is saved as
    ``<path>/<name>Plots for Probes<k>`` and the assembled probes are written
    to ``<path>/<name>_Probe_Set.txt``.

    Args:
        file_name: Path of the results file to read.
        path: Directory receiving the text file and the plots.
    """
    probes = []
    # Context manager so the input file is closed once it has been read.
    with open(file_name, "r") as f:
        for line in f:
            print(line)
            line_info = line.split(':')
            if line_info[0] == "Sequence":
                sequence = line_info[1]
            elif line_info[0] == "Initiator":
                initiator1 = line_info[1].upper()
                initiator2 = line_info[2].upper()
            elif line_info[0] == "Probes":
                # Stored flat, four entries per pair:
                # probe 1, probe 2, start, end.
                probes.append(line_info[1].upper())
                probes.append(line_info[3].upper())
                probes.append(line_info[2])
                probes.append(line_info[4])
            elif line_info[0] == "Name":
                name = line_info[1].rstrip()

    file_write = path + "/" + name + "_Probe_Set" ".txt"
    # Context manager so the output file is closed even if a probe fails.
    with open(file_write, "w+") as w:
        x = 1
        print(probes)
        print(name)
        for a in range(0, len(probes), 4):
            gstart = 28
            gend = 3
            probe1 = str(Seq(probes[a]).complement())
            probe2 = str(Seq(probes[a + 1]).complement())

            # The plotted window begins 4 positions before the probe start.
            start = int(probes[a + 2])
            start -= 4
            if start <= 0:
                # The window would begin before the sequence: clamp it to 0
                # and recompute the feature coordinates from the overshoot.
                gend = 0 - start - 1
                gstart = gend + 25
                start = 0
            end = int(probes[a + 3])
            subseq = sequence[start:end].upper()

            record = GraphicRecord(
                sequence=subseq,
                features=[
                    GraphicFeature(start=gstart, end=gend, strand=+1,
                                   color='#ffcccc', label=probe1),
                    GraphicFeature(start=gstart + 28, end=gstart + 2,
                                   strand=+1, color='#ccccff', label=probe2),
                    GraphicFeature(start=gstart, end=gstart, strand=-1,
                                   color='m', label="space"),
                    GraphicFeature(start=gstart + 1, end=gstart + 1,
                                   strand=-1, color='m', label="space"),
                    GraphicFeature(start=gstart,
                                   end=(gstart - len(initiator1)),
                                   strand=-1, color='y', label=initiator1),
                    GraphicFeature(start=gstart + 2,
                                   end=(gstart + 2 + len(initiator1)),
                                   strand=+1, color='y', label=initiator2)
                ])
            ax, _ = record.plot(figure_width=10)
            record.plot_sequence(ax)

            # Each written probe joins an initiator and the reversed
            # complemented probe with a "TT" spacer.
            total1 = initiator1 + "TT" + probe1[::-1]
            total2 = probe2[::-1] + "TT" + initiator2
            w.write("PROBE SET" + str(x) + "\n")
            w.write("Probe1:" + total1 + "\n")
            w.write("Probe2:" + total2 + "\n")

            tosave = path + "/" + name + "Plots for Probes" + str(x)
            x += 1
            ax.figure.savefig(tosave, bbox_inches='tight')
"""Generate a 2-plot figure with full sequence on the left, detail of a sub-segment on the right.""" from dna_features_viewer import GraphicFeature, GraphicRecord import matplotlib.pyplot as plt record = GraphicRecord(sequence=250 * "ATGC", features=[ GraphicFeature(start=5, end=20, strand=+1, color="#ffd700", label="Small feature"), GraphicFeature( start=20, end=500, strand=+1, color="#ffcccc", label="Gene 1 with a very long name"), GraphicFeature(start=400, end=700, strand=-1, color="#cffccc", label="Gene 2"), GraphicFeature(start=600, end=900, strand=+1, color="#ccccff", label="Gene 3") ]) zoom_start, zoom_end = 398, 428 # coordinates of the "detail"
print(miR.name) print(str(mir_seed)) for utr in SeqIO.parse(utr_Database, "fasta"): pos = 0 for seq in window(str(utr.seq), len(str(mir_seed))): if (hamming2(str(seq.upper()), str(mir_seed.back_transcribe().reverse_complement())) <= nb_max_mismatch): f1.write(utr.id + "\t" + str(pos) + "\t" + str(pos + len(str(mir_seed))) + "\t" + miR.id + "\t" + str( hamming2( str(seq.upper()), str(mir_seed.back_transcribe(). reverse_complement()))) + "\t" + "+" + "\t" + str(seq.upper()) + "\n") features.append( GraphicFeature(start=pos, end=pos + len(str(mir_seed)), strand=+1, color="#ccccff", label=re.sub(r'mmu-', '', miR.id))) pos = pos + 1 #print(pos) f1.close() record = CircularGraphicRecord(sequence_length=1100, features=features) # record.plot(figure_width=2) plt.show()