def weblogoPOIM(logofile, poim, max_len):
    """Create a weblogo from the 1st-degree POIM instead of a heatmap.

    The entries of ``poim[0]`` are split by sign into a "positive" and a
    "negative" count matrix. Each matrix is transposed, rendered as its own
    PNG logo over the alphabet ``ACGT``, and both temporary images are then
    handed to ``concatPNG`` together with ``logofile``.

    Args:
        logofile: Target path passed to ``concatPNG``. It is also the prefix
            of the temporary files ``<logofile>p.png`` and
            ``<logofile>n.png``, and its base name supplies the logo title.
        poim: Sequence whose first element is the 1st-degree POIM, read as
            ``poim[0][i][j]``.
            NOTE(review): presumably one row per nucleotide in ACGT order,
            since the matrix is transposed before use - confirm with caller.
        max_len: Unused; kept so existing callers keep working.
    """
    warnings.filterwarnings('ignore', ' This call to matplotlib.use()*')
    from weblogolib import LogoData, LogoOptions, LogoFormat, classic, png_print_formatter

    # Split every weight by sign; the logo of the opposite sign gets a 0.
    # NOTE(review): negative weights are scaled by -10000 but positive ones
    # by 1000. Kept as found - confirm the asymmetry is intentional.
    positive_logo = [
        [0 if weight < 0 else weight * 1000 for weight in row]
        for row in poim[0]
    ]
    negative_logo = [
        [weight * -10000 if weight < 0 else 0 for weight in row]
        for row in poim[0]
    ]

    pos_data = LogoData.from_counts('ACGT', numpy.array(positive_logo).T, None)
    neg_data = LogoData.from_counts("ACGT", numpy.array(negative_logo).T, None)

    # Title: base name without extension, reduced to the part after the
    # last underscore when there is one.
    title = os.path.split(logofile)[1]
    dot = title.rfind(".")
    if dot != -1:
        # Only strip when an extension exists; slicing with rfind() == -1
        # would silently drop the last character of the name.
        title = title[:dot]
    if "_" in title:
        title = title[title.rfind("_") + 1:]

    neg_opt = LogoOptions()
    neg_opt.fineprint += " from KIRMES POIM data"
    neg_opt.small_fontsize = 4
    neg_opt.title_fontsize = 8
    neg_opt.scale_width = False
    neg_opt.logo_title = title + " Negative Logo"
    neg_format = LogoFormat(neg_data, neg_opt)

    pos_opt = LogoOptions()
    pos_opt.scale_width = False
    pos_opt.logo_title = title + " Positive Sequence Logo"
    pos_opt.show_fineprint = False
    pos_opt.color_scheme = classic
    pos_format = LogoFormat(pos_data, pos_opt)

    neg_path = logofile + "n.png"
    pos_path = logofile + "p.png"
    try:
        # PNG is binary data, so the files must be opened in binary mode.
        with open(neg_path, 'wb') as neg_logo:
            png_print_formatter(neg_data, neg_format, neg_logo)
        with open(pos_path, 'wb') as pos_logo:
            png_print_formatter(pos_data, pos_format, pos_logo)
        concatPNG(logofile, (pos_path, neg_path))
    finally:
        # Remove the intermediate images even when rendering failed.
        for tmp_path in (neg_path, pos_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
def create_logo(self, seqs=None):
    """Create sequence logo for input sequences.

    Args:
        seqs: Iterable of ``(header, sequence)`` pairs. Only the sequences
            are used for the logo. Omitting it is treated as empty input.

    Returns:
        The result of the ``wbl`` formatter selected by
        ``self.output_format``: ``'png'``, ``'png_print'`` or ``'jpeg'``;
        any other value falls back to the EPS formatter.

    Raises:
        ValueError: If no sequences were supplied.
    """
    # Materialise once so generators work and emptiness can be checked.
    pairs = list(seqs) if seqs is not None else []
    if not pairs:
        raise ValueError(
            "create_logo() needs at least one (header, sequence) pair")

    # Separate headers from sequences; the headers are not needed here.
    instances = [sequence for _header, sequence in pairs]

    # Compare by value: `is` on string literals tests object identity and
    # only works by accident of interning.
    sequence_type = self.options.sequence_type
    if sequence_type == 'rna':
        alphabet = Alphabet('ACGU')
    elif sequence_type == 'protein':
        alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
    else:
        alphabet = Alphabet('AGCT')

    motif_corebio = SeqList(alist=instances, alphabet=alphabet)
    data = wbl.LogoData.from_seqs(motif_corebio)
    logo_format = wbl.LogoFormat(data, self.options)

    if self.output_format == 'png':
        return wbl.png_formatter(data, logo_format)
    elif self.output_format == 'png_print':
        return wbl.png_print_formatter(data, logo_format)
    elif self.output_format == 'jpeg':
        return wbl.jpeg_formatter(data, logo_format)
    else:
        return wbl.eps_formatter(data, logo_format)
def generate_logos(motifs, foldername, filetype='png'):
    """Write logo images and/or sequence listings for every truthy motif.

    Output files are named ``<index>_<number of sequences>.<ext>`` inside
    ``foldername``, where ``index`` is the motif's position in ``motifs``.

    Args:
        motifs: Iterable of motif objects; falsy entries are skipped. Each
            used motif must provide ``data`` and ``seqs``.
        foldername: Directory the files are written to.
        filetype: Value tested with ``in`` for ``'png'``, ``'pdf'`` and
            ``'txt'``; every option found selects one output per motif, so
            e.g. ``'png,txt'`` or ``['png', 'txt']`` produce two files.

    Raises:
        ValueError: If ``filetype`` contains none of the supported options.
    """
    # Validate up front. Previously the error was attached to the 'txt'
    # check only, so a valid 'png' or 'pdf' request raised after writing.
    if not any(kind in filetype for kind in ('png', 'pdf', 'txt')):
        raise ValueError(
            'Invalid filetype. Available options: png, pdf or txt. ')

    options = wl.LogoOptions()
    options.color_scheme = wl.std_color_schemes["chemistry"]
    for i, motif in enumerate(motifs):
        if not motif:
            continue
        stem = os.path.join(foldername, str(i) + '_' + str(len(motif.seqs)))
        my_format = wl.LogoFormat(motif.data, options)
        if 'png' in filetype:
            to_write = wl.png_print_formatter(motif.data, my_format)
            with open(stem + ".png", "wb") as out:
                out.write(to_write)
        if 'pdf' in filetype:
            to_write = wl.pdf_formatter(motif.data, my_format)
            with open(stem + ".pdf", "wb") as out:
                out.write(to_write)
        if 'txt' in filetype:
            # One sequence per line.
            to_write = ''.join(["%s\n" % str(seq) for seq in motif.seqs])
            with open(stem + ".txt", "w") as out:
                out.write(to_write)
def create_logo(input_seqs_fname, logo_fname, options):
    """
    Create a logo plot png using weblogo from a fasta created with
    write_logo_input()

    The input file is deleted once its sequences have been read.

    Args:
        input_seqs_fname: Path of the sequence file to read and then remove.
        logo_fname: Path of the PNG file to write.
        options: Options object passed to ``w.LogoFormat``.

    Raises:
        subprocess.CalledProcessError: If removing the input file fails.
    """
    # Plain "r": the "U" flag was removed in Python 3.11, and text mode
    # already uses universal newlines on Python 3.
    with open(input_seqs_fname, "r") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)
    # Pass an argument list: splitting a command string breaks on paths
    # that contain whitespace.
    subprocess.check_call(["rm", input_seqs_fname])
    logo_format = w.LogoFormat(data, options)
    # PNG output is binary data, so the file is opened in binary mode.
    with open(logo_fname, "wb") as f:
        f.write(w.png_print_formatter(data, logo_format))
def write_weblogo(self, filepath):
    """Render ``self.values`` as a PNG sequence logo written to ``filepath``.

    ``self.values`` is converted to a NumPy array (one row per entry) and
    used as the count matrix over the alphabet formed by joining
    ``self.alphabet``.

    Args:
        filepath: Path of the PNG file to create or overwrite.
    """
    counts = np.array(
        tuple(tuple(distribution) for distribution in self.values))
    alph = Alphabet(''.join(self.alphabet))
    weblogo_data = LogoData.from_counts(alph, counts)
    weblogo_options = LogoOptions(color_scheme=classic)
    # WebLogo reads the title from `logo_title`; assigning to `title` only
    # created an attribute the formatter never looks at.
    weblogo_options.logo_title = "PWM"
    weblogo_format = LogoFormat(weblogo_data, weblogo_options)
    # Render first so a formatter failure does not leave an empty file.
    png_bytes = png_print_formatter(weblogo_data, weblogo_format)
    # PNG output is binary data, so the file is opened in binary mode.
    with open(filepath, 'wb') as weblogo_file:
        weblogo_file.write(png_bytes)
# Load the sequences for the logo, then delete the intermediate FASTA file.
# Plain "r": the "U" flag was removed in Python 3.11, and text mode already
# uses universal newlines on Python 3.
with open(args.output_base + ".fasta", "r") as f:
    seqs = w.read_seq_data(f)
data = w.LogoData.from_seqs(seqs)
# Pass an argument list: splitting a command string breaks on paths that
# contain whitespace.
subprocess.check_call(["rm", args.output_base + ".fasta"])

# Logo appearance: probability units, no fineprint, up to 500 stacks per line.
options = w.LogoOptions()
options.unit_name = "probability"
options.yaxis_label = "Probability"
options.xaxis_label = "Site Position"
options.show_fineprint = False
options.stacks_per_line = 500
options.tic_length = 10
format = w.LogoFormat(data, options)
# PNG output is binary data, so the file is opened in binary mode.
with open(args.output_base + ".png", 'wb') as f:
    f.write(w.png_print_formatter(data, format))

# Write each distinct sequence once, most frequent first, under the name
# "naive_<rank>_<count / len(aa_naive_seqs)>".
aa_naive_seqs_c = Counter(aa_naive_seqs)
num_trees = len(aa_naive_seqs)
aa_naive_seqs_d = OrderedDict(
    ("naive_" + str(i) + "_" + str(float(count) / num_trees), seq)
    for i, (seq, count) in enumerate(aa_naive_seqs_c.most_common(None))
)
write_to_fasta(aa_naive_seqs_d, args.output_base + ".fasta")

# Count the sequences in naive_seqs per translate() result. groupby()
# (presumably itertools.groupby) only groups consecutive items, so a key can
# come up more than once; repeated keys are merged into the same Counter.
aa_dna_naive_seqs_d = {}
for k, g in groupby(naive_seqs, lambda seq: translate(seq)):
    if k in aa_dna_naive_seqs_d:
        aa_dna_naive_seqs_d[k].update(g)
    else:
        aa_dna_naive_seqs_d[k] = Counter(g)