def generate_base_weblogo(options, allsequences, output):
    """Render a sequence logo of ``allsequences`` into a PDF file.

    The sequences are serialised to an in-memory FASTA stream (each record is
    named after its position in ``allsequences``), parsed with the ``'rna'``
    entry of ``wl.std_alphabets`` and drawn in probability units using a
    custom A / C / G / UT colour scheme built from ``iwork_colors``.

    Args:
        options: object providing ``flanking_window``; the first logo
            position is numbered ``-options.flanking_window``.
        allsequences: iterable of sequences; each one is formatted with
            ``%s`` into a FASTA record.
        output: path of the PDF file to write.
    """
    import weblogolib as wl
    from weblogolib.colorscheme import ColorScheme, ColorGroup
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO

    ecliptic_color_scheme = ColorScheme([
        ColorGroup('A', iwork_colors.blue),
        ColorGroup('C', iwork_colors.green),
        ColorGroup('G', iwork_colors.yellow),
        ColorGroup('UT', iwork_colors.red),
    ])

    fastainput = StringIO(
        ''.join('>%d\n%s\n' % s for s in enumerate(allsequences)))
    rna = wl.std_alphabets['rna']
    seqs = wl.read_seq_data(fastainput, alphabet=rna)
    logo = wl.LogoData.from_seqs(seqs)

    logooptions = wl.LogoOptions()
    logooptions.unit_name = 'probability'
    logooptions.color_scheme = ecliptic_color_scheme
    logooptions.show_fineprint = False
    logooptions.first_index = -options.flanking_window
    logooptions.title = "Ecliptic"

    logoformat = wl.LogoFormat(logo, logooptions)
    # PDF is binary data: open in binary mode and make sure the handle is
    # flushed and closed even if rendering fails.
    with open(output, 'wb') as pdf_out:
        wl.pdf_formatter(logo, logoformat, pdf_out)
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Write a nucleotide sequence logo for ``pwm`` and a pickle of the matrix.

    Two files are produced: ``filename + ".pkl"`` (the column-reordered
    matrix, pickled with the highest protocol) and ``filename + "." + mode``
    (the rendered logo).

    Args:
        pwm: array indexed as ``pwm[:, letter]``; it must support NumPy
            fancy indexing on its second axis.
        filename: output path without extension; also used as the logo title.
        alphabet: string over ``A``/``C``/``G``/``T``. Output column ``j`` is
            taken from input column ``{"A":0,"C":1,"G":2,"T":3}[alphabet[j]]``.
        mode: output format; ``wl.<mode>_formatter`` must exist
            (e.g. ``"pdf"``, ``"png"``).

    Raises:
        ValueError: if ``weblogolib`` has no ``<mode>_formatter``.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    import weblogolib as wl  # @UnresolvedImport

    myAlphabet = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([myAlphabet[i] for i in alphabet])
    # NOTE(review): this indexing equals "reorder columns from `alphabet`
    # order to ACGT" only when the permutation is its own inverse (identity,
    # reversal, single swaps) -- confirm the intended meaning of `alphabet`.
    pwm = pwm[:, translate]

    with open(filename + ".pkl", 'wb') as pkl_file:
        pickle.dump(pwm, pkl_file, -1)

    PWMdata = np.array(pwm)
    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], PWMdata)
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    formatt = wl.LogoFormat(data, options)

    # Look the formatter up by name instead of exec-ing a built string: the
    # old fallback branch passed the builtin ``format`` rather than
    # ``formatt`` and would execute arbitrary text supplied as ``mode``.
    formatter = getattr(wl, "{0}_formatter".format(mode), None)
    if formatter is None:
        raise ValueError("Unsupported logo mode: {0!r}".format(mode))

    # Render before opening the output so a failure leaves no empty file.
    rendered = formatter(data, formatt)
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(rendered)
def generate_logos(motifs, foldername, filetype='png'):
    """Write logo and/or sequence files for every non-empty motif.

    Each truthy motif at index ``i`` produces
    ``<foldername>/<i>_<len(motif.seqs)>.<ext>`` for every requested
    extension: ``png`` (``wl.png_print_formatter``), ``pdf``
    (``wl.pdf_formatter``) and ``txt`` (one ``str(seq)`` per line).

    Args:
        motifs: iterable of motif objects exposing ``data`` and ``seqs``;
            falsy entries are skipped but still consume an index.
        foldername: directory the files are written to.
        filetype: string or container tested with ``in`` for ``'png'``,
            ``'pdf'`` and ``'txt'``; several may be requested at once
            (e.g. ``'png,pdf'`` or ``['png', 'txt']``).

    Raises:
        ValueError: if ``filetype`` requests none of png, pdf or txt.
    """
    # Validate once, up front. The previous trailing ``else`` was paired only
    # with the 'txt' check, so valid requests such as the default 'png' wrote
    # their file and then raised "Invalid filetype".
    requested = [ext for ext in ('png', 'pdf', 'txt') if ext in filetype]
    if not requested:
        raise ValueError(
            'Invalid filetype. Available options: png, pdf or txt. ')

    options = wl.LogoOptions()
    options.color_scheme = wl.std_color_schemes["chemistry"]

    for i, motif in enumerate(motifs):
        if not motif:
            continue
        my_format = wl.LogoFormat(motif.data, options)
        base_path = os.path.join(
            foldername, str(i) + '_' + str(len(motif.seqs)))

        if 'png' in requested:
            to_write = wl.png_print_formatter(motif.data, my_format)
            with open(base_path + ".png", "wb") as png_file:
                png_file.write(to_write)
        if 'pdf' in requested:
            to_write = wl.pdf_formatter(motif.data, my_format)
            with open(base_path + ".pdf", "wb") as pdf_file:
                pdf_file.write(to_write)
        if 'txt' in requested:
            to_write = ''.join(["%s\n" % str(seq) for seq in motif.seqs])
            with open(base_path + ".txt", "w") as txt_file:
                txt_file.write(to_write)
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Write a nucleotide sequence logo for ``pwm`` and a pickle of the matrix.

    Two files are produced: ``filename + ".pkl"`` (the column-reordered
    matrix, pickled with the highest protocol) and ``filename + "." + mode``
    (the rendered logo).

    Args:
        pwm: array indexed as ``pwm[:, letter]``; it must support NumPy
            fancy indexing on its second axis.
        filename: output path without extension; also used as the logo title.
        alphabet: string over ``A``/``C``/``G``/``T``. Output column ``j`` is
            taken from input column ``{"A":0,"C":1,"G":2,"T":3}[alphabet[j]]``.
        mode: output format; ``wl.<mode>_formatter`` must exist
            (e.g. ``"pdf"``, ``"png"``).

    Raises:
        ValueError: if ``weblogolib`` has no ``<mode>_formatter``.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    import weblogolib as wl  # @UnresolvedImport

    myAlphabet = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([myAlphabet[i] for i in alphabet])
    # NOTE(review): this indexing equals "reorder columns from `alphabet`
    # order to ACGT" only when the permutation is its own inverse (identity,
    # reversal, single swaps) -- confirm the intended meaning of `alphabet`.
    pwm = pwm[:, translate]

    with open(filename + ".pkl", 'wb') as pkl_file:
        pickle.dump(pwm, pkl_file, -1)

    PWMdata = np.array(pwm)
    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], PWMdata)
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    formatt = wl.LogoFormat(data, options)

    # Look the formatter up by name instead of exec-ing a built string: the
    # old fallback branch passed the builtin ``format`` rather than
    # ``formatt`` and would execute arbitrary text supplied as ``mode``.
    formatter = getattr(wl, "{0}_formatter".format(mode), None)
    if formatter is None:
        raise ValueError("Unsupported logo mode: {0!r}".format(mode))

    # Render before opening the output so a failure leaves no empty file.
    rendered = formatter(data, formatt)
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(rendered)
def main():
    """Build a protein sequence logo PDF from a workspace's validated designs.

    Command-line arguments are parsed by docopt from the module docstring:
    ``<workspace>`` (workspace root), ``<round>`` and ``<pdf_output>``
    (destination of the PDF).
    """
    cli = docopt.docopt(__doc__)
    workspace_root = cli['<workspace>']
    design_round = cli['<round>']
    pdf_path = cli['<pdf_output>']

    # Validated designs are the default source for now; fixbb designs or
    # restrained models could be of interest to the user as well.
    workspace = pipeline.ValidatedDesigns(workspace_root, design_round)
    workspace.check_paths()

    # Load every design first, then turn each resfile sequence into a Seq.
    designs = []
    for subdir in workspace.output_subdirs:
        designs.append(structures.Design(subdir))

    protein_seqs = []
    for design in designs:
        protein_seqs.append(corebio.seq.Seq(design.resfile_sequence))

    sequences = corebio.seq.SeqList(
        protein_seqs,
        alphabet=corebio.seq.unambiguous_protein_alphabet,
    )

    logo_data = weblogo.LogoData.from_seqs(sequences)
    logo_options = weblogo.LogoOptions()
    logo_options.title = workspace.focus_dir
    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    with open(pdf_path, 'wb') as logo_file:
        logo_file.write(weblogo.pdf_formatter(logo_data, logo_format))
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Draw a sequence logo of the contexts around C>T calls in a call table.

    ``inFileN`` is read as tab-separated text: the first line is skipped as
    a comment, the second is the header, and the columns ``context``,
    ``ref_allele``, ``alt_allele`` and ``judgement`` are used. Rows judged
    ``REJECT`` are dropped. For every other row a 7-character context is
    built from the first three and last three characters of ``context``
    around the reference allele. When the reference is neither ``C`` nor
    ``T``, context and alleles are passed through ``mybasic.rc``
    (presumably reverse complement -- confirm against ``mybasic``).
    Only C>T contexts are kept.

    Args:
        sampN: sample name, used as the logo title.
        inFileN: path of the input call table.
        outFileN: path of the intermediate file, one context per line.
        pdfFileN: path of the PDF logo to write.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        idxH = {name: i for i, name in enumerate(headerL)}

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')

                context = colL[idxH['context']]
                ref = colL[idxH['ref_allele']]
                alt = colL[idxH['alt_allele']]
                status = colL[idxH['judgement']]

                if status == 'REJECT':
                    continue

                # Three flanking characters on each side of the ref allele.
                context = context[:3] + ref + context[-3:]

                if ref not in ('C', 'T'):
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)

                if ref == 'C' and alt == 'T':  # TMZ context only
                    outFile.write('%s\n' % context)

    # The contexts file is fully written and closed before being read back.
    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN

    logo_format = weblogolib.LogoFormat(data, options)

    # PDF is binary data; the context manager guarantees flush and close.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Draw a sequence logo of the contexts around C>T calls in a call table.

    ``inFileN`` is read as tab-separated text: the first line is skipped as
    a comment, the second is the header, and the columns ``context``,
    ``ref_allele``, ``alt_allele`` and ``judgement`` are used. Rows judged
    ``REJECT`` are dropped. For every other row a 7-character context is
    built from the first three and last three characters of ``context``
    around the reference allele. When the reference is neither ``C`` nor
    ``T``, context and alleles are passed through ``mybasic.rc``
    (presumably reverse complement -- confirm against ``mybasic``).
    Only C>T contexts are kept.

    Args:
        sampN: sample name, used as the logo title.
        inFileN: path of the input call table.
        outFileN: path of the intermediate file, one context per line.
        pdfFileN: path of the PDF logo to write.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        idxH = {name: i for i, name in enumerate(headerL)}

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')

                context = colL[idxH['context']]
                ref = colL[idxH['ref_allele']]
                alt = colL[idxH['alt_allele']]
                status = colL[idxH['judgement']]

                if status == 'REJECT':
                    continue

                # Three flanking characters on each side of the ref allele.
                context = context[:3] + ref + context[-3:]

                if ref not in ('C', 'T'):
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)

                if ref == 'C' and alt == 'T':  # TMZ context only
                    outFile.write('%s\n' % context)

    # The contexts file is fully written and closed before being read back.
    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN

    logo_format = weblogolib.LogoFormat(data, options)

    # PDF is binary data; the context manager guarantees flush and close.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
def plot_motif_from_sites( sites, img_format='png', smallText=None ):
    """Render a DNA motif logo from a collection of binding sites.

    Args:
        sites: sequences wrapped in ``wl.SeqList`` with the unambiguous DNA
            alphabet.
        img_format: ``'png'``, ``'svg'`` or ``'pdf'``.
        smallText: optional fine-print text for the logo.

    Returns:
        For ``'png'`` the ``plt`` module after the decoded image has been
        drawn with the axes switched off; for ``'svg'`` / ``'pdf'`` the
        value returned by the matching weblogo formatter; ``None`` for any
        other ``img_format``.
    """
    site_list = wl.SeqList(sites, wl.unambiguous_dna_alphabet)
    ldata = wl.LogoData.from_seqs(site_list)

    logo_options = wl.LogoOptions()
    if smallText is not None:
        logo_options.fineprint = smallText

    logo_format = wl.LogoFormat(ldata, logo_options)
    logo_format.color_scheme = wl.classic
    logo_format.resolution = 150

    if img_format == 'png':
        # Decode the PNG data from memory and show it with matplotlib.
        png_buffer = cStringIO.StringIO(wl.png_formatter(ldata, logo_format))
        picture = mpimg.imread(png_buffer)
        plt.axis('off')
        plt.imshow(picture)
        return plt
    if img_format == 'svg':
        return wl.svg_formatter(ldata, logo_format)
    if img_format == 'pdf':
        return wl.pdf_formatter(ldata, logo_format)
    return None
def do_seqlogo(self, input_fasta, output_seqlogo, col_list):
    """Create a PDF sequence logo of a FASTA file, annotated with col_list.

    The sequences are read from ``input_fasta``, re-labelled with the
    alphabet ``ACDEFGHIKLMNPQRSTVWYO`` and drawn with a four-group colour
    scheme (blue / green / black / dark orange).

    Args:
        input_fasta: path of the FASTA file to read.
        output_seqlogo: path of the PDF file to write.
        col_list: zero-based column numbers; each is shifted by one so the
            logo annotation starts at 1 rather than 0.
    """
    import weblogolib
    import corebio
    from weblogolib.colorscheme import ColorScheme
    from weblogolib.colorscheme import ColorGroup

    # Assigning colors.
    hydrophobicity = ColorScheme([
        ColorGroup("RKDENQ", "blue"),
        ColorGroup("SGHTAP", "green"),
        ColorGroup("YVMCLFIW", "black"),
        ColorGroup("O", "dark orange"),
    ])

    # Column labels begin with 1, not 0.
    col_list2 = [i + 1 for i in col_list]

    with open(input_fasta) as fin:
        seqs = weblogolib.read_seq_data(fin)
    seqs.alphabet = corebio.seq.Alphabet('ACDEFGHIKLMNPQRSTVWYO')

    data = weblogolib.LogoData.from_seqs(seqs)
    data.composition = 'equiprobable'

    options = weblogolib.LogoOptions()
    options.yaxis_scale = 4.5
    options.yaxis_tic_interval = 2.25
    options.annotate = col_list2
    options.stack_width = 30
    options.stacks_per_line = 35
    # NOTE(review): 'No' is a non-empty string and therefore truthy; if the
    # intent is to disable width scaling this probably needs to be False.
    # Left unchanged to keep the rendered output identical -- confirm.
    options.scale_width = 'No'
    options.color_scheme = hydrophobicity

    format_logo = weblogolib.LogoFormat(data, options)

    # Render first so a failure does not leave an empty output file, then
    # write in binary mode: the formatter returns PDF data, not text.
    pdf_logo = weblogolib.pdf_formatter(data, format_logo)
    with open(output_seqlogo, 'wb') as fout:
        fout.write(pdf_logo)
def draw_logo(df,seq_dict,inv_dict,dicttype):
    '''Draw logo of sequences.

    Args:
        df: table indexable by a list of column names; it must contain one
            ``freq_<symbol>`` column per symbol.
        seq_dict: only its length is used (the number of symbols).
        inv_dict: maps ``0 .. len(seq_dict) - 1`` to the symbol of each
            column.
        dicttype: ``'dna'``, ``'protein'`` or ``'rna'``; selects the weblogo
            alphabet. DNA logos also get a custom A/T/C/G colour scheme.

    Returns:
        Whatever ``weblogolib.pdf_formatter`` returns for the logo.

    Raises:
        ValueError: if ``dicttype`` is not one of the supported values.
    '''
    # Validate first so an unknown type fails with a clear message instead
    # of an UnboundLocalError on ``data`` further down.
    if dicttype not in ('dna', 'protein', 'rna'):
        raise ValueError(
            "dicttype must be 'dna', 'protein' or 'rna', got %r" % (dicttype,))

    # Set Logo options.
    # Stack width in points; not the default size of 10.8.
    stackwidth = 9.5
    # Ratio of stack height:width; doesn't count the part going over the
    # maximum value of 1.
    stackaspectratio = 4.4

    logo_options = weblogolib.LogoOptions()
    logo_options.fineprint = None
    logo_options.stack_aspect_ratio = stackaspectratio
    logo_options.show_errorbars = True
    logo_options.errorbar_fraction = .75
    logo_options.errorbar_gray = .9
    logo_options.errorbar_width_fraction = .9
    logo_options.stack_width = stackwidth
    # Uncomment if you want each column to have height = 1:
    # logo_options.unit_name = 'probability'
    logo_options.show_yaxis = False

    # The counts matrix is built the same way for every alphabet.
    symbols = [inv_dict[i] for i in range(len(seq_dict))]
    column_headers = ['freq_' + symbol for symbol in symbols]
    counts_arr = np.array(df[column_headers])

    if dicttype == 'dna':
        al = weblogolib.unambiguous_dna_alphabet
    elif dicttype == 'protein':
        al = weblogolib.unambiguous_protein_alphabet
    else:
        al = weblogolib.unambiguous_rna_alphabet
    data = weblogolib.LogoData.from_counts(al, counts_arr)

    if dicttype == 'dna':
        colormapping = {
            'A': '#008000',
            'T': '#FF0000',
            'C': '#0000FF',
            'G': '#FFA500',
        }
        color_scheme = weblogolib.colorscheme.ColorScheme()
        for x in symbols:
            if hasattr(color_scheme, 'rules'):
                color_scheme.rules.append(
                    weblogolib.colorscheme.SymbolColor(
                        x, colormapping[x], "'%s'" % x))
            else:
                # this part is needed for weblogo 3.4
                color_scheme.groups.append(
                    weblogolib.colorscheme.ColorGroup(
                        x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme

    # Set logo format and output.
    myformat = weblogolib.LogoFormat(data, logo_options)
    myimage = weblogolib.pdf_formatter(data, myformat)
    return myimage
import weblogolib as wll
import pickle

# Example script: build a sequence logo from an in-memory list of sites and
# write it out as a PDF.
seqs = [
    'attcgtgatagctgtcgtaaag',
    'ttttgttacctgcctctaactt',
    'aagtgtgacgccgtgcaaataa',
    'tgccgtgattatagacactttt',
    'atttgcgatgcgtcgcgcattt',
    'taatgagattcagatcacatat',
    'taatgtgacgtcctttgcatac',
    'gaaggcgacctgggtcatgctg',
    'aggtgttaaattgatcacgttt',
    'cgatgcgaggcggatcgaaaaa',
    'aaattcaatattcatcacactt',
    'agatgtgagccagctcaccata',
    'agatgtgattagattattattc',
    'aattgtgatgtgtatcgaagtg',
    'ttatttgaaccagatcgcatta',
    'aaatgtaagctgtgccacgttt',
    'aagtgtgacatggaataaatta',
    'ttgtttgatttcgcgcatattc',
    'aaacgtgatttcatgcgtcatt',
    'atgtgtgcggcaattcacattt',
    'taatgttatacatatcactcta',
    'ttttatgacgaggcacacacat',
    'aagttcgatatttctcgttttt',
    'ttttgcgatcaaaataacactt',
    'aaacgtgatcaacccctcaatt',
    'taatgtgagttagctcactcat',
    'aattgtgagcggataacaattt',
    'ttgtgtgatctctgttacagaa',
    'TAAtgtggagatgcgcacaTAA',
    'TTTtgcaagcaacatcacgAAA',
    'GACctcggtttagttcacaGAA',
    'aattgtgacacagtgcaaattc',
    'aaccgtgctcccactcgcagtc',
    'TCTTGTGATTCAGATCACAAAG',
    'ttttgtgagttttgtcaccaaa',
    'aaatgttatccacatcacaatt',
    'ttatttgccacaggtaacaaaa',
    'atgcctgacggagttcacactt',
    'taacgtgatcatatcaacagaa',
    'Ttttgtggcctgcttcaaactt',
    'ttttatgatttggttcaattct',
    'aattgtgaacatcatcacgttc',
    'ttttgtgatctgtttaaatgtt',
    'agaggtgattttgatcacggaa',
    'atttgtgagtggtcgcacatat',
    'gattgtgattcgattcacattt',
    'gtgtgtaaacgtgaacgcaatc',
    'aactgtgaaacgaaacatattt',
    'TCTTGTGATGTGGTTAACCAAT',
]

# Alternative input: read the sequences from a FASTA file instead.
#fIn = open('/Users/biggus/SourceCodeEtc/weblogo-3.0/tests/data/cap.fas')
#seqs = wll.read_seq_data(fIn)

# Wrap every raw string in a Seq, then let weblogo guess the alphabet.
seqs = wll.SeqList([wll.Seq(s) for s in seqs])
seqs.alphabet = wll.which_alphabet(seqs)

data = wll.LogoData.from_seqs(seqs)
#pickle.dump(data,open('/Users/biggus/SourceCodeEtc/weblogo-3.0/tests/out/fromList.data.pkl','w'))
#exit('Just Dumped List Pkl')

options = wll.LogoOptions()
options.title = 'A Logo Title'
format = wll.LogoFormat(data, options)

# PDF is binary data; the context manager guarantees the file is flushed and
# closed once the logo has been written.
with open('/Users/biggus/SourceCodeEtc/weblogo-3.0/tests/out/cap2.xmpl.pdf', 'wb') as fout:
    wll.pdf_formatter(data, format, fout)

print('done')