def create_logo(self, seqs=None):
    """Create a sequence logo for the input sequences.

    Args:
        seqs: iterable of ``(header, sequence)`` pairs. Only the sequences
            are used for the logo; the headers are discarded.

    Returns:
        The result of the ``wbl`` formatter selected by
        ``self.output_format`` ('png', 'png_print' or 'jpeg'); any other
        value falls back to the EPS formatter.

    Raises:
        ValueError: if ``seqs`` is empty (there is nothing to unpack).
    """
    seqs = [] if seqs is None else list(seqs)
    if not seqs:
        raise ValueError(
            "create_logo needs at least one (header, sequence) pair")

    # Separate the headers from the sequences.
    _headers, instances = [list(column) for column in zip(*seqs)]

    # Compare by value: identity (`is`) on string literals is not reliable.
    sequence_type = self.options.sequence_type
    if sequence_type == 'rna':
        alphabet = Alphabet('ACGU')
    elif sequence_type == 'protein':
        alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
    else:
        alphabet = Alphabet('AGCT')

    motif_corebio = SeqList(alist=instances, alphabet=alphabet)
    data = wbl.LogoData().from_seqs(motif_corebio)
    logo_format = wbl.LogoFormat(data, self.options)

    # Unknown output formats fall back to EPS, as before.
    formatter_names = {
        'png': 'png_formatter',
        'png_print': 'png_print_formatter',
        'jpeg': 'jpeg_formatter',
    }
    formatter_name = formatter_names.get(self.output_format, 'eps_formatter')
    return getattr(wbl, formatter_name)(data, logo_format)
def make_pdf_weblogo(self, pdf_path, prior=None):
    """Render ``self.raw_seqs`` as a sequence logo and convert it to PDF.

    An intermediate EPS file is written next to ``pdf_path`` (same name with
    an ``.eps`` suffix), its position numbers are rewritten by
    ``replace_logo_numbers`` and the result is handed to ``eps_to_pdf``.

    Args:
        pdf_path: destination path; must end with ``.pdf``.
        prior: optional prior passed to ``LogoData.from_seqs``. It could
            also be, e.g., ``weblogolib.parse_prior('equiprobable',
            corebio.seq.unambiguous_protein_alphabet)``.

    Raises:
        AssertionError: if ``pdf_path`` does not end with ``.pdf`` or if
            ``self.design_positions`` / ``self.starting_seq`` is ``None``.
    """
    assert pdf_path.endswith('.pdf')
    # Identity checks: `!= None` is evaluated element-wise by array-like
    # values and is then not usable as a single truth value.
    assert self.design_positions is not None
    assert self.starting_seq is not None

    pdf_basedir = os.path.dirname(pdf_path)
    # dirname() is '' for a bare file name; os.makedirs('') would raise.
    if pdf_basedir and not os.path.isdir(pdf_basedir):
        os.makedirs(pdf_basedir)

    eps_logo_filename = pdf_path[:-4] + '.eps'

    data = weblogolib.LogoData.from_seqs(self.raw_seqs, prior=prior)

    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.xaxis_tic_interval = 1
    options.number_interval = 1
    options.number_fontsize = 3
    options.stacks_per_line = 40
    options.show_errorbars = False

    logo_format = weblogolib.LogoFormat(data, options)
    eps_binary = weblogolib.eps_formatter(data, logo_format)
    eps_str = eps_binary.decode()
    eps_str = replace_logo_numbers(
        eps_str, self.design_positions, self.starting_seq)

    with open(eps_logo_filename, 'w') as f:
        f.write(eps_str)

    eps_to_pdf(eps_logo_filename)
def seqlogo(source_name, list_weights):
    """Write one PNG sequence logo per filter of every layer.

    For each entry of ``list_weights`` a folder
    ``./<source_name>_<length>`` is created and the filters are written to
    ``1.png``, ``2.png``, ... inside it.

    :param source_name: string source name for folder name
    :param list_weights: list of weights from different layers; axis 2 of
        each entry is moved to the front and iterated as the filter index
        (presumably entries are (length, channels, filters) — TODO confirm)
    :return: None; sequence logos are written in folders for all filters
    """
    for layer_weights in list_weights:
        layer_w = np.moveaxis(layer_weights, 2, 0)
        length = layer_w.shape[1]
        path = './' + source_name + '_' + str(length) + '/'
        os.makedirs(path[:-1], exist_ok=True)
        for filter_number, filter_w in enumerate(layer_w, start=1):
            data = weblogolib.LogoData.from_counts(
                'ACGT', weights_modification(filter_w))
            options = weblogolib.LogoOptions(
                fineprint=False,
                logo_title='',
                color_scheme=weblogolib.classic,
                stack_width=weblogolib.std_sizes["large"],
                logo_start=1,
                logo_end=length,
                resolution=600)
            logo_format = weblogolib.LogoFormat(data, options)
            # `with` guarantees the handle is released even if rendering or
            # writing fails part-way through.
            with open(path + str(filter_number) + '.png', 'wb') as png_file:
                png_file.write(weblogolib.png_formatter(data, logo_format))
def generate_logo(seqfile, title):
    '''
    Generate the sequence logo from the specified sequences.

    The logo is written as EPS to ``seqfile`` with its last four characters
    replaced by ``.eps``. Position numbering starts at minus half the length
    of the first line of the file.

    Args:
        seqfile (str): The path to a sequence file.
        title: Value assigned to the ``title`` attribute of the logo options.
            NOTE(review): other code in this area uses ``logo_title`` for
            the title -- confirm ``title`` is actually read by the library.
    '''
    with open(seqfile, 'r') as handle:
        first_line = handle.readline()
        seqlen = len(first_line.rstrip('\n'))
        # Rewind so the parser sees the whole file, first line included.
        handle.seek(0)
        seqs = wl.read_seq_data(handle)

    logo_data = wl.LogoData.from_seqs(seqs)

    logo_options = wl.LogoOptions()
    logo_options.title = title
    logo_options.fineprint = ''
    #logo_options.stack_width = 16
    half_length = int(seqlen / 2)
    logo_options.first_index = -1 * half_length

    logo_format = wl.LogoFormat(logo_data, logo_options)
    eps_bytes = wl.eps_formatter(logo_data, logo_format)

    eps_path = seqfile[:-4] + '.eps'
    with open(eps_path, 'wb') as eps_file:
        eps_file.write(eps_bytes)
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Print a logo from nucleotide counts to ``<filename>.<mode>``.

    The columns of ``pwm`` are first reordered to A, C, G, T according to
    ``alphabet``; the reordered matrix is also pickled to
    ``<filename>.pkl``.

    Args:
        pwm: 2-D array indexed as ``pwm[:, column]``, one column per letter
            of ``alphabet``.
        filename: output path without extension; also stored on the logo
            options' ``title`` attribute.
        alphabet: column order of ``pwm``; every letter must be one of
            A, C, G, T.
        mode: output format; ``wl.<mode>_formatter`` must exist
            (e.g. "pdf", "png").

    Raises:
        KeyError: if ``alphabet`` contains a letter other than A, C, G, T.
        AttributeError: if weblogolib has no ``<mode>_formatter``.
    """
    myAlphabet = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([myAlphabet[i] for i in alphabet])
    pwm = pwm[:, translate]

    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    with open(filename + ".pkl", 'wb') as pickle_file:
        pickle.dump(pwm, pickle_file, -1)

    import weblogolib as wl  # @UnresolvedImport
    PWMdata = np.array(pwm)
    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], PWMdata)
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    formatt = wl.LogoFormat(data, options)

    # Resolve the formatter by name instead of exec(): the exec'd string
    # passed the builtin `format` rather than `formatt`, and executed
    # caller-supplied text. Looking it up before opening the output also
    # avoids leaving an empty file behind for an unknown mode.
    formatter = getattr(wl, "{0}_formatter".format(mode))
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(formatter(data, formatt))
def main():
    """Write a PDF sequence logo for the validated designs of one round.

    The ``<workspace>``, ``<round>`` and ``<pdf_output>`` arguments are
    parsed by docopt from the module docstring.
    """
    cli = docopt.docopt(__doc__)
    workspace_root = cli['<workspace>']
    design_round = cli['<round>']
    pdf_path = cli['<pdf_output>']

    # Right now I'm looking at validated designs by default, but the user may
    # be interested in fixbb designs or restrained models as well.
    workspace = pipeline.ValidatedDesigns(workspace_root, design_round)
    workspace.check_paths()

    designs = [
        structures.Design(subdir) for subdir in workspace.output_subdirs
    ]
    design_seqs = [
        corebio.seq.Seq(design.resfile_sequence) for design in designs
    ]
    sequences = corebio.seq.SeqList(
        design_seqs,
        alphabet=corebio.seq.unambiguous_protein_alphabet,
    )

    logo_data = weblogo.LogoData.from_seqs(sequences)
    logo_options = weblogo.LogoOptions()
    logo_options.title = workspace.focus_dir
    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    with open(pdf_path, 'wb') as pdf_file:
        pdf_bytes = weblogo.pdf_formatter(logo_data, logo_format)
        pdf_file.write(pdf_bytes)
def generate_logos(motifs, foldername, filetype='png'):
    """Write logo and/or sequence files for every non-empty motif.

    Files are named ``<index>_<number of sequences>.<ext>`` inside
    ``foldername``.

    Args:
        motifs: sequence of motif objects exposing ``data`` and ``seqs``;
            falsy entries are skipped (their index is still consumed).
        foldername: directory the files are written to.
        filetype: string (or container of strings) naming the outputs to
            produce. Each of 'png', 'pdf' and 'txt' found in it is written,
            so e.g. ``'png,pdf'`` produces both.

    Raises:
        ValueError: if ``filetype`` names none of png, pdf or txt.
    """
    # Validate once, up front. The old check hung off the 'txt' branch only,
    # so every successful png/pdf run still ended in a ValueError.
    if not any(kind in filetype for kind in ('png', 'pdf', 'txt')):
        raise ValueError(
            'Invalid filetype. Available options: png, pdf or txt. ')

    options = wl.LogoOptions()
    options.color_scheme = wl.std_color_schemes["chemistry"]
    for i, motif in enumerate(motifs):
        if not motif:
            continue
        my_format = wl.LogoFormat(motif.data, options)
        base_path = os.path.join(foldername,
                                 str(i) + '_' + str(len(motif.seqs)))
        if 'png' in filetype:
            to_write = wl.png_print_formatter(motif.data, my_format)
            with open(base_path + ".png", "wb") as png_file:
                png_file.write(to_write)
        if 'pdf' in filetype:
            to_write = wl.pdf_formatter(motif.data, my_format)
            with open(base_path + ".pdf", "wb") as pdf_file:
                pdf_file.write(to_write)
        if 'txt' in filetype:
            to_write = ''.join(["%s\n" % str(seq) for seq in motif.seqs])
            with open(base_path + ".txt", "w") as txt_file:
                txt_file.write(to_write)
def weblogo_from_dist(dist, filename='logo.eps'):
    """Generate a weblogo from a pssm distribution and write it as EPS.

    Args:
        dist: distribution converted to logo data by
            ``weblogo_data_from_dist``.
        filename: path of the EPS file to write.
    """
    import weblogolib
    data = weblogo_data_from_dist(dist)
    options = weblogolib.LogoOptions()
    options.size = weblogolib.LogoSize(stack_width=5.4 * 12,
                                       stack_height=5.4 * 12 * 5)
    options.color_scheme = weblogolib.std_color_schemes["classic"]
    logo_format = weblogolib.LogoFormat(data, options)
    # Close the output deterministically; it used to be left to the garbage
    # collector, so buffered data could stay unflushed.
    with open(filename, 'w') as fout:
        weblogolib.eps_formatter(data, logo_format, fout)
def get_logo(PFM, file_path, seq_length=6, normalize=False,
             output_format='png'):
    """Render the first ``seq_length`` rows of a frequency matrix as a logo.

    Args:
        PFM: 2-D numpy occurrence matrix with one row per position and the
            columns in A, C, G, T order.
        file_path: path the rendered image is written to.
        seq_length: number of leading rows (positions) to draw.
        normalize: when true, every row with a positive sum is divided by
            that sum. This is done in place, i.e. the caller's ``PFM`` is
            modified.
        output_format: key into ``weblogolib.formatters`` (e.g. 'png',
            'svg').
    """
    if normalize:
        for i in range(PFM.shape[0]):
            row_total = numpy.sum(PFM[i, :])
            if row_total > 0:
                PFM[i, :] = PFM[i, :] / row_total

    # Load data from an occurrence matrix
    data = weblogolib.LogoData.from_counts('ACGT', PFM[:seq_length, :])

    # One fixed colour per nucleotide
    color_rules = [
        weblogolib.SymbolColor("A", "yellow"),
        weblogolib.SymbolColor("C", "green"),
        weblogolib.SymbolColor("G", "red"),
        weblogolib.SymbolColor("T", "blue"),
    ]
    colors = weblogolib.ColorScheme(color_rules)

    # Create options
    options = weblogolib.LogoOptions(
        fineprint=False,
        logo_title="LOR filter",
        color_scheme=colors,
        stack_width=weblogolib.std_sizes["large"],
        logo_start=1,
        logo_end=seq_length,
        stacks_per_line=seq_length)

    # Create logo
    logo_format = weblogolib.LogoFormat(data, options)

    # Generate image
    formatter = weblogolib.formatters[output_format]
    image = formatter(data, logo_format)

    # The formatter output is image data, so it is written in binary mode
    # ("w" would mangle or reject it), and `with` closes the file on error.
    with open(file_path, "wb") as f:
        f.write(image)
def generate_weblogo(seq_id, seq_strs):
    """Write an EPS logo for ``seq_strs`` to ``WEBLOGO_PATH/<seq_id>.eps``.

    The sequences are joined with newlines and parsed with
    ``array_io.read``. The logo's ``title`` attribute is set to ``seq_id``
    and its unit to "probability".
    """
    alignment_text = '\n'.join(seq_strs)
    seqs = weblogolib.read_seq_data(io.StringIO(alignment_text),
                                    input_parser=array_io.read)
    logo_data = weblogolib.LogoData.from_seqs(seqs)

    logo_options = weblogolib.LogoOptions()
    logo_options.title = seq_id
    logo_options.unit_name = "probability"

    logo_format = weblogolib.LogoFormat(logo_data, logo_options)
    eps_bytes = weblogolib.eps_formatter(logo_data, logo_format)

    eps_path = os.path.join(WEBLOGO_PATH, seq_id + '.eps')
    with open(eps_path, 'wb') as eps_file:
        eps_file.write(eps_bytes)
def create_logo(input_seqs_fname, logo_fname, options):
    """
    Create a logo plot png using weblogo from a fasta
    created with write_logo_input().

    The input file is deleted once its sequences have been read.

    Args:
        input_seqs_fname: path of the sequence file to read and then remove.
        logo_fname: path of the PNG file to write.
        options: logo options passed to ``w.LogoFormat``.
    """
    # Plain "r": the "U" flag was removed in Python 3.11 and universal
    # newlines are the default for text mode anyway.
    with open(input_seqs_fname, "r") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)

    # Pass the path as a single argv entry; splitting a joined command
    # string broke on file names containing whitespace.
    subprocess.check_call(["rm", input_seqs_fname])

    logo_format = w.LogoFormat(data, options)
    # PNG data is binary, so the output is opened with "wb".
    with open(logo_fname, "wb") as f:
        f.write(w.png_print_formatter(data, logo_format))
def generate_weblogo(file_path, seq_strs, unit_name=None):
    """Write an EPS sequence logo for ``seq_strs`` to ``file_path``.

    The sequences are joined with newlines, parsed with ``array_io.read``
    over the reduced protein alphabet and coloured with the scheme returned
    by ``get_color_scheme()``. ``unit_name`` overrides the logo unit only
    when it is not ``None``.
    """
    alignment = io.StringIO('\n'.join(seq_strs))
    seqs = weblogolib.read_seq_data(
        alignment,
        input_parser=array_io.read,
        alphabet=corebio_seq.reduced_protein_alphabet)
    logo_data = weblogolib.LogoData.from_seqs(seqs)

    logo_options = weblogolib.LogoOptions(color_scheme=get_color_scheme())
    if unit_name is not None:
        logo_options.unit_name = unit_name

    logo_format = weblogolib.LogoFormat(logo_data, logo_options)
    eps_bytes = weblogolib.eps_formatter(logo_data, logo_format)
    with open(file_path, 'wb') as eps_file:
        eps_file.write(eps_bytes)
def build_logo(sequences, outfilename):
    """Write a PNG logo of DNA ``sequences`` to ``outfilename``.

    Every whitespace character inside a sequence is replaced by ``-``
    before the sequence is added to the list.

    Args:
        sequences: iterable of DNA sequence strings.
        outfilename: path of the PNG file to write.
    """
    seqs = corebio.seq.SeqList(alphabet=corebio.seq.dna_alphabet)
    for sequence in sequences:
        # Raw string: "\s" in a normal literal is an invalid escape sequence.
        seqs.append(corebio.seq.dna(re.sub(r"\s", "-", sequence)))

    data = weblogolib.LogoData.from_seqs(seqs)
    options = weblogolib.LogoOptions()
    options.color_scheme = weblogolib.classic
    options.title = 'A Logo Title'
    logo_format = weblogolib.LogoFormat(data, options)

    # PNG is binary data; open accordingly and always close the handle.
    with open(outfilename, 'wb') as fout:
        weblogolib.png_formatter(data, logo_format, fout)
def weblogo_from_dist(dist, filename='logo.eps'):
    """Generate a weblogo from a pssm distribution.

    The formatter is chosen from the extension of ``filename`` via
    ``formatter_for_ext``. Both axes are hidden and ``create_text`` is set
    to a tuple holding one empty string.

    Args:
        dist: distribution converted to logo data by
            ``weblogo_data_from_dist``.
        filename: path of the logo file to write.
    """
    import weblogolib
    data = weblogo_data_from_dist(dist)
    options = weblogolib.LogoOptions()
    options.size = weblogolib.LogoSize(stack_width=5.4 * 12,
                                       stack_height=5.4 * 12 * 5)
    options.color_scheme = weblogolib.std_color_schemes["classic"]
    options.create_text = ('', )
    options.show_xaxis = False
    options.show_yaxis = False
    logo_format = weblogolib.LogoFormat(data, options)

    # Resolve the formatter first so an unsupported extension cannot leave
    # an empty file behind, then close the output deterministically.
    formatter = formatter_for_ext(os.path.splitext(filename)[1])
    with open(filename, 'w') as fout:
        formatter(data, logo_format, fout)
def draw_logo(self, filename, title=None):
    '''
    Draws a sequence logo from the PWM

    Requires weblogolib is available.

    Does not respect typical glbase config. Particularly config.draw_mode
    (Output is always a png)

    **Arguments**
        filename
            The filename to save the image to.

        title (Optional, default=the pwm name)
            A title for the logo.
    '''
    assert filename, "pwm.draw_logo: You must specify a filename"

    # See if weblogo is available; report any import failure as an
    # AssertionError, which is what callers have always received.
    try:
        import weblogolib
    except Exception:
        raise AssertionError(
            'pwm.draw_logo: Asking to draw logo, but weblogolib not found/available'
        )

    if not title:
        title = self.name

    data = weblogolib.LogoData.from_counts("ACGT", self.__original_PFM)
    options = weblogolib.LogoOptions()
    options.logo_title = title
    options.title_fontsize = 4
    options.resolution = 200
    options.show_xaxis = True
    options.show_yaxis = True
    options.scale_width = False
    #options.logo_label = "motif: %s" % name
    options.fineprint = False
    options.color_scheme = weblogolib.std_color_schemes["base pairing"]
    logo_format = weblogolib.LogoFormat(data, options)

    # `with` closes the file even when the formatter raises.
    with open(filename, "wb") as out:
        weblogolib.png_formatter(data, logo_format, out)
    config.log.info("pwm.draw_logo: Saved '%s' logo" % filename)
def logo(dist, tag, dir):
    """Generate a logo with the given tag in the given directory.

    The image is written to ``<dir>/logo-<tag>.png``.

    Args:
        dist: counts passed to ``LogoData.from_counts`` over the unambiguous
            DNA alphabet.
        tag: used as the logo title and in the output file name.
        dir: directory the PNG is written to.
    """
    import weblogolib as W
    import corebio.seq as S
    data = W.LogoData.from_counts(S.unambiguous_dna_alphabet, dist)
    options = W.LogoOptions(
        logo_title=tag,
        color_scheme=W.colorscheme.nucleotide,
        show_xaxis=False,
        show_yaxis=True,
        show_fineprint=False,
    )
    logo_format = W.LogoFormat(data, options)
    filename = 'logo-%s' % tag
    # PNG is binary data: open with 'wb' and make sure the handle is closed.
    with open(os.path.join(dir, '%s.png' % filename), 'wb') as png_file:
        W.png_formatter(data, logo_format, png_file)
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Write the C>T sequence contexts of a call table and plot their logo.

    The input is tab separated: its first line is skipped as a comment and
    the second line is the header. Rows whose ``judgement`` is ``REJECT``
    are ignored. For the others a 7-base context is built from the first
    three and last three characters of ``context`` around ``ref_allele``;
    when the reference is neither C nor T the context and both alleles are
    passed through ``mybasic.rc``. Only C>T contexts are written, one per
    line, to ``outFileN``, and a PDF logo of them goes to ``pdfFileN``.

    Args:
        sampN: sample name, used as the logo title.
        inFileN: path of the input table.
        outFileN: path of the context list to write.
        pdfFileN: path of the PDF logo to write.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        idxH = {name: i for i, name in enumerate(headerL)}

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')
                context = colL[idxH['context']]
                ref = colL[idxH['ref_allele']]
                alt = colL[idxH['alt_allele']]
                status = colL[idxH['judgement']]
                if status == 'REJECT':
                    continue
                head = context[:3]
                tail = context[-3:]
                context = head + ref + tail
                if ref not in ['C', 'T']:
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)
                if ref == 'C' and alt == 'T':  ## TMZ context only
                    outFile.write('%s\n' % context)

    # The context file is closed (and therefore flushed) before re-reading.
    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)
    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN
    logo_format = weblogolib.LogoFormat(data, options)
    # PDF is binary data: open with 'wb' and close the handle when done.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
def plot_motif_from_sites(sites, img_format='png', smallText=None):
    """Build a sequence logo from a list of DNA sites.

    Args:
        sites: sequences wrapped in a ``wl.SeqList`` over the unambiguous
            DNA alphabet.
        img_format: 'png', 'svg' or 'pdf'.
        smallText: optional fine print for the logo; left at the default
            when ``None``.

    Returns:
        For 'png', the ``plt`` module after the decoded image has been shown
        with the axes turned off; for 'svg' and 'pdf', the formatter output;
        ``None`` for any other format.
    """
    site_list = wl.SeqList(sites, wl.unambiguous_dna_alphabet)
    ldata = wl.LogoData.from_seqs(site_list)

    options = wl.LogoOptions()
    if smallText is not None:
        options.fineprint = smallText

    logo_format = wl.LogoFormat(ldata, options)
    logo_format.color_scheme = wl.classic
    logo_format.resolution = 150

    if img_format == 'svg':
        return wl.svg_formatter(ldata, logo_format)
    if img_format == 'pdf':
        return wl.pdf_formatter(ldata, logo_format)
    if img_format == 'png':
        png_data = wl.png_formatter(ldata, logo_format)
        # Decode the PNG from memory and hand it to matplotlib.
        img = mpimg.imread(cStringIO.StringIO(png_data))
        plt.axis('off')
        plt.imshow(img)
        return plt
    return None
def plot_motif(self, cluster_num, motif_num, img_format='png'):
    """Build the sequence logo of one motif of a cluster.

    The motif id is resolved with ``self.__get_motif_id`` and the matching
    rows of ``self.tables['meme_motif_sites']`` supply the sites. The fine
    print is the directory of ``self.dbfile`` followed by the zero-padded
    cluster and motif numbers.

    Args:
        cluster_num: cluster number.
        motif_num: motif number within the cluster.
        img_format: 'png' or 'svg'.

    Returns:
        For 'png', the ``plt`` module after the decoded image has been shown
        with the axes turned off; for 'svg', the formatter output; ``None``
        for any other format.
    """
    rowid = self.__get_motif_id(cluster_num, motif_num)
    sites_table = self.tables['meme_motif_sites']
    mot_sites = sites_table[sites_table.motif_info_id == rowid]

    site_list = wl.SeqList(mot_sites.seq.values.tolist(),
                           wl.unambiguous_dna_alphabet)
    ldata = wl.LogoData.from_seqs(site_list)

    options = wl.LogoOptions()
    db_dir = os.path.dirname(self.dbfile)
    options.fineprint = db_dir + ' %03d %03d' % (cluster_num, motif_num)

    logo_format = wl.LogoFormat(ldata, options)
    logo_format.color_scheme = wl.classic
    logo_format.resolution = 150

    if img_format == 'svg':
        return wl.svg_formatter(ldata, logo_format)
    if img_format == 'png':
        png_data = wl.png_formatter(ldata, logo_format)
        # Decode the PNG from memory and hand it to matplotlib.
        img = mpimg.imread(cStringIO.StringIO(png_data))
        plt.axis('off')
        plt.imshow(img)
        #plt.show()
        return plt
    return None
def main(htdocs_directory=None):
    """Handle one CGI request of the logo web form.

    Depending on the submitted form this either sends the form back via
    ``send_form`` (default form, edit/validate commands, or any collected
    errors) or builds the logo and prints it to standard output preceded by
    ``Content-Type`` and ``Content-Disposition`` headers.

    Args:
        htdocs_directory: passed through unchanged to ``send_form``.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field(
            'format',
            'png',
            weblogolib.formatters.get,
            options=[
                'png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'
            ],  #TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field('stacks_per_line',
              logooptions.stacks_per_line,
              int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width',
              'medium',
              weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet',
              'alphabet_auto',
              alphabets.get,
              options=[
                  'alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                  'alphabet_rna'
              ],
              errmsg="Unknown sequence type."),
        Field('unit_name',
              'bits',
              options=[
                  'probability', 'bits', 'nats', 'kT', 'kJ/mol', 'kcal/mol'
              ]),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition',
              'comp_auto',
              composition.get,
              options=[
                  'comp_none', 'comp_auto', 'comp_equiprobable', 'comp_CG',
                  'comp_Celegans', 'comp_Dmelanogaster', 'comp_Ecoli',
                  'comp_Hsapiens', 'comp_Mmusculus', 'comp_Scerevisiae'
              ],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale',
              logooptions.yaxis_scale,
              float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme',
              'color_auto',
              color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    # Index the fields by name for direct lookup below.
    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    # Names of the fields whose converted value is copied onto logooptions
    # under the same attribute name.
    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width'
    ]

    # Conversion problems are collected rather than raised so that the form
    # can be sent back with all of them at once.
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value != None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        # Only rows with a color filled in contribute a group.
        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError as e:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    # The raw (unconverted) field value decides whether the custom scheme
    # built above is used.
    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        #assert type(sequences) == str

    # Fall back to the text box when no (or an empty) file was uploaded.
    if not sequences or len(sequences) == 0:
        sequences = form["sequences"].get_value()

    if not sequences or len(sequences) == 0:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."
        ))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        # Presence of the key is what counts here, not its value.
        ignore_lower_case = ("ignore_lower_case" in form_values)
        # NOTE(review): whether get_value() can ever return 'percentCG'
        # depends on the `composition` mapping, which is not visible here.
        if comp == 'percentCG':
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError as motif_err:
            # The transfac read raised ValueError: parse the input as
            # sequence data instead.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        # Raw field value, used as the key for the formatter and, below,
        # for the mime type and file extension.
        format = form["format"].value
        logo = weblogolib.formatters[format](data, logoformat)
    except ValueError as err:
        errors.append(err.args)
    except IOError as err:
        errors.append(err.args)
    except RuntimeError as err:
        errors.append(err.args)

    # "cmd_validate" sends the form back even when logo creation succeeded.
    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    # RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; ' \
              'filename="logo.%s"' % extension[format])
    else:
        print('Content-Disposition: inline; ' \
              'filename="logo.%s"' % extension[format])

    # Separate header from data
    print()

    # Finally, and at last, send the logo.
    # On Python 3 the logo goes through the underlying binary buffer of
    # stdout; on older versions it is written to stdout directly.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
def draw_logo(df, seq_dict, inv_dict, dicttype):
    '''Draw logo of sequences.

    Args:
        df: table holding one ``freq_<symbol>`` column per symbol.
        seq_dict: symbol dictionary; only its length is used.
        inv_dict: maps the indices ``0 .. len(seq_dict) - 1`` to symbols,
            which fixes the column order of the counts.
        dicttype: 'dna', 'protein' or 'rna'; selects the alphabet. A custom
            colour scheme is applied for 'dna' only.

    Returns:
        The logo as produced by ``weblogolib.pdf_formatter``.

    Raises:
        ValueError: if ``dicttype`` is not one of the supported types.
    '''
    alphabet_names = {
        'dna': 'unambiguous_dna_alphabet',
        'protein': 'unambiguous_protein_alphabet',
        'rna': 'unambiguous_rna_alphabet',
    }
    # Fail early and clearly; an unknown type used to surface much later as
    # an UnboundLocalError on `data`.
    if dicttype not in alphabet_names:
        raise ValueError(
            "dicttype must be 'dna', 'protein' or 'rna', got %r" % (dicttype,))

    # Set Logo options
    # Stack width in points, not the default size of 10.8.
    stackwidth = 9.5
    # Ratio of stack height:width, doesn't count part going over the maximum
    # value of 1.
    stackaspectratio = 4.4
    logo_options = weblogolib.LogoOptions()
    logo_options.fineprint = None
    #logo_options.stacks_per_line = nperline
    logo_options.stack_aspect_ratio = stackaspectratio
    logo_options.show_errorbars = True
    logo_options.errorbar_fraction = .75
    logo_options.errorbar_gray = .9
    logo_options.errorbar_width_fraction = .9
    logo_options.stack_width = stackwidth
    # Command to uncomment if you want each column to have height = 1
    #logo_options.unit_name = 'probability'
    logo_options.show_yaxis = False
    #logo_options.yaxis_scale = ymax

    # The counts are built the same way for every alphabet.
    al = getattr(weblogolib, alphabet_names[dicttype])
    symbols = [inv_dict[i] for i in range(len(seq_dict))]
    column_headers = ['freq_' + symbol for symbol in symbols]
    counts_arr = np.array(df[column_headers])
    data = weblogolib.LogoData.from_counts(al, counts_arr)

    if dicttype == 'dna':
        colormapping = {
            'A': '#008000',
            'T': '#FF0000',
            'C': '#0000FF',
            'G': '#FFA500',
        }
        color_scheme = weblogolib.colorscheme.ColorScheme()
        for x in symbols:
            if hasattr(color_scheme, 'rules'):
                color_scheme.rules.append(
                    weblogolib.colorscheme.SymbolColor(
                        x, colormapping[x], "'%s'" % x))
            else:
                # this part is needed for weblogo 3.4
                color_scheme.groups.append(
                    weblogolib.colorscheme.ColorGroup(
                        x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme

    # set logo format and output
    myformat = weblogolib.LogoFormat(data, logo_options)
    myimage = weblogolib.pdf_formatter(data, myformat)
    return myimage
def generate_weblogo(fname, count_mat, idx_first_residue=1, residue_min=1,
                     residue_max=None, title=""):
    """
    Generates logo representation of PBs frequency along protein sequence
    through the weblogo library.

    The weblogo reference:
    G. E. Crooks, G. Hon, J.-M. Chandonia, and S. E. Brenner.
    'WebLogo: A Sequence Logo Generator.'
    Genome Research 14:1188-90 (2004)
    doi:10.1101/gr.849004.
    http://weblogo.threeplusone.com/

    Parameters
    ----------
    fname : str
        The path to the file to write in. Its extension selects the image
        format (eps, png, pdf, jpg/jpeg, svg).
    count_mat : numpy array
        an occurence matrix returned by `count_matrix`.
    idx_first_residue: int
        the index of the first residue in the matrix
    residue_min: int
        the lower bound of residue frame
    residue_max: int
        the upper bound of residue frame
    title: str
        the title of the weblogo. Default is empty.

    Raises
    ------
    ValueError
        when the extension of `fname` is not a known image format.
    """
    # Keep only the requested residue frame
    count = utils._slice_matrix(count_mat, idx_first_residue,
                                residue_min, residue_max)

    # Custom color scheme for PB
    pb_groups = [
        ColorGroup("d", "#1240AB", "strand main"),
        ColorGroup("abcdef", "#1240AB", "strand others"),
        ColorGroup("ghij", "#0BD500", "coil"),
        ColorGroup("m", "#FD0006", "helix main"),
        ColorGroup("klnop", "#FD0006", "helix others"),
    ]
    colors = weblogolib.ColorScheme(pb_groups)

    # Load data from the occurence matrix
    data = weblogolib.LogoData.from_counts(PB.NAMES, count)

    # NOTE(review): other code in this area uses `first_index` for the
    # numbering offset -- confirm `first_residue` is read by the library.
    options = weblogolib.LogoOptions(
        fineprint=False,
        logo_title=title,
        color_scheme=colors,
        stack_width=weblogolib.std_sizes["large"],
        first_residue=residue_min)

    logo = weblogolib.LogoFormat(data, options)

    # The extension, without its dot, names the format; 'jpg' is an alias
    extension = os.path.splitext(fname)[1][1:]
    image_format = 'jpeg' if extension == 'jpg' else extension
    try:
        formatter = weblogolib.formatters[image_format]
    except KeyError:
        raise ValueError("Invalid format image '{0}'."
                         " Valid ones are : eps, png, pdf, jpg/jpeg, svg".format(image_format))

    image = formatter(data, logo)
    with open(fname, "wb") as image_file:
        image_file.write(image)
def seqlogo(pm, ic_scale=True, color_scheme=None, size='medium',
            format='svg', filename=None, **kwargs):
    """The plotting method of the `seqlogo` distribution.

    Depends on using any of the 3 classes exposed by `seqlogo`:
        * `seqlogo.Ppm`
        * `seqlogo.Pwm`
        * `seqlogo.CompletePm`

    Given an `M x N` PM matrix, where `M` is the number of positions and `N`
    is the number of letters, calculate and render a WebLogo-like motif plot.

    When `ic_scale` is `True`, the logo unit is "bits". Otherwise the logo
    unit is "probability".

    Args:
        pm (`seqlogo.Pm` subclass): a pre-formatted Pm instance
        ic_scale (bool): whether to use "bits" rather than "probability" as
            the logo unit (default: True)
        size (str): small (3.54 in), medium (5 in), large (7.25 in),
            xlarge (10.25) (default: 'medium')
        format (str): desired output format, a key of the weblogo
            formatters. Options are 'eps', 'pdf', 'png', 'jpeg', and 'svg'
            (default: "svg")
        filename (None | str): Name of the file to save the figure. If
            `None`: the figure will not be saved. (default: None)
        color_scheme (str): the color scheme to use for weblogo:
            'auto': None
            'monochrome': all black
            'base pairing': (NA Only) TAU are orange, GC are blue
            'classic': (NA Only) classic WebLogo color scheme for nucleic acids
            'hydrophobicity': (AA only) Color based on hydrophobicity
            'chemistry': (AA only) Color based on chemical properties
            'charge': (AA Only) Color based on charge
        **kwargs: all additional keyword arguments found at
            http://weblogo.threeplusone.com/manual.html

    Returns:
        Inside IPython/Jupyter, an `IPython.display.SVG` (svg) or
        `IPython.display.Image` (png, jpeg) object; otherwise `None`.

    Raises:
        ValueError: if the color scheme does not fit the alphabet, if the
            format cannot be displayed in the console, or if there is
            neither an IPython console nor a `filename`.
    """
    # The color scheme has to match the alphabet; pick a default if unset
    if pm._alphabet_type in utils._NA_ALPHABETS:
        if color_scheme is None:
            color_scheme = 'classic'
        if color_scheme not in utils.NA_COLORSCHEMES:
            raise ValueError(
                '{} color_scheme selected is not an allowed nucleic acid color scheme'
                .format(color_scheme))
    elif pm._alphabet_type in utils._AA_ALPHABETS:
        if color_scheme is None:
            color_scheme = 'hydrophobicity'
        if color_scheme not in utils.AA_COLORSCHEMES:
            raise ValueError(
                '{} color_scheme selected is not an allowed amino acid color scheme'
                .format(color_scheme))
    scheme = wl.std_color_schemes[color_scheme]

    # Formatter for the requested output format
    render = wl.formatters[format]

    # Total width (inches) shared by all positions, converted to points
    stack_width = (_sizes[size] / pm.length) * 72

    unit_name = 'bits' if ic_scale else 'probability'
    options = wl.LogoOptions(unit_name=unit_name,
                             color_scheme=scheme,
                             show_fineprint=False,
                             stack_width=stack_width,
                             **kwargs)

    logo_format = wl.LogoFormat(pm, options)
    out = render(pm, logo_format)

    # Save to disk when a file name was supplied
    if filename:
        with open('{}'.format(filename), 'wb') as out_file:
            out_file.write(out)

    is_svg = format == 'svg'
    if is_svg:
        # Append a hash of the rendered output to every glyph identifier
        svg_hash = hash(out)
        suffix = str(svg_hash).encode()
        out = re.sub(rb'("#?glyph.*?)(")', rb'\1 %s\2' % suffix, out)

    try:
        # get_ipython only exists inside IPython; elsewhere it is a NameError
        if get_ipython():
            import IPython.display as ipd
            if is_svg:
                return ipd.SVG(out)
            if format in ('png', 'jpeg'):
                return ipd.Image(out)
            raise ValueError(
                '{} format not supported for plotting in console'.format(
                    format))
    except NameError:
        if filename is None:
            raise ValueError(
                'If not in an IPython/Jupyter console and no filename is given, nothing will be rendered'
            )
# Try reading data in transfac format first. # TODO Refactor this code motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet) prior = weblogolib.parse_prior(comp, motif.alphabet) data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior) except ValueError, motif_err: seqs = weblogolib.read_seq_data( StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case) prior = weblogolib.parse_prior(comp, seqs.alphabet) data = weblogolib.LogoData.from_seqs(seqs, prior) logoformat = weblogolib.LogoFormat(data, logooptions) format = form["format"].value weblogolib.formatters[format](data, logoformat, logo) except ValueError, err: errors.append(err.args) except IOError, err: errors.append(err.args) except RuntimeError, err: errors.append(err.args) if form_values.has_key("cmd_validate") or errors: send_form(controls, errors, htdocs_directory) return # # RETURN LOGO OVER HTTP