def generate_logo(seqfile, title):
    '''
    Render a sequence logo for the sequences in a file and save it as EPS.

    The length of the first line of ``seqfile`` (without its trailing
    newline) is used to number the logo positions: the first index is set to
    minus half that length, truncated to an integer.  The output path is
    ``seqfile`` with its last four characters dropped and ``.eps`` appended,
    so a three-letter extension such as ``.txt`` is replaced cleanly.

    Args:
        seqfile (str): The path to a sequence file.
        title: Value assigned to the ``title`` attribute of the logo options.
    '''
    with open(seqfile, 'r') as handle:
        first_line = handle.readline()
        width = len(first_line.rstrip('\n'))
        # Rewind so that the parser sees the whole file, first line included.
        handle.seek(0)
        sequences = wl.read_seq_data(handle)

    logo_data = wl.LogoData.from_seqs(sequences)

    logo_options = wl.LogoOptions()
    logo_options.title = title
    logo_options.fineprint = ''
    logo_options.first_index = -int(width / 2)

    logo_format = wl.LogoFormat(logo_data, logo_options)
    rendered = wl.eps_formatter(logo_data, logo_format)

    target = seqfile[:-4] + '.eps'
    with open(target, 'wb') as out:
        out.write(rendered)
def generate_base_weblogo(options, allsequences, output):
    """Write a probability-scaled RNA sequence logo for ``allsequences`` as PDF.

    The sequences are wrapped into an in-memory FASTA document (records are
    named by their position in ``allsequences``), parsed with the RNA
    alphabet and rendered with a custom four-group colour scheme.

    Args:
        options: Object providing ``flanking_window``; the logo numbering
            starts at ``-options.flanking_window``.
        allsequences: Iterable of sequence strings.
        output: Path of the PDF file to write.
    """
    import weblogolib as wl
    from weblogolib.colorscheme import ColorScheme, ColorGroup
    from StringIO import StringIO

    ecliptic_color_scheme = ColorScheme([
        ColorGroup('A', iwork_colors.blue),
        ColorGroup('C', iwork_colors.green),
        ColorGroup('G', iwork_colors.yellow),
        ColorGroup('UT', iwork_colors.red),
    ])

    fastainput = StringIO(''.join('>%d\n%s\n' % s
                                  for s in enumerate(allsequences)))
    rna = wl.std_alphabets['rna']
    seqs = wl.read_seq_data(fastainput, alphabet=rna)
    logo = wl.LogoData.from_seqs(seqs)

    logooptions = wl.LogoOptions()
    logooptions.unit_name = 'probability'
    logooptions.color_scheme = ecliptic_color_scheme
    logooptions.show_fineprint = False
    logooptions.first_index = -options.flanking_window
    logooptions.title = "Ecliptic"

    logoformat = wl.LogoFormat(logo, logooptions)
    # PDF is binary data: open in binary mode and make sure the handle is
    # flushed and closed even if rendering fails.
    with open(output, 'wb') as pdf_out:
        wl.pdf_formatter(logo, logoformat, pdf_out)
def readSequence(f, ip):
    """Read protein sequences from ``f`` and return the parsed sequences.

    Args:
        f: Either ``sys.stdin`` (its whole content is read into memory first)
            or a path that is opened for reading.
        ip: Passed through as the second positional argument of
            ``read_seq_data`` (presumably the input parser; confirm against
            the caller).

    Returns:
        The object returned by ``read_seq_data``.

    Raises:
        Exception: if the alphabet of the parsed sequences is not
            ``unambiguous_protein_alphabet``.
    """
    if f == sys.stdin:
        seqs = read_seq_data(StringIO(sys.stdin.read()), ip)
    else:
        # Close the file as soon as parsing is finished (or has failed).
        with open(f, "r") as fin:
            seqs = read_seq_data(fin, ip)

    if seqs.alphabet != unambiguous_protein_alphabet:
        raise Exception("input sequences should be protein seqeunces")
    return seqs
def runCalculation(self):
    """
    Load the fasta file and run the sequence entropy calculation.

    Reads ``self.fasta_file`` and stores the resulting ``LogoData`` object in
    ``self.data``.
    """
    # Calculate the sequence entropy of each column in a fasta file.
    # The context manager closes the file even if parsing raises.
    with open(self.fasta_file, 'r') as f:
        self.data = wl.LogoData.from_seqs(wl.read_seq_data(f))
def runCalculation(self):
    """
    Load the fasta file and run the sequence entropy calculation.

    Reads ``self.fasta_file`` and stores the resulting ``LogoData`` object in
    ``self.data``.
    """
    # Calculate the sequence entropy of each column in a fasta file.
    # The context manager closes the file even if parsing raises.
    with open(self.fasta_file, 'r') as f:
        self.data = wl.LogoData.from_seqs(wl.read_seq_data(f))
def generate_weblogo(seq_id, seq_strs):
    """Render ``seq_strs`` as a probability logo and save it as EPS.

    The sequences are joined with newlines, parsed with ``array_io.read`` and
    written to ``<WEBLOGO_PATH>/<seq_id>.eps``.

    Args:
        seq_id: Used as the ``title`` option and as the output file stem.
        seq_strs: Iterable of sequence strings, one logo row each.
    """
    text = '\n'.join(seq_strs)
    sequences = weblogolib.read_seq_data(io.StringIO(text),
                                         input_parser=array_io.read)
    logo_data = weblogolib.LogoData.from_seqs(sequences)

    logo_options = weblogolib.LogoOptions()
    logo_options.title = seq_id
    logo_options.unit_name = "probability"

    logo_format = weblogolib.LogoFormat(logo_data, logo_options)
    rendered = weblogolib.eps_formatter(logo_data, logo_format)

    target = os.path.join(WEBLOGO_PATH, seq_id + '.eps')
    with open(target, 'wb') as out:
        out.write(rendered)
def generate_weblogo(file_path, seq_strs, unit_name=None):
    """Render ``seq_strs`` as a sequence logo and save it as EPS.

    The sequences are joined with newlines and parsed with ``array_io.read``
    using ``corebio_seq.reduced_protein_alphabet``; colours come from
    ``get_color_scheme()``.

    Args:
        file_path: Path of the EPS file to write.
        seq_strs: Iterable of sequence strings.
        unit_name: Optional value for the ``unit_name`` logo option; the
            option is left at its default when this is ``None``.
    """
    source = io.StringIO('\n'.join(seq_strs))
    sequences = weblogolib.read_seq_data(
        source,
        input_parser=array_io.read,
        alphabet=corebio_seq.reduced_protein_alphabet)
    logo_data = weblogolib.LogoData.from_seqs(sequences)

    scheme = get_color_scheme()
    logo_options = weblogolib.LogoOptions(color_scheme=scheme)
    if unit_name is not None:
        logo_options.unit_name = unit_name

    logo_format = weblogolib.LogoFormat(logo_data, logo_options)
    rendered = weblogolib.eps_formatter(logo_data, logo_format)
    with open(file_path, 'wb') as out:
        out.write(rendered)
def create_logo(input_seqs_fname, logo_fname, options):
    """
    Create a logo plot png using weblogo from a fasta created with
    write_logo_input()

    The input file is deleted once its sequences have been loaded.

    Args:
        input_seqs_fname: Path of the sequence file to read (and then remove).
        logo_fname: Path of the PNG file to write.
        options: Logo options object handed to ``w.LogoFormat``.

    Raises:
        subprocess.CalledProcessError: if removing the input file fails.
    """
    with open(input_seqs_fname, "rU") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)

    # Pass the path as a single argv element; building a command string and
    # splitting it on whitespace breaks for paths that contain spaces.
    subprocess.check_call(["rm", input_seqs_fname])

    logo_format = w.LogoFormat(data, options)
    # PNG is binary data, so the output must be opened in binary mode.
    with open(logo_fname, "wb") as f:
        f.write(w.png_print_formatter(data, logo_format))
def read_logodata(handle):
    """Get weblogo data for a sequence alignment.

    The alignment is read from ``handle`` using the unambiguous protein
    alphabet.

    Returns a list of tuples, one per alignment column:
        (posn, letter_counts, entropy, weight)
    where ``posn`` starts at 1 and ``letter_counts`` maps each alphabet
    letter to its integer count in that column.
    """
    alignment = weblogolib.read_seq_data(handle,
                                         alphabet=unambiguous_protein_alphabet)
    logo = weblogolib.LogoData.from_seqs(alignment)
    alphabet_letters = logo.alphabet.letters()
    count_rows = logo.counts.array

    columns = zip(count_rows, logo.entropy, logo.weight)
    return [
        (
            posn,
            {letter: int(count)
             for letter, count in zip(alphabet_letters, row)},
            entropy,
            weight,
        )
        for posn, (row, entropy, weight) in enumerate(columns, 1)
    ]
def _build_logodata_core(fin, fin_compos, fin_weight, options, second_data=None):
    """Read logo input from ``fin``: TRANSFAC matrix first, sequences second.

    ``Motif.read_transfac`` is tried first; if it raises ``ValueError`` the
    same ``fin`` is handed to ``read_seq_data`` using
    ``options.input_parser.read``.

    NOTE(review): in the visible code ``fin_compos``, ``fin_weight`` and
    ``second_data`` are never referenced, and ``motif``, ``seqs``,
    ``motif_flag`` and ``isCodon`` are assigned but not used or returned --
    the function body presumably continues beyond what is shown here.
    """
    motif_flag = False
    isCodon = False
    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError, motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        # An alphabet of exactly ``True`` flags codon input; it is replaced by
        # None before the sequences are parsed.
        # NOTE(review): nesting reconstructed from a flattened source line --
        # confirm that the reset to None belongs inside this ``if``.
        if options.alphabet is True:
            isCodon = True
            options.alphabet = None
        seqs = read_seq_data(
            fin, options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case
        )
def _build_logodata(options):
    """Read logo input: TRANSFAC matrix first, sequence data second.

    The input stream is ``options.fin``; when that is ``None`` all of
    standard input is read into an in-memory buffer instead.
    ``Motif.read_transfac`` is tried first; if it raises ``ValueError`` the
    same stream is handed to ``read_seq_data`` using
    ``options.input_parser.read``.

    NOTE(review): in the visible code ``motif``, ``seqs`` and ``motif_flag``
    are assigned but not used or returned -- the function body presumably
    continues beyond what is shown here.
    """
    motif_flag = False
    fin = options.fin
    if fin is None:
        from StringIO import StringIO
        fin = StringIO(sys.stdin.read())
    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError, motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        seqs = read_seq_data(fin, options.input_parser.read,
                             alphabet=options.alphabet,
                             ignore_lower_case=options.ignore_lower_case)
def _build_logodata(options):
    """Build the logo data from the input referenced by ``options``.

    When ``options.input_parser`` is not the string ``"transfac"`` the input
    in ``options.fin`` is read as sequence data, optionally reversed and/or
    complemented, and converted with ``LogoData.from_seqs``.  Otherwise the
    input is read as a TRANSFAC matrix with ``Motif.read_transfac`` and
    converted with ``LogoData.from_counts``.  In both cases the prior comes
    from ``parse_prior(options.composition, <alphabet>, options.weight)``.

    Returns:
        The ``LogoData`` object built from the input.

    Raises:
        ValueError: for matrix input combined with ``ignore_lower_case``,
            ``reverse`` or ``complement``.
    """
    if options.input_parser != "transfac":
        sequences = read_seq_data(
            options.fin,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case)

        if options.reverse:
            flipped = [entry.reverse() for entry in sequences]
            sequences = SeqList(flipped, sequences.alphabet)

        if options.complement:
            complemented = [
                Seq(entry, sequences.alphabet).complement()
                for entry in sequences
            ]
            sequences = SeqList(complemented, sequences.alphabet)

        background = parse_prior(options.composition, sequences.alphabet,
                                 options.weight)
        return LogoData.from_seqs(sequences, background)

    # Matrix (TRANSFAC) input from here on.
    from corebio.matrix import Motif

    # FIXME: implement these options for matrix input.  They are checked
    # lazily and in this order, so the first one that is set is reported.
    unsupported = (
        ("ignore_lower_case",
         "error: option --ignore-lower-case incompatible with matrix input"),
        ("reverse",
         "error: option --reverse incompatible with matrix input"),
        ("complement",
         "error: option --complement incompatible with matrix input"),
    )
    for attribute, message in unsupported:
        if getattr(options, attribute):
            raise ValueError(message)

    matrix = Motif.read_transfac(options.fin, alphabet=options.alphabet)
    background = parse_prior(options.composition, matrix.alphabet,
                             options.weight)
    return LogoData.from_counts(matrix.alphabet, matrix, background)
def _build_logodata(options) :
    """Read logo input: TRANSFAC matrix first, sequence data second.

    The input stream is ``options.fin``; when that is ``None`` all of
    standard input is read into an in-memory buffer instead.
    ``Motif.read_transfac`` is tried first; if it raises ``ValueError`` the
    same stream is handed to ``read_seq_data`` using
    ``options.input_parser.read``.

    NOTE(review): in the visible code ``motif``, ``seqs`` and ``motif_flag``
    are assigned but not used or returned -- the function body presumably
    continues beyond what is shown here.
    """
    motif_flag=False

    fin = options.fin;
    if fin is None :
        from StringIO import StringIO
        fin = StringIO(sys.stdin.read())

    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError, motif_err :
        # Failed reading Motif, try reading as multiple sequence data.
        seqs = read_seq_data(fin,
                             options.input_parser.read,
                             alphabet=options.alphabet,
                             ignore_lower_case = options.ignore_lower_case)
def make_logo(fasta_path, png_path, title, start_pos, counts = None):
    """Render a nucleotide sequence logo as a PNG file.

    Args:
        fasta_path: Path of the sequence file; only read when ``counts`` is
            empty or ``None``.
        png_path: Path of the PNG file to write.
        title: Logo title.
        start_pos: Index shown for the first logo position.
        counts: Optional mapping with the keys ``'A'``, ``'C'``, ``'G'`` and
            ``'T'``.  When given (and truthy) the logo is built from these
            counts instead of from ``fasta_path``; the four values are
            stacked in that order and transposed before use.
    """
    if counts:
        mat_counts = np.array([counts[base] for base in 'ACGT']).transpose()
        data = weblogolib.LogoData.from_counts(unambiguous_dna_alphabet,
                                               mat_counts)
    else:
        with open(fasta_path) as handle:
            seqs = weblogolib.read_seq_data(handle)
        data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.logo_title = title
    options.resolution = 500
    options.first_index = start_pos
    options.number_interval = 1
    options.rotate_numbers = True
    options.color_scheme = nucleotide

    fmt = weblogolib.LogoFormat(data, options)
    # PNG is binary data: a text-mode handle can corrupt it through newline
    # translation (or reject bytes outright), so open in binary mode.
    with open(png_path, 'wb') as handle:
        weblogolib.png_formatter(data, fmt, handle)
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Extract C>T mutation contexts from a tab-separated file and plot them.

    The first line of ``inFileN`` is skipped as a comment and the second line
    is used as the column header.  For every row whose ``judgement`` is not
    ``'REJECT'``, a 7-character context is built from the first three and
    last three characters of the ``context`` column with ``ref_allele`` in
    the middle.  Rows whose reference allele is neither ``C`` nor ``T`` are
    passed through ``mybasic.rc`` (presumably reverse complement -- confirm)
    first.  Only contexts with ref ``C`` and alt ``T`` are written, one per
    line, to ``outFileN``, which is then rendered as a sequence logo.

    Args:
        sampN: Sample name, used as the logo title.
        inFileN: Path of the tab-separated input file.
        outFileN: Path of the intermediate file holding the selected contexts.
        pdfFileN: Path of the PDF logo to write.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        # Column name -> column index.
        idxH = {name: i for i, name in enumerate(headerL)}

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')

                context = colL[idxH['context']]
                ref = colL[idxH['ref_allele']]
                alt = colL[idxH['alt_allele']]
                status = colL[idxH['judgement']]

                if status == 'REJECT':
                    continue

                # Three flanking bases on each side of the reference allele.
                context = context[:3] + ref + context[-3:]

                if ref not in ['C', 'T']:
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)

                if ref == 'C' and alt == 'T':  # TMZ context only
                    outFile.write('%s\n' % context)

    # The intermediate file is closed (and therefore flushed) at this point.
    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN

    logo_format = weblogolib.LogoFormat(data, options)
    # PDF is binary data; the context manager guarantees it is flushed/closed.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Extract C>T mutation contexts from a tab-separated file and plot them.

    The first line of ``inFileN`` is skipped as a comment and the second line
    is used as the column header.  For every row whose ``judgement`` is not
    ``'REJECT'``, a 7-character context is built from the first three and
    last three characters of the ``context`` column with ``ref_allele`` in
    the middle.  Rows whose reference allele is neither ``C`` nor ``T`` are
    passed through ``mybasic.rc`` (presumably reverse complement -- confirm)
    first.  Only contexts with ref ``C`` and alt ``T`` are written, one per
    line, to ``outFileN``, which is then rendered as a sequence logo.

    Args:
        sampN: Sample name, used as the logo title.
        inFileN: Path of the tab-separated input file.
        outFileN: Path of the intermediate file holding the selected contexts.
        pdfFileN: Path of the PDF logo to write.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        # Column name -> column index.
        idxH = {name: i for i, name in enumerate(headerL)}

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')

                context = colL[idxH['context']]
                ref = colL[idxH['ref_allele']]
                alt = colL[idxH['alt_allele']]
                status = colL[idxH['judgement']]

                if status == 'REJECT':
                    continue

                # Three flanking bases on each side of the reference allele.
                context = context[:3] + ref + context[-3:]

                if ref not in ['C', 'T']:
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)

                if ref == 'C' and alt == 'T':  # TMZ context only
                    outFile.write('%s\n' % context)

    # The intermediate file is closed (and therefore flushed) at this point.
    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN

    logo_format = weblogolib.LogoFormat(data, options)
    # PDF is binary data; the context manager guarantees it is flushed/closed.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
def do_seqlogo(self, input_fasta, output_seqlogo, col_list):
    """
    create sequence logo of given fasta file
    and then label it with col_list

    Args:
        input_fasta: Path of the sequence file to read.
        output_seqlogo: Path of the PDF file to write.
        col_list: Column indices used as stack annotations; each value is
            shifted by one so that the labels start at 1 instead of 0.
    """
    import weblogolib
    import corebio
    from weblogolib.colorscheme import ColorScheme
    from weblogolib.colorscheme import ColorGroup

    # assigning colors
    hydrophobicity = ColorScheme([
        ColorGroup("RKDENQ", "blue"),
        ColorGroup("SGHTAP", "green"),
        ColorGroup("YVMCLFIW", "black"),
        ColorGroup("O", "dark orange")
    ])

    # Column labels begin with 1, not 0.
    col_list2 = [i + 1 for i in col_list]

    # Close the input as soon as the sequences have been parsed.
    with open(input_fasta) as fin:
        seqs = weblogolib.read_seq_data(fin)

    # Alphabet is the 20 amino-acid letters plus 'O'.
    seqs.alphabet = corebio.seq.Alphabet('ACDEFGHIKLMNPQRSTVWYO')
    data = weblogolib.LogoData.from_seqs(seqs)
    data.composition = 'equiprobable'

    options = weblogolib.LogoOptions()
    options.yaxis_scale = 4.5
    options.yaxis_tic_interval = 2.25
    options.annotate = col_list2
    options.stack_width = 30
    options.stacks_per_line = 35
    options.scale_width = 'No'
    options.color_scheme = hydrophobicity

    format_logo = weblogolib.LogoFormat(data, options)
    # Render first so that a formatter failure does not leave an empty
    # output file behind, then write the PDF through a binary handle.
    pdf_logo = weblogolib.pdf_formatter(data, format_logo)
    with open(output_seqlogo, 'wb') as fout:
        fout.write(pdf_logo)
def main(htdocs_directory = None) :
    """Handle one WebLogo CGI request: show the form or return a logo.

    Builds the list of form fields, reads the submitted CGI values, copies
    the validated values onto a ``weblogolib.LogoOptions`` instance, and then
    either re-sends the form (empty or ``cmd_reset`` request, validation
    errors, ``cmd_edit`` or ``cmd_validate``) or prints the HTTP headers and
    writes the rendered logo to standard output.

    Args:
        htdocs_directory: Forwarded unchanged to ``send_form``.
    """

    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field( 'sequences', ''),
        Field( 'format', 'png', weblogolib.formatters.get ,
            options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'] , #TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field( 'stacks_per_line', logooptions.stacks_per_line , int,
            errmsg='Invalid number of stacks per line.'),
        Field( 'stack_width','medium', weblogolib.std_sizes.get,
            options=['small', 'medium', 'large'], errmsg='Invalid logo size.'),
        Field( 'alphabet','alphabet_auto', alphabets.get,
            options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                        'alphabet_rna'],
            errmsg="Unknown sequence type."),
        Field( 'unit_name', 'bits',
            options=[ 'probability', 'bits', 'nats', 'kT', 'kJ/mol',
                        'kcal/mol']),
        Field( 'first_index', 1, int_or_none),
        Field( 'logo_start', '', int_or_none),
        Field( 'logo_end', '', int_or_none),
        Field( 'composition', 'comp_auto', composition.get,
            options=['comp_none','comp_auto','comp_equiprobable','comp_CG',
                'comp_Celegans','comp_Dmelanogaster','comp_Ecoli',
                'comp_Hsapiens','comp_Mmusculus','comp_Scerevisiae'],
            errmsg= "Illegal sequence composition."),
        Field( 'percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field( 'show_errorbars', False , truth),
        Field( 'logo_title', logooptions.logo_title ),
        Field( 'logo_label', logooptions.logo_label ),
        Field( 'show_xaxis', False, truth),
        Field( 'xaxis_label', logooptions.xaxis_label ),
        Field( 'show_yaxis', False, truth),
        Field( 'yaxis_label', logooptions.yaxis_label, string_or_none ),
        Field( 'yaxis_scale', logooptions.yaxis_scale , float_or_none,
            errmsg="The yaxis scale must be a positive number." ),
        Field( 'yaxis_tic_interval', logooptions.yaxis_tic_interval ,
            float_or_none),
        Field( 'show_ends', False, truth),
        Field( 'show_fineprint', False , truth),
        Field( 'color_scheme', 'color_auto', color_schemes.get,
            options=color_schemes.keys() ,
            errmsg = 'Unknown color scheme'),
        Field( 'color0', ''),
        Field( 'symbols0', ''),
        Field( 'desc0', ''),
        Field( 'color1', ''),
        Field( 'symbols1', ''),
        Field( 'desc1', ''),
        Field( 'color2', ''),
        Field( 'symbols2', ''),
        Field( 'desc2', ''),
        Field( 'color3', ''),
        Field( 'symbols3', ''),
        Field( 'desc3', ''),
        Field( 'color4', ''),
        Field( 'symbols4', ''),
        Field( 'desc4', ''),
        Field( 'ignore_lower_case', False, truth),
        Field( 'scale_width', False, truth),
        ]

    # Index the fields by name for direct lookup below.
    form = {}
    for c in controls :
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory = htdocs_directory)
        return

    # Get form content
    for c in controls :
        c.value = form_values.getfirst( c.name, c.default)

    # Fields whose converted value is copied onto logooptions under the
    # same attribute name.
    options_from_form = ['format', 'stacks_per_line', 'stack_width',
        'alphabet', 'unit_name', 'first_index', 'logo_start','logo_end',
        'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label',
        'show_yaxis', 'yaxis_label', 'yaxis_scale', 'yaxis_tic_interval',
        'show_ends', 'show_fineprint', 'scale_width']

    # Conversion failures are collected here and reported on the form
    # instead of aborting the request.
    errors = []
    for optname in options_from_form :
        try :
            value = form[optname].get_value()
            if value!=None :
                setattr(logooptions, optname, value)
        except ValueError as err :
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0,5) :
        color = form["color%d"%i].get_value()
        symbols = form["symbols%d"%i].get_value()
        desc = form["desc%d"%i].get_value()

        # A group is only added when a color was entered for it.
        if color :
            try :
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError as e:
                errors.append( ('color%d'%i, "Invalid color: %s" % color) )

    if form["color_scheme"].value == 'color_custom' :
        logooptions.color_scheme = custom
    else :
        try :
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        #assert type(sequences) == str

    # Fall back to the 'sequences' form field when no upload was supplied.
    if not sequences or len(sequences) ==0:
        sequences = form["sequences"].get_value()

    if not sequences or len(sequences) ==0:
        errors.append( ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload."))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try :
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior( comp,motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError as motif_err:
            # Not a transfac matrix: read the input as sequence data instead.
            seqs = weblogolib.read_seq_data(StringIO( sequences),
                                        alphabet=logooptions.alphabet,
                                        ignore_lower_case=ignore_lower_case
                                        )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        # The raw field value (the format name) selects the formatter and,
        # further down, the MIME type and file extension.
        format = form["format"].value
        logo = weblogolib.formatters[format](data, logoformat)
    except ValueError as err:
        errors.append(err.args)
    except IOError as err:
        errors.append(err.args)
    except RuntimeError as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; ' \
              'filename="logo.%s"' % extension[format])
    else:
        print('Content-Disposition: inline; ' \
              'filename="logo.%s"' % extension[format])
    # Separate header from data
    print()

    # Finally, and at last, send the logo.
    # On Python 3 the logo is written through the underlying binary buffer
    # of stdout.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
def main(htdocs_directory=None):
    """Handle one WebLogo CGI request: show the form or return a logo.

    Builds the list of form fields, reads the submitted CGI values, copies
    the validated values onto a ``weblogolib.LogoOptions`` instance, and then
    either re-sends the form (empty or ``cmd_reset`` request, validation
    errors, ``cmd_edit`` or ``cmd_validate``) or prints the HTTP headers and
    writes the rendered logo to standard output.

    Args:
        htdocs_directory: Forwarded unchanged to ``send_form``.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field("sequences", ""),
        Field(
            "format",
            "png",
            weblogolib.formatters.get,
            options=[
                "png_print",
                "png",
                "jpeg",
                "eps",
                "pdf",
                "svg",
                "logodata",
            ],  # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option.",
        ),
        Field("stacks_per_line", logooptions.stacks_per_line, int, errmsg="Invalid number of stacks per line."),
        Field(
            "stack_width",
            "medium",
            weblogolib.std_sizes.get,
            options=["small", "medium", "large"],
            errmsg="Invalid logo size.",
        ),
        Field(
            "alphabet",
            "alphabet_auto",
            alphabets.get,
            options=["alphabet_auto", "alphabet_protein", "alphabet_dna", "alphabet_rna"],
            errmsg="Unknown sequence type.",
        ),
        Field("unit_name", "bits", options=["probability", "bits", "nats", "kT", "kJ/mol", "kcal/mol"]),
        Field("first_index", 1, int_or_none),
        Field("logo_start", "", int_or_none),
        Field("logo_end", "", int_or_none),
        Field(
            "composition",
            "comp_auto",
            composition.get,
            options=[
                "comp_none",
                "comp_auto",
                "comp_equiprobable",
                "comp_CG",
                "comp_Celegans",
                "comp_Dmelanogaster",
                "comp_Ecoli",
                "comp_Hsapiens",
                "comp_Mmusculus",
                "comp_Scerevisiae",
            ],
            errmsg="Illegal sequence composition.",
        ),
        Field("percentCG", "", float_or_none, errmsg="Invalid CG percentage."),
        Field("show_errorbars", False, truth),
        Field("logo_title", logooptions.logo_title),
        Field("logo_label", logooptions.logo_label),
        Field("show_xaxis", False, truth),
        Field("xaxis_label", logooptions.xaxis_label),
        Field("show_yaxis", False, truth),
        Field("yaxis_label", logooptions.yaxis_label, string_or_none),
        Field(
            "yaxis_scale", logooptions.yaxis_scale, float_or_none, errmsg="The yaxis scale must be a positive number."
        ),
        Field("yaxis_tic_interval", logooptions.yaxis_tic_interval, float_or_none),
        Field("show_ends", False, truth),
        Field("show_fineprint", False, truth),
        Field(
            "color_scheme", "color_auto", color_schemes.get, options=color_schemes.keys(), errmsg="Unknown color scheme"
        ),
        Field("color0", ""),
        Field("symbols0", ""),
        Field("desc0", ""),
        Field("color1", ""),
        Field("symbols1", ""),
        Field("desc1", ""),
        Field("color2", ""),
        Field("symbols2", ""),
        Field("desc2", ""),
        Field("color3", ""),
        Field("symbols3", ""),
        Field("desc3", ""),
        Field("color4", ""),
        Field("symbols4", ""),
        Field("desc4", ""),
        Field("ignore_lower_case", False, truth),
        Field("scale_width", False, truth),
    ]

    # Index the fields by name for direct lookup below.
    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form["show_errorbars"].value = logooptions.show_errorbars
        form["show_xaxis"].value = logooptions.show_xaxis
        form["show_yaxis"].value = logooptions.show_yaxis
        form["show_ends"].value = logooptions.show_ends
        form["show_fineprint"].value = logooptions.show_fineprint
        form["scale_width"].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    # Fields whose converted value is copied onto logooptions under the
    # same attribute name.
    options_from_form = [
        "format",
        "stacks_per_line",
        "stack_width",
        "alphabet",
        "unit_name",
        "first_index",
        "logo_start",
        "logo_end",
        "composition",
        "show_errorbars",
        "logo_title",
        "logo_label",
        "show_xaxis",
        "xaxis_label",
        "show_yaxis",
        "yaxis_label",
        "yaxis_scale",
        "yaxis_tic_interval",
        "show_ends",
        "show_fineprint",
        "scale_width",
    ]

    # Conversion failures are collected here and reported on the form
    # instead of aborting the request.
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value != None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        # A group is only added when a color was entered for it.
        if color:
            try:
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError as e:
                errors.append(("color%d" % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == "color_custom":
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # assert type(sequences) == str

    # Fall back to the 'sequences' form field when no upload was supplied.
    # NOTE(review): nesting reconstructed from flattened source lines --
    # confirm that the size check below belongs inside this ``if``.
    if not sequences or len(sequences) == 0:
        sequences = form["sequences"].get_value()
        # If a user tries to paste a very large file into sequence textarea,
        # then WebLogo runs very slow for no apparently good reason. (Might be client side bug?)
        # So we limit the maximum sequence size.
        # Form field also limits size, but not necessarily respected. Also can truncate data
        # without warning, so we'll set textarea maximum to be larger than MAX_SEQUENCE_SIZE
        SEQUENCES_MAXLENGTH = 100000
        if len(sequences) > SEQUENCES_MAXLENGTH:
            errors.append(("sequences", "Sequence data too large for text input. Use file upload instead."))
            # Replace the field with an empty one so that the oversized
            # text is not part of the controls sent back with the form.
            controls[0] = Field("sequences", "")

    if not sequences or len(sequences) == 0:
        errors.append(
            ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload.")
        )

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = "ignore_lower_case" in form_values
        if comp == "percentCG":
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError as motif_err:
            # Not a transfac matrix: read the input as sequence data instead.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case
            )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        # The raw field value (the format name) selects the formatter and,
        # further down, the MIME type and file extension.
        format = form["format"].value
        logo = weblogolib.formatters[format](data, logoformat)
    except ValueError as err:
        errors.append(err.args)
    except IOError as err:
        errors.append(err.args)
    except RuntimeError as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print("Content-Disposition: attachment; " 'filename="logo.%s"' % extension[format])
    else:
        print("Content-Disposition: inline; " 'filename="logo.%s"' % extension[format])
    # Separate header from data
    print()

    # Finally, and at last, send the logo.
    # On Python 3 the logo is written through the underlying binary buffer
    # of stdout.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
comp = form["composition"].get_value() percentCG = form["percentCG"].get_value() ignore_lower_case = form_values.has_key("ignore_lower_case") if comp=='percentCG': comp = str(percentCG/100) from corebio.matrix import Motif try: # Try reading data in transfac format first. # TODO Refactor this code motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet) prior = weblogolib.parse_prior( comp,motif.alphabet) data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior) except ValueError, motif_err : seqs = weblogolib.read_seq_data(StringIO( sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case ) prior = weblogolib.parse_prior(comp, seqs.alphabet) data = weblogolib.LogoData.from_seqs(seqs, prior) logoformat = weblogolib.LogoFormat(data, logooptions) format = form["format"].value weblogolib.formatters[format](data, logoformat, logo) except ValueError, err : errors.append( err.args ) except IOError, err : errors.append( err.args) except RuntimeError, err : errors.append( err.args ) if form_values.has_key("cmd_validate") or errors :
if comp == 'percentCG': comp = str(percentCG / 100) from corebio.matrix import Motif try: # Try reading data in transfac format first. # TODO Refactor this code motif = Motif.read_transfac(seq_file, alphabet=logooptions.alphabet) prior = weblogolib.parse_prior(comp, motif.alphabet) data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior) except ValueError as motif_err: seqs = weblogolib.read_seq_data( seq_file, alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case) prior = weblogolib.parse_prior(comp, seqs.alphabet) data = weblogolib.LogoData.from_seqs(seqs, prior) logoformat = weblogolib.LogoFormat(data, logooptions) format = form["format"].value logo = weblogolib.formatters[format](data, logoformat) except ValueError as err: errors.append(err.args) except IOError as err: errors.append(err.args) except RuntimeError as err: errors.append(err.args) if errors or "cmd_validate" in form_values:
ignore_lower_case = form_values.has_key("ignore_lower_case") if comp == 'percentCG': comp = str(percentCG / 100) from corebio.matrix import Motif try: # Try reading data in transfac format first. # TODO Refactor this code motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet) prior = weblogolib.parse_prior(comp, motif.alphabet) data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior) except ValueError, motif_err: seqs = weblogolib.read_seq_data( StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case) prior = weblogolib.parse_prior(comp, seqs.alphabet) data = weblogolib.LogoData.from_seqs(seqs, prior) logoformat = weblogolib.LogoFormat(data, logooptions) format = form["format"].value weblogolib.formatters[format](data, logoformat, logo) except ValueError, err: errors.append(err.args) except IOError, err: errors.append(err.args) except RuntimeError, err: errors.append(err.args) if form_values.has_key("cmd_validate") or errors:
def main(htdocs_directory=None):
    """Handle one CGI request: show the logo form or return a rendered logo.

    The request is read through ``cgilib.FieldStorage()``.  Depending on the
    submitted fields the function either

    * sends the (default or error-annotated) form via ``send_form`` and
      returns, or
    * builds the logo and writes HTTP headers followed by the logo data to
      ``sys.stdout``.

    Args:
        htdocs_directory: Passed through unchanged to ``send_form``.

    Returns:
        None.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field('format', 'png', weblogolib.formatters.get,
              options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg',
                       'logodata'],
              # TODO: Should copy list from __init__.formatters
              errmsg="Unknown format option."),
        Field('stacks_per_line', logooptions.stacks_per_line, int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width', 'medium', weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet', 'alphabet_auto', alphabets.get,
              options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                       'alphabet_rna'],
              errmsg="Unknown sequence type."),
        Field('unit_name', 'bits',
              options=['probability', 'bits', 'nats', 'kT', 'kJ/mol',
                       'kcal/mol']),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition', 'comp_auto', composition.get,
              options=['comp_none', 'comp_auto', 'comp_equiprobable',
                       'comp_CG', 'comp_Celegans', 'comp_Dmelanogaster',
                       'comp_Ecoli', 'comp_Hsapiens', 'comp_Mmusculus',
                       'comp_Scerevisiae'],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none,
              errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale', logooptions.yaxis_scale, float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme', 'color_auto', color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    # Index the controls by field name for direct lookup below.
    form = dict((c.name, c) for c in controls)

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        for name in ('show_errorbars', 'show_xaxis', 'show_yaxis',
                     'show_ends', 'show_fineprint', 'scale_width'):
            form[name].value = getattr(logooptions, name)
        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width',
    ]

    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")

    # An uploaded file takes precedence; otherwise use the text box.
    if not sequences:
        sequences = form["sequences"].get_value()

    if not sequences:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."
        ))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # NOTE(review): float_or_none presumably yields None for an empty
            # field; dividing None would raise an uncaught TypeError, so it is
            # reported as a regular (field, message) form error instead.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError:
            # Not transfac: fall back to reading the input as sequence data.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        fmt = form["format"].value
        logo = weblogolib.formatters[fmt](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    # RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[fmt])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    disposition = 'attachment' if "download" in form_values else 'inline'
    print('Content-Disposition: %s; filename="logo.%s"'
          % (disposition, extension[fmt]))
    # Separate header from data
    print()

    # The headers above went through the buffered text layer.  Flush them
    # before writing to the underlying binary stream, otherwise the logo
    # bytes can be emitted ahead of the headers.
    sys.stdout.flush()

    # Finally, and at last, send the logo.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
# Build a probability sequence logo of the translated "naive" ancestral
# sequences found in the trees of args.trees_path, and save it as a PNG.
import os

args = parser.parse_args()

tree_yielder = dendropy.Tree.yield_from_files(
    files=[args.trees_path],
    schema="newick",
    preserve_underscores=True,
)

# One "ancestral" annotation per tree, read from the node labelled "naive".
naive_seqs = [
    tree.find_node_with_taxon_label("naive").annotations.get_value("ancestral")
    for tree in tree_yielder
]
aa_naive_seqs = [translate(seq) for seq in naive_seqs]
aa_naive_seqs_d = {("naive" + str(i)): seq for i, seq in enumerate(aa_naive_seqs)}

# The sequences are handed to the logo library through a temporary FASTA
# file, which is deleted again even when reading it fails.
fasta_path = args.output_base + ".fasta"
write_to_fasta(aa_naive_seqs_d, fasta_path)
try:
    # Plain "r": the "U" flag was removed in Python 3.11.
    with open(fasta_path, "r") as f:
        seqs = w.read_seq_data(f)
finally:
    # os.remove instead of spawning `rm`: portable, and safe for paths that
    # contain whitespace (the old command line was split on spaces).
    os.remove(fasta_path)
data = w.LogoData.from_seqs(seqs)

options = w.LogoOptions()
options.unit_name = "probability"
options.yaxis_label = "Probability"
options.xaxis_label = "Site Position"
options.show_fineprint = False
options.stacks_per_line = 500
options.tic_length = 10

format = w.LogoFormat(data, options)
# PNG output is binary, so the file must be opened in binary mode.
with open(args.output_base + ".png", 'wb') as f:
    f.write(w.png_print_formatter(data, format))