예제 #1
0
def weblogoPOIM(logofile, poim, max_len):
    """Render the first-degree POIM as a combined positive/negative logo.

    Instead of plotting the POIM heatmap, the entries of ``poim[0]`` are
    split by sign into a "positive" and a "negative" matrix.  Each matrix is
    rendered as a WebLogo PNG (``<logofile>p.png`` and ``<logofile>n.png``),
    the two images are merged into ``logofile`` by ``concatPNG``, and the two
    intermediate files are deleted again.

    Parameters
    ----------
    logofile : str
        Path of the combined image.  It is also the prefix of the two
        temporary PNG files, and its base name (extension removed, then
        everything up to the last underscore removed) becomes the logo title.
    poim : sequence
        Only ``poim[0]`` is read; it must be a 2-D nested sequence of
        numbers.  Presumably one row per letter of 'ACGT' and one column per
        sequence position, since the transposed matrix is handed to WebLogo
        together with that four-letter alphabet -- TODO confirm with caller.
    max_len
        Unused; kept so existing callers do not break.
    """
    warnings.filterwarnings('ignore', ' This call to matplotlib.use()*')
    from weblogolib import LogoData, LogoOptions, LogoFormat, classic, png_print_formatter

    first_degree = poim[0]
    # Negative entries go (sign-flipped) into the negative logo, everything
    # else into the positive logo; the other logo gets a 0 at that cell.
    # NOTE(review): the two scale factors differ by a factor of ten
    # (1000 vs. 10000).  Kept exactly as found -- confirm this is intended.
    positive_logo = [[0 if value < 0 else value * 1000 for value in row]
                     for row in first_degree]
    negative_logo = [[value * -10000 if value < 0 else 0 for value in row]
                     for row in first_degree]

    pos_data = LogoData.from_counts('ACGT', numpy.array(positive_logo).T, None)
    neg_data = LogoData.from_counts("ACGT", numpy.array(negative_logo).T, None)

    # Derive the title from the file name: drop the directory, the extension
    # (only when there is one) and any prefix up to the last underscore.
    title = os.path.split(logofile)[1]
    dot = title.rfind(".")
    if dot != -1:
        title = title[:dot]
    if "_" in title:
        title = title[title.rfind("_") + 1:]

    neg_opt = LogoOptions()
    neg_opt.fineprint += " from KIRMES POIM data"
    neg_opt.small_fontsize = 4
    neg_opt.title_fontsize = 8
    neg_opt.scale_width = False
    neg_opt.logo_title = title + " Negative Logo"
    neg_format = LogoFormat(neg_data, neg_opt)

    pos_opt = LogoOptions()
    pos_opt.scale_width = False
    pos_opt.logo_title = title + " Positive Sequence Logo"
    pos_opt.show_fineprint = False
    pos_opt.color_scheme = classic
    pos_format = LogoFormat(pos_data, pos_opt)

    neg_path = logofile + "n.png"
    pos_path = logofile + "p.png"
    try:
        # PNG is binary data, so the files must be opened in binary mode.
        with open(neg_path, 'wb') as neg_logo:
            png_print_formatter(neg_data, neg_format, neg_logo)
        with open(pos_path, 'wb') as pos_logo:
            png_print_formatter(pos_data, pos_format, pos_logo)
        concatPNG(logofile, (pos_path, neg_path))
    finally:
        # Remove the intermediate images even when rendering or merging
        # failed, so no stray temporary files are left behind.
        for tmp_path in (neg_path, pos_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
예제 #2
0
    def write_weblogo(self, filepath):
        """Render this object's matrix as a PNG sequence logo.

        ``self.values`` is converted row by row into a 2-D array of counts and
        ``self.alphabet`` is joined into the symbol string of the WebLogo
        alphabet.  The logo uses the ``classic`` color scheme and the title
        "PWM".

        Parameters
        ----------
        filepath : str
            Destination file; it is created or overwritten with the PNG data.
        """
        counts = np.array(tuple(tuple(distribution)
                                for distribution in self.values))
        alph = Alphabet(''.join(self.alphabet))

        weblogoData = LogoData.from_counts(alph, counts)
        weblogoOptions = LogoOptions(color_scheme=classic)
        # The title option is called ``logo_title``; assigning to ``title``
        # only creates an attribute that the renderer never reads.
        weblogoOptions.logo_title = "PWM"
        weblogoFormat = LogoFormat(weblogoData, weblogoOptions)

        # Render first so that a failing render does not leave an empty file,
        # then write the PNG bytes through a binary-mode handle.
        png_data = png_print_formatter(weblogoData, weblogoFormat)
        with open(filepath, 'wb') as weblogo_file:
            weblogo_file.write(png_data)
예제 #3
0
def outputMotif(theta_motif, theta_background_matrix, lambda_motif, logev, k, outstr):
    """Print a motif's summary statistics and save its logo as PNG and EPS.

    Two lines are printed (the motif "fraction" and its "E-value", both
    formatted by ``sprint_logx``), then the logo is written to
    ``<outstr>Motif_<k>.png`` and ``<outstr>Motif_<k>.eps``.

    Parameters
    ----------
    theta_motif
        Count matrix handed to ``LogoData.from_counts`` together with the
        unambiguous DNA alphabet.
    theta_background_matrix
        Unused.  Its first row was once considered as the prior for the logo
        but is deliberately not passed (the author was unsure whether a prior
        distorts the result).
    lambda_motif
        Value whose logarithm is reported as the motif "fraction".
    logev
        Value reported as the motif "E-value"; presumably already a
        logarithm, since it is passed to ``sprint_logx`` unchanged -- TODO
        confirm.
    k
        Motif identifier; used in the printed messages and the file names.
    outstr : str
        Prefix (directory and/or file-name stem) of the output files.
    """
    from weblogolib import LogoData, LogoOptions, LogoFormat, png_formatter, eps_formatter, unambiguous_dna_alphabet
    _pv_format = "%3.1fe%+04.0f"
    f_string = sprint_logx(log(lambda_motif), 1, _pv_format)
    g_string = sprint_logx(logev, 1, _pv_format)
    print(("Motif {0:s} had a fraction of {1:s}").format(str(k), f_string))
    print(("Motif {0:s} had an E-value of {1:s}").format(str(k), g_string))

    data = LogoData.from_counts(counts=theta_motif, alphabet=unambiguous_dna_alphabet)
    options = LogoOptions()
    # The title option is called ``logo_title``; assigning to ``title`` only
    # creates an attribute that the renderer never reads.
    options.logo_title = 'Motif'
    forma = LogoFormat(data, options)
    base_name = outstr + "Motif_" + str(k)

    print('Saving motif as a png...')
    # PNG is binary data, so the file must be opened in binary mode.
    with open(base_name + '.png', 'wb') as fout:
        png_formatter(data, forma, fout)

    print('Saving motif as an eps...')
    with open(base_name + '.eps', 'w') as fout:
        eps_formatter(data, forma, fout)
예제 #4
0
def outputMotif(theta_motif, theta_background_matrix, lambda_motif, logev, k, outstr):
    """Print a motif's summary statistics and save its logo as PNG and EPS.

    Two lines are printed (the motif "fraction" and its "E-value", both
    formatted by ``sprint_logx``), then the logo is written to
    ``<outstr>Motif_<k>.png`` and ``<outstr>Motif_<k>.eps``.

    Parameters
    ----------
    theta_motif
        Count matrix handed to ``LogoData.from_counts`` together with the
        unambiguous DNA alphabet.
    theta_background_matrix
        Unused.  Its first row was once considered as the prior for the logo
        but is deliberately not passed (the author was unsure whether a prior
        distorts the result).
    lambda_motif
        Value whose logarithm is reported as the motif "fraction".
    logev
        Value reported as the motif "E-value"; presumably already a
        logarithm, since it is passed to ``sprint_logx`` unchanged -- TODO
        confirm.
    k
        Motif identifier; used in the printed messages and the file names.
    outstr : str
        Prefix (directory and/or file-name stem) of the output files.
    """
    from weblogolib import LogoData, LogoOptions, LogoFormat, png_formatter, eps_formatter, unambiguous_dna_alphabet
    _pv_format = "%3.1fe%+04.0f"
    f_string = sprint_logx(log(lambda_motif), 1, _pv_format)
    g_string = sprint_logx(logev, 1, _pv_format)
    print(("Motif {0:s} had a fraction of {1:s}").format(str(k), f_string))
    print(("Motif {0:s} had an E-value of {1:s}").format(str(k), g_string))

    data = LogoData.from_counts(counts=theta_motif, alphabet=unambiguous_dna_alphabet)
    options = LogoOptions()
    # The title option is called ``logo_title``; assigning to ``title`` only
    # creates an attribute that the renderer never reads.
    options.logo_title = 'Motif'
    forma = LogoFormat(data, options)
    base_name = outstr + "Motif_" + str(k)

    print('Saving motif as a png...')
    # PNG is binary data, so the file must be opened in binary mode.
    with open(base_name + '.png', 'wb') as fout:
        png_formatter(data, forma, fout)

    print('Saving motif as an eps...')
    with open(base_name + '.eps', 'w') as fout:
        eps_formatter(data, forma, fout)
예제 #5
0
def createSeqLogo(pfm, filename, fformat='eps'):
    """Create sequence logo for an individual cRBM motif.

    Parameters
    -----------
    pfm : numpy-array
        2D numpy array representing a PFM. See :meth:`CRBM.getPFMs`.
        The array is transposed before it is handed to WebLogo together
        with the alphabet 'ACGT'.
    filename : str
        Path of the output file; it is created or overwritten.
    fformat : str
        File format for storing the sequence logo; must be a key of the
        WebLogo ``formatters`` mapping. Default: 'eps'.

    Raises
    ------
    KeyError
        If ``fformat`` is not a key of ``formatters``.
    """
    alph = Alphabet('ACGT')
    weblogoData = LogoData.from_counts(alph, pfm.T)
    weblogoOptions = LogoOptions(color_scheme=classic)
    weblogoFormat = LogoFormat(weblogoData, weblogoOptions)
    content = formatters[fformat](weblogoData, weblogoFormat)
    # The context manager closes the file even if the write fails.
    with open(filename, "wb") as f:
        f.write(content)
예제 #6
0
파일: _cli.py 프로젝트: snehamitra/NPLB
def _build_option_parser():
    """Build and return the option parser of the WebLogo command line tool.

    The options are organized into the groups Input/Output, Logo Data,
    Transformations, Logo Format, Color, Advanced Format and WebLogo Server.
    Most defaults are taken from a freshly constructed ``LogoOptions``
    instance so that the command line and the library stay in agreement.

    Returns
    -------
    DeOptionParser
        The fully configured parser.
    """
    defaults = LogoOptions()
    parser = DeOptionParser(
        usage="%prog [options]  < sequence_data.fa > sequence_logo.eps",
        description=description,
        version=release_description,
        add_verbose_options=False)

    io_grp = OptionGroup(
        parser,
        "Input/Output Options",
    )
    data_grp = OptionGroup(
        parser,
        "Logo Data Options",
    )
    trans_grp = OptionGroup(parser, "Transformations",
                            "Optional transformations of the sequence data.")

    format_grp = OptionGroup(
        parser, "Logo Format Options",
        "These options control the format and display of the logo.")
    color_grp = OptionGroup(
        parser, "Color Options",
        "Colors can be specified using CSS2 syntax. e.g. 'red', '#FF0000', etc."
    )
    advanced_grp = OptionGroup(
        parser, "Advanced Format Options",
        "These options provide fine control over the display of the logo. ")
    server_grp = OptionGroup(parser, "WebLogo Server",
                             "Run a standalone webserver on a local port.")

    parser.add_option_group(io_grp)
    parser.add_option_group(data_grp)
    parser.add_option_group(trans_grp)
    parser.add_option_group(format_grp)
    parser.add_option_group(color_grp)
    parser.add_option_group(advanced_grp)
    parser.add_option_group(server_grp)

    # ========================== IO OPTIONS ==========================

    io_grp.add_option("-f",
                      "--fin",
                      dest="fin",
                      action="store",
                      type="file_in",
                      default=None,
                      help="Sequence input file (default: stdin)",
                      metavar="FILENAME")

    # Add position weight matrix formats to input parsers by hand
    fin_choices = dict(seq_io.format_names())
    fin_choices['transfac'] = 'transfac'

    io_grp.add_option(
        "-D",
        "--datatype",
        dest="input_parser",
        action="store",
        type="dict",
        default=seq_io,
        choices=fin_choices,  # seq_io.format_names(),
        help=
        "Type of multiple sequence alignment or position weight matrix file: (%s, transfac)"
        % ', '.join([f.names[0] for f in seq_io.formats]),
        metavar="FORMAT")

    io_grp.add_option("-o",
                      "--fout",
                      dest="fout",
                      type="file_out",
                      default=sys.stdout,
                      help="Output file (default: stdout)",
                      metavar="FILENAME")

    io_grp.add_option(
        "-F",
        "--format",
        dest="formatter",
        action="store",
        type="dict",
        choices=formatters,
        metavar="FORMAT",
        help=
        "Format of output: eps (default), png, png_print, pdf, jpeg, svg, logodata",
        default=default_formatter)

    # ========================== Data OPTIONS ==========================

    # NOTE: -A and -a intentionally share dest="alphabet"; whichever is given
    # last on the command line wins.
    data_grp.add_option(
        "-A",
        "--sequence-type",
        dest="alphabet",
        action="store",
        type="dict",
        choices=std_alphabets,
        help="The type of sequence data: 'protein', 'rna' or 'dna'.",
        metavar="TYPE")

    data_grp.add_option(
        "-a",
        "--alphabet",
        dest="alphabet",
        action="store",
        help="The set of symbols to count, e.g. 'AGTC'. "
        "All characters not in the alphabet are ignored. "
        "If neither the alphabet nor sequence-type are specified then weblogo will examine the input data and make an educated guess. "
        "See also --sequence-type, --ignore-lower-case")

    data_grp.add_option(
        "-U",
        "--units",
        dest="unit_name",
        action="store",
        # optparse requires a real list or tuple for type="choice"; a dict
        # view (Python 3) is rejected, so materialize the keys.
        choices=list(std_units.keys()),
        type="choice",
        default=defaults.unit_name,
        help=
        "A unit of entropy ('bits' (default), 'nats', 'digits'), or a unit of free energy ('kT', 'kJ/mol', 'kcal/mol'), or 'probability' for probabilities",
        metavar="NUMBER")

    data_grp.add_option(
        "",
        "--composition",
        dest="composition",
        action="store",
        type="string",
        default="auto",
        help=
        "The expected composition of the sequences: 'auto' (default), 'equiprobable', 'none' (do not perform any compositional adjustment), a CG percentage, a species name (e.g. 'E. coli', 'H. sapiens'), or an explicit distribution (e.g. \"{'A':10, 'C':40, 'G':40, 'T':10}\"). The automatic option uses a typical distribution for proteins and equiprobable distribution for everything else. ",
        metavar="COMP.")

    data_grp.add_option(
        "",
        "--weight",
        dest="weight",
        action="store",
        type="float",
        default=None,
        help="The weight of prior data.  Default depends on alphabet length",
        metavar="NUMBER")

    data_grp.add_option(
        "-i",
        "--first-index",
        dest="first_index",
        action="store",
        type="int",
        default=1,
        help="Index of first position in sequence data (default: 1)",
        metavar="INDEX")

    data_grp.add_option("-l",
                        "--lower",
                        dest="logo_start",
                        action="store",
                        type="int",
                        help="Lower bound of sequence to display",
                        metavar="INDEX")

    data_grp.add_option("-u",
                        "--upper",
                        dest="logo_end",
                        action="store",
                        type="int",
                        help="Upper bound of sequence to display",
                        metavar="INDEX")

    # ========================== Transformation OPTIONS ==========================

    # FIXME Add test?
    trans_grp.add_option(
        "",
        "--ignore-lower-case",
        dest="ignore_lower_case",
        action="store_true",
        default=False,
        help=
        "Disregard lower case letters and only count upper case letters in sequences."
    )

    trans_grp.add_option(
        "",
        "--reverse",
        dest="reverse",
        action="store_true",
        default=False,
        help="reverse sequences",
    )

    trans_grp.add_option(
        "",
        "--complement",
        dest="complement",
        action="store_true",
        default=False,
        help="complement DNA sequences",
    )

    # ========================== FORMAT OPTIONS ==========================

    # NOTE: -s/--size and -W/--stack-width (advanced group) share
    # dest="stack_width"; the size names are mapped to widths via std_sizes.
    format_grp.add_option(
        "-s",
        "--size",
        dest="stack_width",
        action="store",
        type="dict",
        choices=std_sizes,
        metavar="LOGOSIZE",
        default=defaults.stack_width,
        help="Specify a standard logo size (small, medium (default), large)")

    format_grp.add_option(
        "-n",
        "--stacks-per-line",
        dest="stacks_per_line",
        action="store",
        type="int",
        help="Maximum number of logo stacks per logo line. (default: %default)",
        default=defaults.stacks_per_line,
        metavar="COUNT")

    format_grp.add_option("-t",
                          "--title",
                          dest="logo_title",
                          action="store",
                          type="string",
                          help="Logo title text.",
                          default=defaults.logo_title,
                          metavar="TEXT")

    format_grp.add_option("",
                          "--label",
                          dest="logo_label",
                          action="store",
                          type="string",
                          help="A figure label, e.g. '2a'",
                          default=defaults.logo_label,
                          metavar="TEXT")

    format_grp.add_option(
        "-X",
        "--show-xaxis",
        action="store",
        type="boolean",
        default=defaults.show_xaxis,
        metavar="YES/NO",
        help="Display sequence numbers along x-axis? (default: %default)")

    format_grp.add_option("-x",
                          "--xlabel",
                          dest="xaxis_label",
                          action="store",
                          type="string",
                          default=defaults.xaxis_label,
                          help="X-axis label",
                          metavar="TEXT")

    format_grp.add_option(
        "",
        "--annotate",
        dest="annotate",
        action="store",
        type="string",
        default=None,
        help=
        "A comma separated list of custom stack annotations, e.g. '1,3,4,5,6,7'.  Annotation list must be same length as sequences.",
        metavar="TEXT")

    format_grp.add_option(
        "-S",
        "--yaxis",
        dest="yaxis_scale",
        action="store",
        type="float",
        help=
        "Height of yaxis in units. (Default: Maximum value with uninformative prior.)",
        metavar="UNIT")

    format_grp.add_option(
        "-Y",
        "--show-yaxis",
        action="store",
        type="boolean",
        dest="show_yaxis",
        default=defaults.show_yaxis,
        metavar="YES/NO",
        help="Display entropy scale along y-axis? (default: %default)")

    format_grp.add_option(
        "-y",
        "--ylabel",
        dest="yaxis_label",
        action="store",
        type="string",
        help="Y-axis label (default depends on plot type and units)",
        metavar="TEXT")

    format_grp.add_option(
        "-E",
        "--show-ends",
        action="store",
        type="boolean",
        default=defaults.show_ends,
        metavar="YES/NO",
        help="Label the ends of the sequence? (default: %default)")

    format_grp.add_option("-P",
                          "--fineprint",
                          dest="fineprint",
                          action="store",
                          type="string",
                          default=defaults.fineprint,
                          help="The fine print (default: weblogo version)",
                          metavar="TEXT")

    format_grp.add_option("",
                          "--ticmarks",
                          dest="yaxis_tic_interval",
                          action="store",
                          type="float",
                          default=defaults.yaxis_tic_interval,
                          help="Distance between ticmarks (default: %default)",
                          metavar="NUMBER")

    format_grp.add_option("",
                          "--errorbars",
                          dest="show_errorbars",
                          action="store",
                          type="boolean",
                          default=defaults.show_errorbars,
                          metavar="YES/NO",
                          help="Display error bars? (default: %default)")

    format_grp.add_option(
        "",
        "--reverse-stacks",
        dest="reverse_stacks",
        action="store",
        type="boolean",
        # Use the option's own library default.  The previous code borrowed
        # the error-bar default (copy-paste slip); that value remains the
        # fallback in case this LogoOptions has no ``reverse_stacks``.
        default=getattr(defaults, "reverse_stacks", defaults.show_errorbars),
        metavar="YES/NO",
        help="Draw stacks with largest letters on top? (default: %default)")

    # ========================== Color OPTIONS ==========================
    # TODO: Future Feature
    # color_grp.add_option( "-K", "--color-key",
    #    dest= "show_color_key",
    #    action="store",
    #    type = "boolean",
    #    default= defaults.show_color_key,
    #    metavar = "YES/NO",
    #    help="Display a color key (default: %default)")

    # sorted() works on both Python 2 lists and Python 3 dict views, unlike
    # calling .sort() on the result of .keys().
    color_scheme_choices = sorted(std_color_schemes.keys())
    color_grp.add_option(
        "-c",
        "--color-scheme",
        dest="color_scheme",
        action="store",
        type="dict",
        choices=std_color_schemes,
        metavar="SCHEME",
        default=None,  # Auto
        help="Specify a standard color scheme (%s)" %
        ", ".join(color_scheme_choices))

    color_grp.add_option(
        "-C",
        "--color",
        dest="colors",
        action="append",
        metavar="COLOR SYMBOLS DESCRIPTION ",
        nargs=3,
        default=[],
        help=
        "Specify symbol colors, e.g. --color black AG 'Purine' --color red TC 'Pyrimidine' "
    )

    color_grp.add_option("",
                         "--default-color",
                         dest="default_color",
                         action="store",
                         metavar="COLOR",
                         default=defaults.default_color,
                         help="Symbol color if not otherwise specified.")

    # ========================== Advanced options =========================

    advanced_grp.add_option("-W",
                            "--stack-width",
                            dest="stack_width",
                            action="store",
                            type="float",
                            default=defaults.stack_width,
                            help="Width of a logo stack (default: %s)" %
                            defaults.stack_width,
                            metavar="POINTS")

    advanced_grp.add_option(
        "",
        "--aspect-ratio",
        dest="stack_aspect_ratio",
        action="store",
        type="float",
        default=defaults.stack_aspect_ratio,
        help="Ratio of stack height to width (default: %s)" %
        defaults.stack_aspect_ratio,
        metavar="POINTS")

    advanced_grp.add_option("",
                            "--box",
                            dest="show_boxes",
                            action="store",
                            type="boolean",
                            default=False,
                            metavar="YES/NO",
                            help="Draw boxes around symbols? (default: no)")

    advanced_grp.add_option(
        "",
        "--resolution",
        dest="resolution",
        action="store",
        type="float",
        default=96,
        help=
        "Bitmap resolution in dots per inch (DPI).  (Default: 96 DPI, except png_print, 600 DPI) Low resolution bitmaps (DPI<300) are antialiased.",
        metavar="DPI")

    advanced_grp.add_option(
        "",
        "--scale-width",
        dest="scale_width",
        action="store",
        type="boolean",
        default=True,
        metavar="YES/NO",
        help=
        "Scale the visible stack width by the fraction of symbols in the column?  (I.e. columns with many gaps of unknowns are narrow.)  (Default: yes)"
    )

    advanced_grp.add_option(
        "",
        "--debug",
        action="store",
        type="boolean",
        default=defaults.debug,
        metavar="YES/NO",
        help="Output additional diagnostic information. (Default: %default)")

    # ========================== Server options =========================
    server_grp.add_option(
        "",
        "--serve",
        dest="serve",
        action="store_true",
        default=False,
        help="Start a standalone WebLogo server for creating sequence logos.")

    server_grp.add_option(
        "",
        "--port",
        dest="port",
        action="store",
        type="int",
        default=8080,
        help="Listen to this local port. (Default: %default)",
        metavar="PORT")

    return parser
예제 #7
0
파일: _cli.py 프로젝트: snehamitra/NPLB
def _build_logoformat(logodata, opts):
    """Extract and process relevant option values and return a
    LogoFormat object.

    Parameters
    ----------
    logodata
        The logo data the format is built for; passed straight through to
        ``LogoFormat``.
    opts
        Parsed command-line options.  Every name in ``direct_from_opts`` is
        looked up in ``opts.__dict__`` and copied onto a new ``LogoOptions``.
        ``opts.colors`` (a list of ``(color, symbols, description)`` triples),
        when non-empty, replaces the color scheme with a custom one, and
        ``opts.annotate``, when set, is split on commas.

    Returns
    -------
    LogoFormat
        Format built from ``logodata`` and the collected options.

    Raises
    ------
    KeyError
        If ``opts`` lacks one of the directly copied option names.
    ValueError
        If a ``--color`` value is rejected while building the color group.
    """
    direct_from_opts = [
        "stacks_per_line",
        "logo_title",
        "yaxis_label",
        "show_xaxis",
        "show_yaxis",
        "xaxis_label",
        "show_ends",
        "fineprint",
        "show_errorbars",
        "show_boxes",
        "yaxis_tic_interval",
        "resolution",
        "alphabet",
        "debug",
        "default_color",
        "color_scheme",
        "unit_name",
        "logo_label",
        "yaxis_scale",
        "first_index",
        "logo_start",
        "logo_end",
        "scale_width",
        "annotate",
        "stack_width",
        "stack_aspect_ratio",
        "reverse_stacks",
    ]

    option_values = vars(opts)
    args = dict((name, option_values[name]) for name in direct_from_opts)

    # Explicit --color triples take precedence over a named color scheme.
    if opts.colors:
        color_scheme = ColorScheme()
        for color, symbols, desc in opts.colors:
            try:
                color_scheme.groups.append(ColorGroup(symbols, color, desc))
            except ValueError:
                raise ValueError("error: option --color: invalid value: '%s'" %
                                 color)

        args["color_scheme"] = color_scheme

    if opts.annotate:
        args["annotate"] = opts.annotate.split(',')

    logooptions = LogoOptions()
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for name, value in args.items():
        setattr(logooptions, name, value)

    return LogoFormat(logodata, logooptions)