示例#1
0
 def test_parse_prior_equiprobable(self):
     """Check 'equiprobable' priors against weight * equiprobable_distribution(n)."""
     # Weight passed by keyword, on the protein alphabet.
     expected = 20. * equiprobable_distribution(20)
     observed = parse_prior('equiprobable', unambiguous_protein_alphabet,
                            weight=20.)
     self.assertTrue(all(expected == observed))

     # Weight passed positionally; the composition name carries stray
     # whitespace and mixed case.
     expected = 1.2 * equiprobable_distribution(3)
     observed = parse_prior(' equiprobablE  ', Alphabet('123'), 1.2)
     self.assertTrue(all(expected == observed))
示例#2
0
 def test_parse_prior_none(self):
     """Check that None and 'none' (in any case) parse to a None prior."""
     cases = (
         (None, unambiguous_protein_alphabet),
         ('none', unambiguous_protein_alphabet),
         ('noNe', None),
     )
     for composition, alphabet in cases:
         self.assertEqual(None, parse_prior(composition, alphabet))
示例#3
0
 def test_parse_prior_none(self):
     """Check that None and 'none' (in any case) parse to a None prior."""
     # assertEqual replaces the deprecated assertEquals alias, which newer
     # unittest releases no longer provide.
     self.assertEqual(None,
                      parse_prior(None, unambiguous_protein_alphabet))
     self.assertEqual(None,
                      parse_prior('none', unambiguous_protein_alphabet))
     # The alphabet is irrelevant when no prior is requested.
     self.assertEqual(None,
                      parse_prior('noNe', None))
示例#4
0
 def test_weight(self):
     """Check the weight applied to an automatic DNA prior.

     Without an explicit weight the expected prior is 2 * uniform; with an
     explicit weight the uniform distribution is scaled by that weight.
     """
     expected = 2. * equiprobable_distribution(4)
     observed = parse_prior('automatic', unambiguous_dna_alphabet)
     self.assertTrue(all(expected == observed))

     expected = 123.123 * equiprobable_distribution(4)
     observed = parse_prior('auto', unambiguous_dna_alphabet, 123.123)
     self.assertTrue(all(expected == observed))
示例#5
0
 def test_auto(self):
     """Check that 'auto' and 'automatic' both give 4 * uniform for DNA."""
     for composition in ('auto', 'automatic'):
         expected = 4. * equiprobable_distribution(4)
         observed = parse_prior(composition, unambiguous_dna_alphabet)
         self.assertTrue(all(expected == observed))
示例#6
0
    def test_parse_prior_equiprobable(self):
        """Check 'equiprobable' priors for a protein and a custom alphabet."""
        protein_expected = 20. * equiprobable_distribution(20)
        protein_prior = parse_prior(
            'equiprobable', unambiguous_protein_alphabet, weight=20.)
        self.assertTrue(all(protein_expected == protein_prior))

        # Same composition name with padding and odd capitalisation, on a
        # three-symbol alphabet.
        custom_expected = 1.2 * equiprobable_distribution(3)
        custom_prior = parse_prior(' equiprobablE  ', Alphabet('123'), 1.2)
        self.assertTrue(all(custom_expected == custom_prior))
示例#7
0
    def test_parse_prior_float(self):
        """Check numeric composition strings on the DNA alphabet.

        "0.5" (with or without padding) must give the uniform distribution;
        " 0.40 " must give (0.3, 0.2, 0.2, 0.3).
        """
        for text in ("0.5", " 0.500 "):
            uniform = equiprobable_distribution(4)
            observed = parse_prior(text, unambiguous_dna_alphabet, 1.0)
            self.assertTrue(all(uniform == observed))

        skewed = array((0.3, 0.2, 0.2, 0.3), float64)
        observed = parse_prior(" 0.40 ", unambiguous_dna_alphabet, 1.0)
        self.assertTrue(all(skewed == observed))
示例#8
0
    def test_parse_prior_float(self):
        """Check that fractional composition strings parse to the expected priors."""
        expected = equiprobable_distribution(4)
        observed = parse_prior('0.5', unambiguous_dna_alphabet, 1.)
        self.assertTrue(all(expected == observed))

        # Surrounding whitespace and trailing zeros.
        expected = equiprobable_distribution(4)
        observed = parse_prior(' 0.500 ', unambiguous_dna_alphabet, 1.)
        self.assertTrue(all(expected == observed))

        # A non-uniform value.
        expected = array((0.3, 0.2, 0.2, 0.3), float64)
        observed = parse_prior(' 0.40 ', unambiguous_dna_alphabet, 1.)
        self.assertTrue(all(expected == observed))
示例#9
0
    def test_parse_prior_equiprobable(self):
        """Check that 'equiprobable' priors equal the weight-scaled uniform distribution."""

        def scaled_uniform(weight, size):
            # Reference value: uniform distribution over `size` symbols times `weight`.
            return weight * equiprobable_distribution(size)

        reference = scaled_uniform(20.0, 20)
        parsed = parse_prior("equiprobable", unambiguous_protein_alphabet, weight=20.0)
        self.assertTrue(all(reference == parsed))

        reference = scaled_uniform(1.2, 3)
        parsed = parse_prior(" equiprobablE  ", Alphabet("123"), 1.2)
        self.assertTrue(all(reference == parsed))
示例#10
0
    def test_parse_prior_percentage(self):
        """Check percentage composition strings on the DNA alphabet.

        "50%" (with or without padding) must give the uniform distribution;
        " 40.0 % " must give (0.3, 0.2, 0.2, 0.3).
        """
        for text in ("50%", " 50.0 % "):
            uniform = equiprobable_distribution(4)
            observed = parse_prior(text, unambiguous_dna_alphabet, 1.0)
            self.assertTrue(all(uniform == observed))

        skewed = array((0.3, 0.2, 0.2, 0.3), float64)
        observed = parse_prior(" 40.0 % ", unambiguous_dna_alphabet, 1.0)
        self.assertTrue(all(skewed == observed))
示例#11
0
    def test_parse_prior_percentage(self):
        """Check that percentage composition strings parse to the expected priors."""
        expected = equiprobable_distribution(4)
        observed = parse_prior('50%', unambiguous_dna_alphabet, 1.)
        self.assertTrue(all(expected == observed))

        # Whitespace around the number and before the percent sign.
        expected = equiprobable_distribution(4)
        observed = parse_prior(' 50.0 % ', unambiguous_dna_alphabet, 1.)
        self.assertTrue(all(expected == observed))

        # A non-uniform value.
        expected = array((0.3, 0.2, 0.2, 0.3), float64)
        observed = parse_prior(' 40.0 % ', unambiguous_dna_alphabet, 1.)
        self.assertTrue(all(expected == observed))
示例#12
0
def _build_logodata(options):
    """Build the LogoData object described by the parsed command-line options.

    Sequence input (any ``options.input_parser`` other than "transfac") is
    read with ``read_seq_data``, optionally reversed and/or complemented, and
    converted with ``LogoData.from_seqs``.  Transfac matrix input is read with
    ``Motif.read_transfac`` and converted with ``LogoData.from_counts``.

    Raises ValueError if --ignore-lower-case, --reverse or --complement is
    combined with matrix input.
    """
    if options.input_parser != "transfac":
        sequences = read_seq_data(
            options.fin,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case)

        if options.reverse:
            reversed_seqs = [s.reverse() for s in sequences]
            sequences = SeqList(reversed_seqs, sequences.alphabet)

        if options.complement:
            complemented = [
                Seq(s, sequences.alphabet).complement() for s in sequences
            ]
            sequences = SeqList(complemented, sequences.alphabet)

        seq_prior = parse_prior(options.composition, sequences.alphabet,
                                options.weight)
        return LogoData.from_seqs(sequences, seq_prior)

    # Matrix (transfac) input from here on.
    from corebio.matrix import Motif

    # Options that are not supported for matrix input, checked in order.
    # FIXME : implement --reverse and --complement for matrix input
    unsupported = (
        ("ignore_lower_case",
         "error: option --ignore-lower-case incompatible with matrix input"),
        ("reverse",
         "error: option --reverse incompatible with matrix input"),
        ("complement",
         "error: option --complement incompatible with matrix input"),
    )
    for attribute, message in unsupported:
        if getattr(options, attribute):
            raise ValueError(message)

    matrix = Motif.read_transfac(options.fin, alphabet=options.alphabet)
    matrix_prior = parse_prior(options.composition, matrix.alphabet,
                               options.weight)
    return LogoData.from_counts(matrix.alphabet, matrix, matrix_prior)
示例#13
0
文件: _cli.py 项目: cran/RWebLogo
        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError, motif_err :
        # Failed reading Motif, try reading as multiple sequence data.
        seqs = read_seq_data(fin, 
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case = options.ignore_lower_case)   

    if motif_flag :
        if options.ignore_lower_case:
            raise ValueError("error: option --ignore-lower-case incompatible with matrix input")
        if options.reverse: motif.reverse()
        if options.complement: motif.complement()

        prior = parse_prior( options.composition,motif.alphabet, options.weight)
        data = LogoData.from_counts(motif.alphabet, motif, prior)
    else :
        if options.reverse: 
            seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet)
        
        if options.complement :
            seqs= SeqList( [Seq(s,seqs.alphabet).complement() for s in seqs], seqs.alphabet)

        prior = parse_prior( options.composition,seqs.alphabet, options.weight)
        data = LogoData.from_seqs(seqs, prior)




    return data
示例#14
0
文件: _cgi.py 项目: snehamitra/NPLB
    # we can't send any useful feedback
    logo = StringIO()
    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = form_values.has_key("ignore_lower_case")
        if comp == 'percentCG': comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError, motif_err:
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format = form["format"].value
        weblogolib.formatters[format](data, logoformat, logo)
    except ValueError, err:
        errors.append(err.args)
示例#15
0
文件: _cli.py 项目: jnktsj/heatlogo
            isCodon = True
            options.alphabet = None
        seqs = read_seq_data(
            fin, options.input_parser.read, alphabet=options.alphabet, ignore_lower_case=options.ignore_lower_case
        )

    if motif_flag:
        if options.ignore_lower_case:
            raise ValueError("option --ignore-lower-case incompatible with matrix input")
        if options.reverse:
            motif.reverse()
        if options.complement:
            motif.complement()

        if not isCodon:
            prior, compos = parse_prior(fin_compos, motif.alphabet, fin_weight)
            data = LogoData.from_counts(motif.alphabet, motif, options.stats_func, prior, compos, second_data)
        else:
            raise ValueError("option --sequence-type 'codon' incompatible with matrix input")
    else:
        if options.codon_frame < 0 and isCodon:
            options.reverse = True
            options.complement = True

        if options.reverse:
            seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet)

        if options.complement:
            seqs = SeqList([Seq(s, seqs.alphabet).complement() for s in seqs], seqs.alphabet)

        if isCodon:
示例#16
0
 def test_explicit(self):
     """Check an explicit per-symbol composition given as a dict-like string."""
     composition = "{'A':10, 'C':40, 'G':40, 'T':10}"
     expected = array((10, 40, 40, 10), float64) * 2. / 100.
     observed = parse_prior(composition, unambiguous_dna_alphabet)
     self.assertTrue(all(expected == observed))
示例#17
0
def LogoPlot(sites, datatype, data, plotfile, nperline, numberevery=10, allowunsorted=False, ydatamax=1.01, overlay=None):
    """Constructs a sequence logo showing amino-acid or nucleotide preferences.

    The heights of each letter is equal to the preference of
    that site for that amino acid or nucleotide.

    Note that stop codons may or may not be included in the logo
    depending on whether they are present in *data*.

    CALLING VARIABLES:

    * *sites* is a list of all of the sites that are being included
      in the logo. They must be in natural sort order (as
      is done by *dms_tools.utils.NaturalSort*) or an error will
      be raised **unless** *allowunsorted* is *True*. The sites
      in the plot are ordered in the same arrangement
      listed in *sites*. These should be **strings**, not integers.

    * *datatype* should be one of the two following strings depending on whether
      we are making a plot of preferences or differential preferences:
      'prefs' or 'diffprefs'

    * *data* is a dictionary that has a key for every entry in
      *sites*. For every site *r* in *sites*, *data[r][x]*
      is the preference or differential preference for character *x*.
      Preferences must sum to one; differential preferences to zero.
      All sites must have the same set of characters. The characters
      must be the set of nucleotides (*dms_tools.nts*)
      or the set of amino acids with or without stop codons
      (*dms_tools.aminoacids_nostop* or *dms_tools.aminoacids_withstop*).

    * *plotfile* is a string giving the name of the created PDF file
      of the logo plot.
      It must end in the extension ``.pdf``.

    * *nperline* is the number of sites per line. Often 40 to 80 are good values.

    * *numberevery* specifies how frequently we put labels for the sites on
      x-axis.

    * *allowunsorted* : if *True* then we allow the entries in *sites* to
      **not** be sorted. This means that the logo plot will **not** have
      sites in sorted order.

    * *ydatamax* : meaningful only if *datatype* is 'diffprefs'. In this case, it gives
      the maximum that the logo stacks extend in the positive and negative directions.
      Cannot be smaller than the maximum extent of the differential preferences.

    * *overlay* : this argument allows you to make overlay bars that indicate
      other properties for the sites. By default, this option is *None*, meaning that
      no overlay is created. If you set it to something else, it must be a list
      giving either one or two properties. Each property is a tuple:
      *(prop_d, shortname, longname)* where:

        - *prop_d* is a dictionary keyed by site numbers that are in *sites*.
          For each *r* in *sites*, *prop_d[r]* gives the value of the property,
          or if there is no entry in *prop_d* for *r*, then the property
          is undefined and is colored white. Properties can either be:

            * continuous: in this case, all of the values should be numbers.

            * discrete : in this case, all of the values should be strings.
              While in practice, if you have more than a few discrete
              categories (different strings), the plot will be a mess.

        - *shortname* : short name for the property; will not format well
          if more than 4 or 5 characters.

        - *longname* : longer name for property used on axes label. Can be the
          same as *shortname* if you don't need a different long name.

    Raises *ValueError* if the characters in *data* are not a recognized
    alphabet or differ between sites, if *plotfile* does not end in ``.pdf``,
    if *sites* is unsorted while *allowunsorted* is *False*, if *overlay* does
    not have one or two entries, or if differential preferences do not sum
    close to zero or exceed *ydatamax*.
    """
    assert datatype in ['prefs', 'diffprefs']

    # check data, and get characters
    assert sites, "No sites specified"
    assert set(sites) == set(data.keys()), "There is not a complete match between sites and the keys of data"
    # Materialize as a list: the 'diffprefs' branch concatenates extra symbols
    # onto it, which a dictionary view object does not support.
    characters = list(data[sites[0]].keys())
    if set(characters) == set(dms_tools.nts):
        alphabet_type = 'nt'
    elif set(characters) == set(dms_tools.aminoacids_nostop) or set(characters) == set(dms_tools.aminoacids_withstop):
        alphabet_type = 'aa'
    else:
        raise ValueError("Invalid set of character keys in data. Do not specify either nucleotides or amino acids:\n%s" % str(characters))
    for r in sites:
        if set(data[r].keys()) != set(characters):
            raise ValueError("Not all sites in data have the same set of characters")

    # Reserved symbols must not collide with real characters in the alphabet.
    firstblankchar = 'B' # character for first blank space for diffprefs
    assert firstblankchar not in characters, "firstblankchar in characters"
    lastblankchar = 'b' # character for last blank space for diffprefs
    assert lastblankchar not in characters, "lastblankchar in characters"
    separatorchar = '-' # separates positive and negative for diffprefs
    assert separatorchar not in characters, "separatorchar in characters"
    separatorheight = 0.02 # height of separator as fraction of total for diffprefs

    if os.path.splitext(plotfile)[1].lower() != '.pdf':
        raise ValueError("plotfile must end in .pdf: %s" % plotfile)
    if os.path.isfile(plotfile):
        os.remove(plotfile) # remove existing plot

    if not allowunsorted:
        sorted_sites = [r for r in sites]
        dms_tools.utils.NaturalSort(sorted_sites)
        if sorted_sites != sites:
            raise ValueError("sites is not properly sorted")

    # Following are specifications of weblogo sizing taken from its documentation
    stackwidth = 9.5 # stack width in points, not default size of 10.8, but set to this in weblogo call below
    barheight = 5.5 # height of bars in points if using overlay
    barspacing = 2.0 # spacing between bars in points if using overlay
    stackaspectratio = 4.4 # ratio of stack height:width, doesn't count part going over maximum value of 1
    if overlay:
        if not (1 <= len(overlay) <= 2):
            raise ValueError("overlay must be a list of one or two entries; instead it had %d entries" % len(overlay))
        ymax = (stackaspectratio * stackwidth + len(overlay) * (barspacing + barheight)) / float(stackaspectratio * stackwidth)
        aspectratio = ymax * stackaspectratio # effective aspect ratio for full range
    else:
        ymax = 1.0
        aspectratio = stackaspectratio
    rmargin = 11.5 # right margin in points, fixed by weblogo
    stackheightmargin = 16 # margin between stacks in points, fixed by weblogo

    # Pre-bind so the cleanup in 'finally' is safe even if mkstemp/fdopen fails.
    f = None
    transfacfile = None
    try:
        # write data into transfacfile (a temporary file)
        (fd, transfacfile) = tempfile.mkstemp()
        f = os.fdopen(fd, 'w')
        ordered_alphabets = {} # keyed by site index (0, 1, ...) with values ordered lists for characters from bottom to top
        if datatype == 'prefs':
            chars_for_string = characters
            f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
            for (isite, r) in enumerate(sites):
                f.write('%s %s\n' % (r, ' '.join([str(data[r][x]) for x in characters])))
                pi_r = [(data[r][x], x) for x in characters]
                pi_r.sort()
                ordered_alphabets[isite] = [tup[1] for tup in pi_r] # order from smallest to biggest
        elif datatype == 'diffprefs':
            chars_for_string = characters + [firstblankchar, lastblankchar, separatorchar]
            ydatamax *= 2.0 # maximum possible range of data, multiply by two for range
            f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
            for (isite, r) in enumerate(sites):
                positivesum = sum([data[r][x] for x in characters if data[r][x] > 0]) + separatorheight / 2.0
                negativesum = sum([data[r][x] for x in characters if data[r][x] < 0]) - separatorheight / 2.0
                if abs(positivesum + negativesum) > 1.0e-3:
                    raise ValueError("Differential preferences sum of %s is not close to zero for site %s" % (positivesum + negativesum, r))
                if 2.0 * positivesum > ydatamax:
                    raise ValueError("You need to increase ydatamax: the total differential preferences sum to more than the y-axis limits. Right now, ydatamax is %.3f while the total differential preferences are %.3f" % (ydatamax, 2.0 * positivesum))
                f.write('%s' % r)
                deltapi_r = []
                for x in characters:
                    deltapi_r.append((data[r][x], x))
                    f.write(' %s' % (abs(data[r][x]) / float(ydatamax)))
                deltapi_r.sort()
                # Index of the first non-negative entry; bounded so that a site
                # with only negative values cannot run off the end of the list.
                firstpositiveindex = 0
                while firstpositiveindex < len(deltapi_r) and deltapi_r[firstpositiveindex][0] < 0:
                    firstpositiveindex += 1
                ordered_alphabets[isite] = [firstblankchar] + [tup[1] for tup in deltapi_r[ : firstpositiveindex]] + [separatorchar] + [tup[1] for tup in deltapi_r[firstpositiveindex : ]] + [lastblankchar] # order from most negative to most positive with blank characters and separators
                # Both blanks get the same height, computed from the negative
                # sum; the positive and negative sums were checked above to
                # cancel to within 1e-3.
                blankheight = 0.5 * (ydatamax + 2.0 * negativesum) / ydatamax
                f.write(' %g %g %g\n' % (blankheight, blankheight, separatorheight)) # heights for blank charactors and separators
        else:
            raise ValueError("Invalid datatype of %s" % datatype)
        f.close()

        # create web logo
        charstring = ''.join(chars_for_string)
        assert len(charstring) == len(chars_for_string), "Length of charstring doesn't match length of chars_for_string. Do you have unallowable multi-letter characters?\n%s" % (str(chars_for_string))
        logoprior = weblogolib.parse_prior('equiprobable', charstring, 0)
        # Close the read handle before the temporary file is removed below.
        with open(transfacfile) as transfac_in:
            motif = _my_Motif.read_transfac(transfac_in, charstring)
        logodata = weblogolib.LogoData.from_counts(motif.alphabet, motif, logoprior)
        logo_options = weblogolib.LogoOptions()
        logo_options.fineprint = None
        logo_options.stacks_per_line = nperline
        logo_options.stack_aspect_ratio = aspectratio
        logo_options.stack_width = stackwidth
        logo_options.unit_name = 'probability'
        logo_options.show_yaxis = False
        logo_options.yaxis_scale = ymax
        if alphabet_type == 'aa':
            (cmap, colormapping, mapper) = KyteDoolittleColorMapping()
        elif alphabet_type == 'nt':
            colormapping = {}
            colormapping['A'] = '#008000'
            colormapping['T'] = '#FF0000'
            colormapping['C'] = '#0000FF'
            colormapping['G'] = '#FFA500'
        else:
            raise ValueError("Invalid alphabet_type %s" % alphabet_type)
        colormapping[firstblankchar] = colormapping[lastblankchar] = '#000000' # black, but color doesn't matter as modified weblogo code replaces with empty space
        colormapping[separatorchar] = '#000000' # black
        color_scheme = weblogolib.colorscheme.ColorScheme()
        for x in chars_for_string:
            color_scheme.groups.append(weblogolib.colorscheme.ColorGroup(x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme
        # Label every numberevery-th site; the others get an empty label.
        logo_options.annotate = [r if isite % numberevery == 0 else '' for (isite, r) in enumerate(sites)]
        logoformat = weblogolib.LogoFormat(logodata, logo_options)
        # _my_pdf_formatter is modified from weblogo version 3.4 source code
        # to allow custom ordering of the symbols.
        pdf = _my_pdf_formatter(logodata, logoformat, ordered_alphabets)
        with open(plotfile, 'w') as plot_out:
            plot_out.write(pdf)
        assert os.path.isfile(plotfile), "Failed to find expected plotfile %s" % plotfile
    finally:
        # close if still open; best-effort so an earlier error is not masked
        if f is not None:
            try:
                f.close()
            except Exception:
                pass
        # remove temporary file
        if transfacfile is not None and os.path.isfile(transfacfile):
            os.remove(transfacfile)

    # now build the overlay
    if overlay:
        # Pre-bind so the cleanup in 'finally' never touches an unbound name.
        overlayfile = mergedfile = None
        plotfile_f = overlayfile_f = foverlay = fmerged = None
        try:
            (fdoverlay, overlayfile) = tempfile.mkstemp(suffix='.pdf')
            (fdmerged, mergedfile) = tempfile.mkstemp(suffix='.pdf')
            foverlay = os.fdopen(fdoverlay, 'wb')
            foverlay.close() # close, but we still have the path overlayfile...
            fmerged = os.fdopen(fdmerged, 'wb')
            LogoOverlay(sites, overlayfile, overlay, nperline, sitewidth=stackwidth, rmargin=rmargin, logoheight=stackwidth * stackaspectratio + stackheightmargin, barheight=barheight, barspacing=barspacing)
            plotfile_f = open(plotfile, 'rb')
            plot_page = PyPDF2.PdfFileReader(plotfile_f).getPage(0)
            overlayfile_f = open(overlayfile, 'rb')
            overlay_page = PyPDF2.PdfFileReader(overlayfile_f).getPage(0)
            # Shift the logo horizontally so the right edges of the two pages line up.
            xshift = overlay_page.artBox[2] - plot_page.artBox[2]
            overlay_page.mergeTranslatedPage(plot_page, xshift, 0)
            overlay_page.compressContentStreams()
            output = PyPDF2.PdfFileWriter()
            output.addPage(overlay_page)
            output.write(fmerged)
            fmerged.close()
            shutil.move(mergedfile, plotfile)
        finally:
            # Best-effort close of every handle that was opened.
            for handle in (plotfile_f, overlayfile_f, foverlay, fmerged):
                if handle is not None:
                    try:
                        handle.close()
                    except Exception:
                        pass
            for fname in (overlayfile, mergedfile):
                if fname is not None and os.path.isfile(fname):
                    os.remove(fname)
示例#18
0
def main(htdocs_directory=None):
    """Handle one WebLogo CGI request.

    Reads the submitted form, validates each field, and then either re-sends
    the form (on first visit, reset, edit, validate, or any error) via
    ``send_form`` or builds the logo and writes it to standard output preceded
    by Content-Type and Content-Disposition headers.

    ``htdocs_directory`` is passed through unchanged to ``send_form``.
    Returns None.
    """

    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field("sequences", ""),
        Field(
            "format",
            "png",
            weblogolib.formatters.get,
            options=[
                "png_print",
                "png",
                "jpeg",
                "eps",
                "pdf",
                "svg",
                "logodata",
            ],  # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option.",
        ),
        Field("stacks_per_line", logooptions.stacks_per_line, int, errmsg="Invalid number of stacks per line."),
        Field(
            "stack_width",
            "medium",
            weblogolib.std_sizes.get,
            options=["small", "medium", "large"],
            errmsg="Invalid logo size.",
        ),
        Field(
            "alphabet",
            "alphabet_auto",
            alphabets.get,
            options=["alphabet_auto", "alphabet_protein", "alphabet_dna", "alphabet_rna"],
            errmsg="Unknown sequence type.",
        ),
        Field("unit_name", "bits", options=["probability", "bits", "nats", "kT", "kJ/mol", "kcal/mol"]),
        Field("first_index", 1, int_or_none),
        Field("logo_start", "", int_or_none),
        Field("logo_end", "", int_or_none),
        Field(
            "composition",
            "comp_auto",
            composition.get,
            options=[
                "comp_none",
                "comp_auto",
                "comp_equiprobable",
                "comp_CG",
                "comp_Celegans",
                "comp_Dmelanogaster",
                "comp_Ecoli",
                "comp_Hsapiens",
                "comp_Mmusculus",
                "comp_Scerevisiae",
            ],
            errmsg="Illegal sequence composition.",
        ),
        Field("percentCG", "", float_or_none, errmsg="Invalid CG percentage."),
        Field("show_errorbars", False, truth),
        Field("logo_title", logooptions.logo_title),
        Field("logo_label", logooptions.logo_label),
        Field("show_xaxis", False, truth),
        Field("xaxis_label", logooptions.xaxis_label),
        Field("show_yaxis", False, truth),
        Field("yaxis_label", logooptions.yaxis_label, string_or_none),
        Field(
            "yaxis_scale", logooptions.yaxis_scale, float_or_none, errmsg="The yaxis scale must be a positive number."
        ),
        Field("yaxis_tic_interval", logooptions.yaxis_tic_interval, float_or_none),
        Field("show_ends", False, truth),
        Field("show_fineprint", False, truth),
        Field(
            "color_scheme", "color_auto", color_schemes.get, options=color_schemes.keys(), errmsg="Unknown color scheme"
        ),
        Field("color0", ""),
        Field("symbols0", ""),
        Field("desc0", ""),
        Field("color1", ""),
        Field("symbols1", ""),
        Field("desc1", ""),
        Field("color2", ""),
        Field("symbols2", ""),
        Field("desc2", ""),
        Field("color3", ""),
        Field("symbols3", ""),
        Field("desc3", ""),
        Field("color4", ""),
        Field("symbols4", ""),
        Field("desc4", ""),
        Field("ignore_lower_case", False, truth),
        Field("scale_width", False, truth),
    ]

    # Index the controls by field name.
    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form["show_errorbars"].value = logooptions.show_errorbars
        form["show_xaxis"].value = logooptions.show_xaxis
        form["show_yaxis"].value = logooptions.show_yaxis
        form["show_ends"].value = logooptions.show_ends
        form["show_fineprint"].value = logooptions.show_fineprint
        form["scale_width"].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    # Fields whose converted value is copied onto the same-named LogoOptions attribute.
    options_from_form = [
        "format",
        "stacks_per_line",
        "stack_width",
        "alphabet",
        "unit_name",
        "first_index",
        "logo_start",
        "logo_end",
        "composition",
        "show_errorbars",
        "logo_title",
        "logo_label",
        "show_xaxis",
        "xaxis_label",
        "show_yaxis",
        "yaxis_label",
        "yaxis_scale",
        "yaxis_tic_interval",
        "show_ends",
        "show_fineprint",
        "scale_width",
    ]

    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(("color%d" % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == "color_custom":
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # assert type(sequences) == str

    if not sequences:
        sequences = form["sequences"].get_value()
        # If a user tries to paste a very large file into sequence textarea,
        # then WebLogo runs very slow for no apparently good reason. (Might be client side bug?)
        # So we limit the maximum sequence size.
        # Form field also limits size, but not necessarly respected. Also can truncate data
        # without warning, so we'll set textarea maximum to be larger than MAX_SEQUENCE_SIZE
        SEQUENCES_MAXLENGTH = 100000
        if len(sequences) > SEQUENCES_MAXLENGTH:
            errors.append(("sequences", "Sequence data too large for text input. Use file upload instead."))
            # Replace the control with an empty one so the oversized text is
            # not echoed back in the re-sent form.
            controls[0] = Field("sequences", "")

    if not sequences:
        errors.append(
            ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload.")
        )

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = "ignore_lower_case" in form_values
        if comp == "percentCG":
            # NOTE(review): float_or_none presumably yields None for a blank
            # field; dividing None would raise an uncaught TypeError, so
            # report it as a form error in the (field, message) shape used above.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError:
            # Not a transfac matrix: fall back to multiple-sequence data.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case
            )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        # Raw (unconverted) format name; used as the key into the formatter,
        # MIME-type and extension tables.
        logo_format = form["format"].value
        logo = weblogolib.formatters[logo_format](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[logo_format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print("Content-Disposition: attachment; " 'filename="logo.%s"' % extension[logo_format])
    else:
        print("Content-Disposition: inline; " 'filename="logo.%s"' % extension[logo_format])
    # Separate header from data
    print()
    # Finally, and at last, send the logo.

    # On Python 3 the text-mode stdout cannot take the logo data directly, so
    # write through the underlying binary buffer.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
示例#19
0
def DifferentialPreferencesLogo(sites, dpi_d, plotfile, nperline, overlay, sitenumbermapping=None, numberevery=10, ydatamax=1.0):
    """Creates a logo plot of differential amino-acid preferences.

    This plot shows the differential amino-acid preferences, which can potentially
    total up to 1.0 in each direction. For each stack, there is a center black line,
    and positive preferences are shown above that line while negative preferences
    are shown below it.

    All calling arguments have the same meaning as for the function
    *EquilibriumFreqsLogo* with the following two exceptions:

    *dpi_d* replaces the *pi_d* argument used for *EquilibriumFreqsLogo*.
    *dpi_d* is a dictionary keyed by every integer in *sites*.
    The value of *dpi_d[isite]* is itself a dictionary,
    which has keys 'dPI_A', 'dPI_C', 'dPI_D', etc for all 20
    one-letter upper-case amino acid codes. The values
    for these keys are the differential preference of that
    amino acid at that site. So *dpi_d[isite]['dPI_M']* is
    the differential preference for methionine at site *isite*.
    *dpi_d* is allowed to either contain or not contain stop codons.
    If it contains stop codons, then there should be a key
    'dPI_*' giving the preference for a stop codon for each
    dictionary *dpi_d[isite]*. However, we only check that there
    are actually stop codons by looking to see if there
    is a key 'dPI_*' in *dpi_d[sites[0]]* -- if there is not,
    then we don't look for stop codons at any other sites either.
    Note that even though stop codons are denoted by an asterisk
    in *dpi_d*, they are plotted using an *X* character in the
    sequence logo.

    *ydatamax* is the maximum that the logo stacks extend in the positive
    and negative directions. Is 1.0 by default.

    Raises *ValueError* if *sites* is empty or not consecutive, if weblogo
    (or, when *overlay* is used, pyPdf / pylab) is unavailable, if *overlay*
    is not a list of two dictionaries, if *plotfile* does not end in
    ``.pdf``, if the differential preferences at a site do not sum to zero,
    or if they exceed the range allowed by *ydatamax*.
    """
    # Validate *sites* first: everything below indexes sites[0] / sites[-1].
    if not sites:
        raise ValueError("No sites specified")
    stopchar = 'X' # character for stop codon in logo plot
    firstblankchar = 'B' # character for first blank space
    lastblankchar = 'b' # character for last blank space
    separatorchar = '-' # separates positive and negative
    separatorheight = 0.02 # height of separator as fraction of total
    if not WebLogoAvailable():
        raise ValueError("Cannot run weblogo")
    if overlay and not PyPdfAvailable():
        raise ValueError("Cannot use overlay as pyPdf is not available.")
    if overlay and not mapmuts.plot.PylabAvailable():
        raise ValueError("Cannot use overlay as pylab is not available.")
    if overlay:
        if not (len(overlay) == 2 and isinstance(overlay[0], dict) and isinstance(overlay[1], dict)):
            raise ValueError("overlay is not a list of two dictionaries.")
    if sites != list(range(sites[0], sites[-1] + 1)):
        raise ValueError("sites does not specify consecutive numbers")
    if os.path.splitext(plotfile)[1] != '.pdf':
        raise ValueError("plotfile must end in .pdf: %s" % plotfile)
    if os.path.isfile(plotfile):
        os.remove(plotfile) # remove existing plot
    #
    # Following are specifications of weblogo sizing taken from its documentation
    # or specified when weblogo is called
    stackwidth = 9.5 # stack width in points, not default size of 10.8, but set to this in weblogo call below
    barheight = 5.5 # height of bars in points if using overlay
    barspacing = 2.0 # spacing between bars in points if using overlay
    stackaspectratio = 4.4 # ratio of stack height:width, doesn't count part going over maximum value of 1
    if overlay:
        ymax = (stackaspectratio * stackwidth + len(overlay) * (barspacing + barheight)) / float(stackaspectratio * stackwidth)
        aspectratio = ymax * stackaspectratio # effective aspect ratio for full range
    else:
        ymax = 1.0
        aspectratio = stackaspectratio
    rmargin = 11.5 # right margin in points, fixed by weblogo
    stackheightmargin = 16 # margin between stacks in points, fixed by weblogo
    # End specifications of weblogo sizing taken from its documentation
    #
    includestop = 'dPI_*' in dpi_d[sites[0]]
    aas = mapmuts.sequtils.AminoAcids(includestop=includestop)
    if includestop:
        aas_for_string = aas[ : -1] + [stopchar]
    else:
        aas_for_string = aas
    aas_for_string = list(aas_for_string) + [firstblankchar, lastblankchar, separatorchar]
    ydatamax *= 2.0 # maximum possible range of data, multiply by two for range
    transfacfile = None # set before the try so the finally clause can always test it
    try:
        # write data into transfacfile (a temporary file); wrap the descriptor
        # returned by mkstemp so that it is closed rather than leaked
        (fd, transfacfile) = tempfile.mkstemp()
        ordered_alphabets = {} # keyed by site (consecutive 0-index) with values ordered lists of aas from bottom to top
        with os.fdopen(fd, 'w') as f:
            f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(aas_for_string))
            for (isite, site) in enumerate(sites):
                positivesum = sum([dpi_d[site]['dPI_%s' % aa] for aa in aas if dpi_d[site]['dPI_%s' % aa] > 0]) + separatorheight / 2.0
                negativesum = sum([dpi_d[site]['dPI_%s' % aa] for aa in aas if dpi_d[site]['dPI_%s' % aa] < 0]) - separatorheight / 2.0
                if abs(positivesum + negativesum) > 1.0e-6:
                    raise ValueError("Differential preference sums of %g and %g not close to zero for site %d" % (positivesum, negativesum, site))
                f.write('%d' % site)
                dpi_aa = []
                for aa in aas:
                    y = dpi_d[site]['dPI_%s' % aa]
                    dpi_aa.append((y, aa))
                    f.write(' %g' % (abs(y) / float(ydatamax)))
                dpi_aa.sort()
                # index of the first non-negative entry; bounded so that a site
                # with no non-negative entries cannot index past the end
                firstpositiveindex = 0
                while firstpositiveindex < len(dpi_aa) and dpi_aa[firstpositiveindex][0] < 0:
                    firstpositiveindex += 1
                ordered_alphabets[isite] = [firstblankchar] + [tup[1] for tup in dpi_aa[ : firstpositiveindex]] + [separatorchar] + [tup[1] for tup in dpi_aa[firstpositiveindex : ]] + [lastblankchar]
                if 2.0 * positivesum > ydatamax:
                    raise ValueError("You need to increase ymax: the total differential preferences sum to more than the y-axis limits")
                # heights of the two blank padding characters followed by the separator
                blankheight = 0.5 * (ydatamax + 2.0 * negativesum) / ydatamax
                f.write(' %g %g %g\n' % (blankheight, blankheight, separatorheight))
        # create web logo
        aastring = ''.join(aas_for_string)
        logoprior = weblogolib.parse_prior('equiprobable', aastring, 0)
        with open(transfacfile) as transfac_in:
            motif = _my_Motif.read_transfac(transfac_in, aastring)
        logodata = weblogolib.LogoData.from_counts(motif.alphabet, motif, logoprior)
        logo_options = weblogolib.LogoOptions()
        logo_options.fineprint = None
        logo_options.stacks_per_line = nperline
        logo_options.stack_aspect_ratio = aspectratio
        logo_options.stack_width = stackwidth
        logo_options.unit_name = 'probability'
        logo_options.show_yaxis = False
        logo_options.yaxis_scale = ymax
        logo_options.first_index = sites[0]
        (cmap, colormapping, mapper) = mapmuts.plot.KyteDoolittleColorMapping()
        colormapping[firstblankchar] = colormapping[lastblankchar] = '#FFFFFF' # white
        colormapping[separatorchar] = '#000000' # black
        color_scheme = weblogolib.colorscheme.ColorScheme()
        for (aa, aaforstring) in zip(aas + [firstblankchar, lastblankchar, separatorchar], aas_for_string):
            color_scheme.groups.append(weblogolib.colorscheme.ColorGroup(aaforstring, colormapping[aa], "'%s'" % aaforstring))
        logo_options.color_scheme = color_scheme
        # add site number mapping: label every *numberevery*-th stack, blank otherwise
        if sitenumbermapping:
            annotate = []
            for (isite, site) in enumerate(sites):
                if isite % numberevery == 0:
                    annotate.append(sitenumbermapping[site].strip())
                else:
                    annotate.append('')
            logo_options.annotate = annotate
        logoformat = weblogolib.LogoFormat(logodata, logo_options)
        # _my_pdf_formatter is modified from weblogo version 3.4 source code
        # to allow custom ordering of the symbols.
        pdf = _my_pdf_formatter(logodata, logoformat, ordered_alphabets)
        # PDF data is binary: write it in binary mode and close the handle
        with open(plotfile, 'wb') as plotout:
            plotout.write(pdf)
    finally:
        # remove temporary file
        if transfacfile is not None and os.path.isfile(transfacfile):
            os.remove(transfacfile)
    # now build the overlay
    if overlay:
        # make the overlay plot
        overlayfile = '_overlay_tempfile.pdf'
        mergedfile = '_merged_tempfile.pdf'
        mapmuts.plot.LogoOverlay(sites, overlayfile, overlay[0], overlay[1], nperline, sitewidth=stackwidth, rmargin=rmargin, logoheight=stackwidth * stackaspectratio + stackheightmargin, barheight=barheight, barspacing=barspacing)
        # overlay onto plotfile using pyPdf; the input streams stay open until
        # the merged file is written, and are closed before the rename / remove
        with open(plotfile, 'rb') as plotstream, open(overlayfile, 'rb') as overlaystream:
            plotpage = pyPdf.PdfFileReader(plotstream).getPage(0)
            overlaypage = pyPdf.PdfFileReader(overlaystream).getPage(0)
            xshift = overlaypage.artBox[2] - plotpage.artBox[2]
            overlaypage.mergeTranslatedPage(plotpage, xshift, 0)
            output = pyPdf.PdfFileWriter()
            output.addPage(overlaypage)
            with open(mergedfile, 'wb') as outputstream:
                output.write(outputstream)
        os.rename(mergedfile, plotfile)
        os.remove(overlayfile)
示例#20
0
 def test_auto(self):
     """'auto' and 'automatic' both yield the 4-letter equiprobable distribution scaled by 2."""
     expected = 2. * equiprobable_distribution(4)
     for spec in ('auto', 'automatic'):
         prior = parse_prior(spec, unambiguous_dna_alphabet)
         self.assertTrue(all(expected == prior))
示例#21
0
 def test_weight(self):
     """The prior is the equiprobable distribution scaled by the weight (2.0 when none is passed)."""
     # Weight omitted: the expected scale factor is 2.0.
     default_prior = parse_prior("automatic", unambiguous_dna_alphabet)
     self.assertTrue(all(2.0 * equiprobable_distribution(4) == default_prior))

     # Explicit weight passed as the third positional argument.
     weighted_prior = parse_prior("auto", unambiguous_dna_alphabet, 123.123)
     self.assertTrue(all(123.123 * equiprobable_distribution(4) == weighted_prior))
示例#22
0
 def test_explicit(self):
     """A dict-literal composition string is parsed into percentages scaled by 2.0 / 100.0."""
     spec = "{'A':10, 'C':40, 'G':40, 'T':10}"
     percentages = array((10, 40, 40, 10), float64)
     expected = percentages * 2.0 / 100.0
     actual = parse_prior(spec, unambiguous_dna_alphabet)
     self.assertTrue(all(expected == actual))
示例#23
0
def main(htdocs_directory = None) :
    """Handle one CGI request: show the form, or build a logo and send it.

    The submitted values are read with ``cgilib.FieldStorage()``. The form is
    (re)sent with ``send_form`` when no values were submitted, on
    "cmd_reset", when validation errors were collected, or on "cmd_edit" /
    "cmd_validate". Otherwise the logo is built with ``weblogolib`` and
    written to standard output after the HTTP headers.

    *htdocs_directory* is passed through unchanged to ``send_form``.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field( 'sequences', ''),
        Field( 'format', 'png', weblogolib.formatters.get ,
            options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'] , #TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field( 'stacks_per_line', logooptions.stacks_per_line , int,
            errmsg='Invalid number of stacks per line.'),
        Field( 'stack_width','medium', weblogolib.std_sizes.get,
            options=['small', 'medium', 'large'], errmsg='Invalid logo size.'),
        Field( 'alphabet','alphabet_auto', alphabets.get,
            options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                        'alphabet_rna'],
            errmsg="Unknown sequence type."),
        Field( 'unit_name', 'bits',
            options=[ 'probability', 'bits', 'nats', 'kT', 'kJ/mol',
                        'kcal/mol']),
        Field( 'first_index', 1, int_or_none),
        Field( 'logo_start', '', int_or_none),
        Field( 'logo_end', '', int_or_none),
        Field( 'composition', 'comp_auto', composition.get,
            options=['comp_none','comp_auto','comp_equiprobable','comp_CG',
            'comp_Celegans','comp_Dmelanogaster','comp_Ecoli',
            'comp_Hsapiens','comp_Mmusculus','comp_Scerevisiae'],
            errmsg= "Illegal sequence composition."),
        Field( 'percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field( 'show_errorbars', False , truth),
        Field( 'logo_title', logooptions.logo_title ),
        Field( 'logo_label', logooptions.logo_label ),
        Field( 'show_xaxis', False, truth),
        Field( 'xaxis_label', logooptions.xaxis_label ),
        Field( 'show_yaxis', False, truth),
        Field( 'yaxis_label', logooptions.yaxis_label, string_or_none ),
        Field( 'yaxis_scale', logooptions.yaxis_scale , float_or_none,
            errmsg="The yaxis scale must be a positive number." ),
        Field( 'yaxis_tic_interval', logooptions.yaxis_tic_interval ,
                float_or_none),
        Field( 'show_ends', False, truth),
        Field( 'show_fineprint', False , truth),
        Field( 'color_scheme', 'color_auto', color_schemes.get,
            options=color_schemes.keys() ,
            errmsg = 'Unknown color scheme'),
        Field( 'color0', ''),
        Field( 'symbols0', ''),
        Field( 'desc0', ''),
        Field( 'color1', ''),
        Field( 'symbols1', ''),
        Field( 'desc1', ''),
        Field( 'color2', ''),
        Field( 'symbols2', ''),
        Field( 'desc2', ''),
        Field( 'color3', ''),
        Field( 'symbols3', ''),
        Field( 'desc3', ''),
        Field( 'color4', ''),
        Field( 'symbols4', ''),
        Field( 'desc4', ''),
        Field( 'ignore_lower_case', False, truth),
        Field( 'scale_width', False, truth),
        ]

    # Index the controls by field name for direct lookup.
    form = {}
    for c in controls :
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory = htdocs_directory)
        return

    # Get form content
    for c in controls :
        c.value = form_values.getfirst( c.name, c.default)

    options_from_form = ['format', 'stacks_per_line', 'stack_width',
        'alphabet', 'unit_name', 'first_index', 'logo_start','logo_end',
         'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label',
        'show_yaxis', 'yaxis_label', 'yaxis_scale', 'yaxis_tic_interval',
        'show_ends', 'show_fineprint', 'scale_width']

    # Copy every converted form value onto logooptions, collecting
    # conversion failures instead of aborting on the first one.
    errors = []
    for optname in options_from_form :
        try :
            value = form[optname].get_value()
            if value is not None :
                setattr(logooptions, optname, value)
        except ValueError as err :
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0,5) :
        color = form["color%d"%i].get_value()
        symbols = form["symbols%d"%i].get_value()
        desc = form["desc%d"%i].get_value()

        if color :
            try :
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError :
                errors.append( ('color%d'%i, "Invalid color: %s" % color) )

    if form["color_scheme"].value == 'color_custom' :
        logooptions.color_scheme = custom
    else :
        try :
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")

    # Fall back to the pasted text when no (or an empty) file was uploaded.
    if not sequences :
        sequences = form["sequences"].get_value()

    if not sequences :
        errors.append( ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload."))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try :
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # float_or_none may hand back None for a blank field; report it
            # as a form error rather than failing with an uncaught TypeError.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior( comp,motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError:
            # Not a transfac matrix: read the input as sequence data instead.
            seqs = weblogolib.read_seq_data(StringIO( sequences),
                                        alphabet=logooptions.alphabet,
                                        ignore_lower_case=ignore_lower_case
                                        )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format_key = form["format"].value
        logo = weblogolib.formatters[format_key](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format_key])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; ' \
              'filename="logo.%s"' % extension[format_key])
    else:
        print('Content-Disposition: inline; ' \
              'filename="logo.%s"' % extension[format_key])
    # Separate header from data
    print()
    # Finally, and at last, send the logo.

    # The headers above went through the buffered text layer. Flush them
    # before writing raw bytes, otherwise on Python 3 the logo (written
    # directly to the underlying binary buffer) can be emitted first.
    sys.stdout.flush()
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
示例#24
0
文件: _cli.py 项目: snehamitra/NPLB
    except ValueError, motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        seqs = read_seq_data(fin,
                             options.input_parser.read,
                             alphabet=options.alphabet,
                             ignore_lower_case=options.ignore_lower_case)

    if motif_flag:
        if options.ignore_lower_case:
            raise ValueError(
                "error: option --ignore-lower-case incompatible with matrix input"
            )
        if options.reverse: motif.reverse()
        if options.complement: motif.complement()

        prior = parse_prior(options.composition, motif.alphabet,
                            options.weight)
        data = LogoData.from_counts(motif.alphabet, motif, prior)
    else:
        if options.reverse:
            seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet)

        if options.complement:
            seqs = SeqList([Seq(s, seqs.alphabet).complement() for s in seqs],
                           seqs.alphabet)

        prior = parse_prior(options.composition, seqs.alphabet, options.weight)
        data = LogoData.from_seqs(seqs, prior)

    return data

示例#25
0
 # handle any errors. Once the "Content-Type:" header has been sent
 # we can't send any useful feedback
 logo = StringIO()
 try :
     comp = form["composition"].get_value()
     percentCG = form["percentCG"].get_value()
     ignore_lower_case = form_values.has_key("ignore_lower_case") 
     if comp=='percentCG': comp = str(percentCG/100)     
     
     from corebio.matrix import Motif
      
     try:
         # Try reading data in transfac format first. 
         # TODO Refactor this code 
         motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet)
         prior = weblogolib.parse_prior( comp,motif.alphabet)  
         data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)          
     except ValueError, motif_err :
         seqs = weblogolib.read_seq_data(StringIO( sequences), 
                                     alphabet=logooptions.alphabet,
                                     ignore_lower_case=ignore_lower_case
                                     )
         prior = weblogolib.parse_prior(comp, seqs.alphabet)                        
         data = weblogolib.LogoData.from_seqs(seqs, prior) 
         
     logoformat =  weblogolib.LogoFormat(data, logooptions)
     format = form["format"].value
     weblogolib.formatters[format](data, logoformat, logo)            
 except ValueError, err :
     errors.append( err.args )
 except IOError, err :
示例#26
0
def main(htdocs_directory=None):
    """Handle one CGI request: show the form, or build a logo and send it.

    The submitted values are read with ``cgilib.FieldStorage()``. The form is
    (re)sent with ``send_form`` when no values were submitted, on
    "cmd_reset", when validation errors were collected, or on "cmd_edit" /
    "cmd_validate". Otherwise the logo is built with ``weblogolib`` and
    written to standard output after the HTTP headers.

    *htdocs_directory* is passed through unchanged to ``send_form``.
    """

    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field(
            'format',
            'png',
            weblogolib.formatters.get,
            options=[
                'png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'
            ],  #TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field('stacks_per_line',
              logooptions.stacks_per_line,
              int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width',
              'medium',
              weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet',
              'alphabet_auto',
              alphabets.get,
              options=[
                  'alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                  'alphabet_rna'
              ],
              errmsg="Unknown sequence type."),
        Field('unit_name',
              'bits',
              options=[
                  'probability', 'bits', 'nats', 'kT', 'kJ/mol', 'kcal/mol'
              ]),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition',
              'comp_auto',
              composition.get,
              options=[
                  'comp_none', 'comp_auto', 'comp_equiprobable', 'comp_CG',
                  'comp_Celegans', 'comp_Dmelanogaster', 'comp_Ecoli',
                  'comp_Hsapiens', 'comp_Mmusculus', 'comp_Scerevisiae'
              ],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale',
              logooptions.yaxis_scale,
              float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme',
              'color_auto',
              color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    # Index the controls by field name for direct lookup.
    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width'
    ]

    # Copy every converted form value onto logooptions, collecting
    # conversion failures instead of aborting on the first one.
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")

    # Fall back to the pasted text when no (or an empty) file was uploaded.
    if not sequences:
        sequences = form["sequences"].get_value()

    if not sequences:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."
        ))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # float_or_none may hand back None for a blank field; report it
            # as a form error rather than failing with an uncaught TypeError.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError:
            # Not a transfac matrix: read the input as sequence data instead.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format_key = form["format"].value
        logo = weblogolib.formatters[format_key](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format_key])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; ' \
              'filename="logo.%s"' % extension[format_key])
    else:
        print('Content-Disposition: inline; ' \
              'filename="logo.%s"' % extension[format_key])
    # Separate header from data
    print()
    # Finally, and at last, send the logo.

    # The headers above went through the buffered text layer. Flush them
    # before writing raw bytes, otherwise on Python 3 the logo (written
    # directly to the underlying binary buffer) can be emitted first.
    sys.stdout.flush()
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
示例#27
0
def _logo_stack_order(valued_chars, firstblankchar, separatorchar, lastblankchar):
    """Return the bottom-to-top symbol order for one signed logo stack.

    *valued_chars* is a list of *(value, character)* tuples for a single site.
    The returned list is: *firstblankchar*, the characters with negative
    values (most negative first), *separatorchar*, the characters with
    non-negative values (smallest first), and finally *lastblankchar*.
    """
    ordered = sorted(valued_chars)
    firstpositiveindex = 0
    # Bound the scan: a site whose values are all negative must not index past the end.
    while firstpositiveindex < len(ordered) and ordered[firstpositiveindex][0] < 0:
        firstpositiveindex += 1
    negatives = [tup[1] for tup in ordered[ : firstpositiveindex]]
    nonnegatives = [tup[1] for tup in ordered[firstpositiveindex : ]]
    return [firstblankchar] + negatives + [separatorchar] + nonnegatives + [lastblankchar]


def LogoPlot(sites, datatype, data, plotfile, nperline, numberevery=10, allowunsorted=False, ydatamax=1.01, overlay=None, fix_limits={}, fixlongname=False, overlay_cmap=None, ylimits=None, relativestackheight=1, custom_cmap='jet', map_metric='kd', noseparator=False):
    """Constructs a sequence logo showing amino-acid or nucleotide preferences.

    The height of each letter is equal to the preference of
    that site for that amino acid or nucleotide.

    Note that stop codons may or may not be included in the logo
    depending on whether they are present in *data*.

    The logo is first written to *plotfile*; if *overlay* is given, an
    overlay PDF is then created with *LogoOverlay*, merged with the logo,
    and the merged result replaces *plotfile*.

    CALLING VARIABLES:

    * *sites* is a list of all of the sites that are being included
      in the logo, as strings. They must be in natural sort order (as
      is done by *dms_tools.utils.NaturalSort*) or an error will
      be raised **unless** *allowunsorted* is *True*. The sites
      in the plot are ordered in the same arrangement
      listed in *sites*. These should be **strings**, not integers.

    * *datatype* should be one of the following strings:
    
        * 'prefs' for preferences
        
        * 'diffprefs' for differential preferences
        
        * 'diffsel' for differential selection

    * *data* is a dictionary that has a key for every entry in
      *sites*. For every site *r* in *sites*, *data[r][x]*
      is the value for character *x*. 
      Preferences must sum to one; differential preferences to zero.
      All sites must have the same set of characters. The characters
      must be the set of nucleotides (*dms_tools.nts*)
      or the set of amino acids with or without stop codons
      (*dms_tools.aminoacids_nostop* or *dms_tools.aminoacids_withstop*).

    * *plotfile* is a string giving the name of the created PDF file 
      of the logo plot.
      It must end in the extension ``.pdf``. Any existing file with this
      name is removed before plotting.

    * *nperline* is the number of sites per line. Often 40 to 80 are good values.

    * *numberevery* specifies how frequently we put labels for the sites on
      the x-axis.

    * *allowunsorted* : if *True* then we allow the entries in *sites* to 
      **not** be sorted. This means that the logo plot will **not** have
      sites in sorted order.

    * *ydatamax* : meaningful only if *datatype* is 'diffprefs'. In this case, it gives
      the maximum that the logo stacks extend in the positive and negative directions.
      Cannot be smaller than the maximum extent of the differential preferences.

    * *ylimits*: is **mandatory** if *datatype* is 'diffsel', and meaningless 
      otherwise. It is *(ymin, ymax)* where *ymax > 0 > ymin*, and gives extent 
      of the data in the positive and negative directions. Must encompass the 
      actual maximum and minimum of the data.

    * *overlay* : this argument allows you to make overlay bars that indicate
      other properties for the sites. By default, this option is *None*, meaning that
      no overlay is created. If you set it to something else, it must be a list
      giving between one and three properties. Each property is a tuple:
      *(prop_d, shortname, longname)* where:

        - *prop_d* is a dictionary keyed by site numbers that are in *sites*.
          For each *r* in *sites*, *prop_d[r]* gives the value of the property,
          or if there is no entry in *prop_d* for *r*, then the property
          is undefined and is colored white. Properties can either be:

            * continuous: in this case, all of the values should be numbers.

            * discrete : in this case, all of the values should be strings.
              While in practice, if you have more than a few discrete
              categories (different strings), the plot will be a mess.

        - *shortname* : short name for the property; will not format well
          if more than 4 or 5 characters.

        - *longname* : longer name for property used on axes label. Can be the
          same as *shortname* if you don't need a different long name.

    * *fix_limits* is only meaningful if *overlay* is being used. In this case, for any
      *shortname* in *overlay* that also keys an entry in *fix_limits*, we use
      *fix_limits[shortname]* to set the limits for *shortname*. Specifically,
      *fix_limits[shortname]* should be the 2-tuple *(ticks, ticknames)*. *ticks*
      should be a list of tick locations (numbers) and *ticknames* should be a list of
      the corresponding tick label for that tick.

    * If *fixlongname* is *True*, then we use the *longname* in *overlay* exactly as written;
      otherwise we add a parenthesis indicating the *shortname* for which this *longname*
      stands.

    * *overlay_cmap* can be the name of a valid *matplotlib.colors.Colormap*, such as the
      string *jet* or *bwr*. Otherwise, it can be *None* and a (hopefully) good choice will 
      be made for you.

    * *custom_cmap* can be the name of a valid *matplotlib.colors.Colormap* which will be
      used to color amino-acid one-letter codes in the logoplot by the *map_metric* when
      either 'kd' or 'mw' is used as *map_metric*.

    * *relativestackheight* indicates how high the letter stack is relative to
      the default. The default is multiplied by this number, so make it > 1
      for a higher letter stack.

    * *map_metric* specifies the amino-acid property metric used to map colors to amino-acid
      letters. Valid options are 'kd' (Kyte-Doolittle hydrophobicity scale, default), 'mw' 
      (molecular weight), 'functionalgroup' (functional groups: small, nucleophilic, hydrophobic,
      aromatic, basic, acidic, and amide), and 'charge' (charge at neutral pH). If 'charge' is used, then the
      argument for *custom_cmap* will no longer be meaningful, since 'charge' uses its own 
      blue/black/red colormapping. Similarly, 'functionalgroup' uses its own colormapping.

    * *noseparator* is only meaningful if *datatype* is 'diffsel' or 'diffprefs'.
      If it is set to *True*, then we do **not** print a black horizontal line to
      separate positive and negative values.
    """
    assert datatype in ['prefs', 'diffprefs', 'diffsel'], "Invalid datatype {0}".format(datatype)

    # check data, and get characters
    assert sites, "No sites specified"
    assert set(sites) == set(data.keys()), "Not a match between sites and the keys of data"
    characters = list(data[sites[0]].keys())
    if set(characters) == set(dms_tools.nts):
        alphabet_type = 'nt'
    elif set(characters) == set(dms_tools.aminoacids_nostop) or set(characters) == set(dms_tools.aminoacids_withstop):
        alphabet_type = 'aa'
    else:
        raise ValueError("Invalid set of characters in data. Does not specify either nucleotides or amino acids:\n%s" % str(characters))
    for r in sites:
        if set(data[r].keys()) != set(characters):
            raise ValueError("Not all sites in data have the same set of characters")

    # Placeholder symbols added to the alphabet for diffprefs / diffsel; they
    # must not collide with any real character in the data.
    firstblankchar = 'B' # character for first blank space for diffprefs / diffsel
    assert firstblankchar not in characters, "firstblankchar in characters"
    lastblankchar = 'b' # character for last blank space for diffprefs / diffsel
    assert lastblankchar not in characters, "lastblankchar in characters"
    separatorchar = '-' # separates positive and negative for diffprefs / diffsel
    assert separatorchar not in characters, "separatorchar in characters"
    if noseparator:
        separatorheight = 0
    else:
        separatorheight = 0.02 # height of separator as frac of total for diffprefs / diffsel

    if os.path.splitext(plotfile)[1].lower() != '.pdf':
        raise ValueError("plotfile must end in .pdf: %s" % plotfile)
    if os.path.isfile(plotfile):
        os.remove(plotfile) # remove existing plot

    if not allowunsorted:
        sorted_sites = list(sites)
        dms_tools.utils.NaturalSort(sorted_sites)
        if sorted_sites != sites:
            raise ValueError("sites is not properly sorted")

    # Following are specifications of weblogo sizing taken from its documentation
    stackwidth = 9.5 # stack width in points, not default size of 10.8, but set to this in weblogo call below
    barheight = 5.5 # height of bars in points if using overlay
    barspacing = 2.0 # spacing between bars in points if using overlay
    stackaspectratio = 4.4 # ratio of stack height:width, doesn't count part going over maximum value of 1
    assert relativestackheight > 0, "relativestackheight must be > 0"
    stackaspectratio *= relativestackheight
    if overlay:
        if not (1 <= len(overlay) <= 3):
            raise ValueError("overlay must be a list of between one and three entries; instead it had %d entries" % len(overlay))
        # extend the y-axis above 1 to leave room for the overlay bars
        ymax = (stackaspectratio * stackwidth + len(overlay) * (barspacing + barheight)) / float(stackaspectratio * stackwidth)
        aspectratio = ymax * stackaspectratio # effective aspect ratio for full range
    else:
        ymax = 1.0 
        aspectratio = stackaspectratio
    rmargin = 11.5 # right margin in points, fixed by weblogo
    stackheightmargin = 16 # margin between stacks in points, fixed by weblogo

    # Create the temporary file before entering the try block so that
    # transfacfile is always bound when the finally clause runs.
    (fd, transfacfile) = tempfile.mkstemp()
    try:
        # write data into transfacfile (a temporary file)
        ordered_alphabets = {} # keyed by site index (0, 1, ...) with values ordered lists for characters from bottom to top
        with os.fdopen(fd, 'w') as f:
            if datatype == 'prefs':
                chars_for_string = characters
                f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
                for (isite, r) in enumerate(sites):
                    f.write('%d %s\n' % (isite, ' '.join([str(data[r][x]) for x in characters])))
                    pi_r = [(data[r][x], x) for x in characters]
                    pi_r.sort()
                    ordered_alphabets[isite] = [tup[1] for tup in pi_r] # order from smallest to biggest
            elif datatype == 'diffprefs':
                chars_for_string = characters + [firstblankchar, lastblankchar, separatorchar]
                ydatamax *= 2.0 # maximum possible range of data, multiply by two for range
                f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
                for (isite, r) in enumerate(sites):
                    positivesum = sum([data[r][x] for x in characters if data[r][x] > 0]) + separatorheight / 2.0
                    negativesum = sum([data[r][x] for x in characters if data[r][x] < 0]) - separatorheight / 2.0
                    if abs(positivesum + negativesum) > 1.0e-3:
                        raise ValueError("Differential preferences sum of %s is not close to zero for site %s" % (positivesum + negativesum, r))
                    if 2.0 * positivesum > ydatamax:
                        raise ValueError("You need to increase ydatamax: the total differential preferences sum to more than the y-axis limits. Right now, ydatamax is %.3f while the total differential preferences are %.3f" % (ydatamax, 2.0 * positivesum))
                    f.write('%d' % isite)
                    deltapi_r = []
                    for x in characters:
                        deltapi_r.append((data[r][x], x))
                        f.write(' %s' % (abs(data[r][x]) / float(ydatamax)))
                    # order from most negative to most positive with blank characters and separators
                    ordered_alphabets[isite] = _logo_stack_order(deltapi_r, firstblankchar, separatorchar, lastblankchar)
                    # the same padding height is written for both blank characters
                    blankheight = 0.5 * (ydatamax + 2.0 * negativesum) / ydatamax
                    f.write(' %g %g %g\n' % (blankheight, blankheight, separatorheight)) # heights for blank characters and separators
            elif datatype == 'diffsel':
                assert ylimits, "You must specify ylimits if using diffsel"
                (dataymin, dataymax) = ylimits
                assert dataymax > 0 > dataymin, "Invalid ylimits of {0}".format(ylimits)
                yextent = float(dataymax - dataymin)
                separatorheight *= yextent # convert from fraction of total to data units
                chars_for_string = characters + [firstblankchar, lastblankchar, separatorchar]
                f.write('ID ID\nBF BF\nP0 {0}\n'.format(' '.join(chars_for_string)))
                for (isite, r) in enumerate(sites):
                    positivesum = sum([data[r][x] for x in characters if data[r][x] > 0]) + separatorheight / 2.0
                    negativesum = sum([data[r][x] for x in characters if data[r][x] < 0]) - separatorheight / 2.0
                    assert positivesum <= dataymax, "Data exceeds ylimits in positive direction"
                    assert negativesum >= dataymin, "Data exceeds ylimits in negative direction"
                    f.write('{0}'.format(isite))
                    diffsel_r = []
                    for x in characters:
                        diffsel_r.append((data[r][x], x))
                        f.write(' {0}'.format(abs(data[r][x]) / yextent))
                    # order from most negative to most positive with blank characters and separators
                    ordered_alphabets[isite] = _logo_stack_order(diffsel_r, firstblankchar, separatorchar, lastblankchar)
                    f.write(' %g %g %g\n' % ((negativesum - dataymin) / yextent, (dataymax - positivesum) / yextent, separatorheight / yextent)) # heights for blank characters and separators
            else:
                raise ValueError("Invalid datatype of %s" % datatype)

        # create web logo
        charstring = ''.join(chars_for_string)
        assert len(charstring) == len(chars_for_string), "Length of charstring doesn't match length of chars_for_string. Do you have unallowable multi-letter characters?\n%s" % (str(chars_for_string))
        logoprior = weblogolib.parse_prior('equiprobable', charstring, 0)
        # Close the read handle as soon as the motif is parsed.
        # NOTE(review): assumes _my_Motif.read_transfac consumes the file
        # before returning, as weblogo's Motif.read_transfac does -- confirm.
        with open(transfacfile) as transfac_f:
            motif = _my_Motif.read_transfac(transfac_f, charstring)
        logodata = weblogolib.LogoData.from_counts(motif.alphabet, motif, logoprior)
        logo_options = weblogolib.LogoOptions()
        logo_options.fineprint = None
        logo_options.stacks_per_line = nperline
        logo_options.stack_aspect_ratio = aspectratio
        logo_options.stack_width = stackwidth
        logo_options.unit_name = 'probability'
        logo_options.show_yaxis = False
        logo_options.yaxis_scale = ymax 
        if alphabet_type == 'aa':
            map_functions = {'kd':KyteDoolittleColorMapping,
                             'mw': MWColorMapping,
                             'charge' : ChargeColorMapping,
                             'functionalgroup':FunctionalGroupColorMapping}
            map_fcn = map_functions[map_metric]
            (cmap, colormapping, mapper) = map_fcn(maptype=custom_cmap)
        elif alphabet_type == 'nt':
            colormapping = {}
            colormapping['A'] = '#008000'
            colormapping['T'] = '#FF0000'
            colormapping['C'] = '#0000FF'
            colormapping['G'] = '#FFA500'
        else:
            raise ValueError("Invalid alphabet_type %s" % alphabet_type)
        colormapping[firstblankchar] = colormapping[lastblankchar] = '#000000' # black, but color doesn't matter as modified weblogo code replaces with empty space
        colormapping[separatorchar] = '#000000' # black
        color_scheme = weblogolib.colorscheme.ColorScheme()
        for x in chars_for_string:
            if hasattr(color_scheme, 'rules'):
                color_scheme.rules.append(weblogolib.colorscheme.SymbolColor(x, colormapping[x], "'%s'" % x))
            else:
                # this part is needed for weblogo 3.4
                color_scheme.groups.append(weblogolib.colorscheme.ColorGroup(x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme
        # label only every numberevery-th site; the others get an empty label
        logo_options.annotate = [r if isite % numberevery == 0 else '' for (isite, r) in enumerate(sites)]
        logoformat = weblogolib.LogoFormat(logodata, logo_options)
        # _my_pdf_formatter is modified from weblogo version 3.4 source code
        # to allow custom ordering of the symbols.
        pdf = _my_pdf_formatter(logodata, logoformat, ordered_alphabets) 
        with open(plotfile, 'wb') as f:
            f.write(pdf)
        assert os.path.isfile(plotfile), "Failed to find expected plotfile %s" % plotfile
    finally:
        # remove temporary file
        if os.path.isfile(transfacfile):
            os.remove(transfacfile)

    # now build the overlay
    if overlay:
        # Bind the names up front so the cleanup below is safe even if
        # creating a temporary file fails.
        overlayfile = mergedfile = None
        try:
            (fdoverlay, overlayfile) = tempfile.mkstemp(suffix='.pdf')
            os.close(fdoverlay) # close, but we still have the path overlayfile...
            (fdmerged, mergedfile) = tempfile.mkstemp(suffix='.pdf')
            with os.fdopen(fdmerged, 'wb') as fmerged:
                LogoOverlay(sites, overlayfile, overlay, nperline, sitewidth=stackwidth, rmargin=rmargin, logoheight=stackwidth * stackaspectratio + stackheightmargin, barheight=barheight, barspacing=barspacing, fix_limits=fix_limits, fixlongname=fixlongname, overlay_cmap=overlay_cmap)
                # Both source PDFs stay open until the merged page has been
                # written, and are closed before plotfile is replaced.
                with open(plotfile, 'rb') as plotfile_f, open(overlayfile, 'rb') as overlayfile_f:
                    plot_page = PyPDF2.PdfFileReader(plotfile_f).getPage(0)
                    overlay_page = PyPDF2.PdfFileReader(overlayfile_f).getPage(0)
                    # shift the logo horizontally by the difference in the
                    # third artBox coordinate of the two pages
                    xshift = overlay_page.artBox[2] - plot_page.artBox[2]
                    overlay_page.mergeTranslatedPage(plot_page, xshift, 0)
                    overlay_page.compressContentStreams() 
                    output = PyPDF2.PdfFileWriter()
                    output.addPage(overlay_page)
                    output.write(fmerged)
            shutil.move(mergedfile, plotfile)
        finally:
            # remove whichever temporary files still exist
            for fname in (overlayfile, mergedfile):
                if fname is not None and os.path.isfile(fname):
                    os.remove(fname)