예제 #1
0
def _split_poim_by_sign(first_degree):
    """Split first-degree POIM scores into positive and negative count rows.

    For every score, the positive table gets ``score * 1000`` when the score
    is >= 0 (else 0) and the negative table gets ``score * -10000`` when the
    score is < 0 (else 0), so both tables hold non-negative values.

    Args:
        first_degree: sequence of rows of numeric scores (``poim[0]``).

    Returns:
        Tuple ``(positive_rows, negative_rows)`` of nested lists with the
        same layout as ``first_degree``.
    """
    positive_rows = []
    negative_rows = []
    for row in first_degree:
        # NOTE(review): the two scale factors differ by a factor of ten
        # (1000 vs 10000). Kept exactly as found -- confirm it is intended.
        positive_rows.append(
            [0 if score < 0 else score * 1000 for score in row])
        negative_rows.append(
            [score * -10000 if score < 0 else 0 for score in row])
    return positive_rows, negative_rows


def _logo_title(logofile):
    """Derive the logo title from the output file name.

    Takes the base name of ``logofile``, strips the last extension if there
    is one, and keeps only the part after the last underscore (if any).
    """
    title = os.path.split(logofile)[1]
    dot = title.rfind(".")
    # Without this guard a name with no "." lost its last character, because
    # rfind() returns -1 and title[:-1] drops one character.
    if dot != -1:
        title = title[:dot]
    if "_" in title:
        title = title[title.rfind("_") + 1:]
    return title


def weblogoPOIM(logofile, poim, max_len):
    """Create a weblogo from the 1st-degree POIM instead of a POIM heatmap.

    A "positive" and a "negative" logo are rendered to the temporary files
    ``logofile + "p.png"`` and ``logofile + "n.png"``, handed to
    ``concatPNG`` together with ``logofile``, and then deleted.

    Args:
        logofile: output path; also the prefix of the two temporary PNGs and
            the source of the logo title.
        poim: POIM data; only ``poim[0]`` (rows of numeric scores) is read.
        max_len: not used by this function; kept so existing callers keep
            working.
    """
    warnings.filterwarnings('ignore', ' This call to matplotlib.use()*')
    from weblogolib import LogoData, LogoOptions, LogoFormat, classic, png_print_formatter

    positive_logo, negative_logo = _split_poim_by_sign(poim[0])
    pos_data = LogoData.from_counts('ACGT', numpy.array(positive_logo).T, None)
    neg_data = LogoData.from_counts("ACGT", numpy.array(negative_logo).T, None)

    title = _logo_title(logofile)

    neg_opt = LogoOptions()
    neg_opt.fineprint += " from KIRMES POIM data"
    neg_opt.small_fontsize = 4
    neg_opt.title_fontsize = 8
    neg_opt.scale_width = False
    neg_opt.logo_title = title + " Negative Logo"
    neg_format = LogoFormat(neg_data, neg_opt)

    pos_opt = LogoOptions()
    pos_opt.scale_width = False
    pos_opt.logo_title = title + " Positive Sequence Logo"
    pos_opt.show_fineprint = False
    pos_opt.color_scheme = classic
    pos_format = LogoFormat(pos_data, pos_opt)

    neg_path = logofile + "n.png"
    pos_path = logofile + "p.png"
    # PNG is binary data: open in 'wb', and let ``with`` close the handle
    # even when the formatter raises.
    with open(neg_path, 'wb') as neg_logo:
        png_print_formatter(neg_data, neg_format, neg_logo)
    with open(pos_path, 'wb') as pos_logo:
        png_print_formatter(pos_data, pos_format, pos_logo)

    concatPNG(logofile, (pos_path, neg_path))
    os.remove(neg_path)
    os.remove(pos_path)
예제 #2
0
    def create_logo(self, seqs=None):
        """Create a sequence logo for the input sequences.

        Args:
            seqs: iterable of ``(header, sequence)`` pairs. Only the
                sequences are used; the headers are discarded.

        Returns:
            The result of the ``wbl`` formatter selected by
            ``self.output_format`` ('png', 'png_print', 'jpeg'; anything
            else falls back to EPS).

        Raises:
            ValueError: if ``seqs`` is missing or empty.
        """
        pairs = list(seqs) if seqs is not None else []
        if not pairs:
            # Previously surfaced as an opaque tuple-unpacking ValueError.
            raise ValueError(
                "create_logo() needs at least one (header, sequence) pair")

        # Separate the headers from the sequences.
        _headers, instances = [list(column) for column in zip(*pairs)]

        # Compare by value: ``is`` on strings tests object identity and only
        # worked by accident of interning.
        sequence_type = self.options.sequence_type
        if sequence_type == 'rna':
            alphabet = Alphabet('ACGU')
        elif sequence_type == 'protein':
            alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
        else:
            alphabet = Alphabet('AGCT')

        motif_corebio = SeqList(alist=instances, alphabet=alphabet)
        data = wbl.LogoData.from_seqs(motif_corebio)
        logo_format = wbl.LogoFormat(data, self.options)

        if self.output_format == 'png':
            return wbl.png_formatter(data, logo_format)
        elif self.output_format == 'png_print':
            return wbl.png_print_formatter(data, logo_format)
        elif self.output_format == 'jpeg':
            return wbl.jpeg_formatter(data, logo_format)
        else:
            return wbl.eps_formatter(data, logo_format)
예제 #3
0
def generate_logos(motifs, foldername, filetype='png'):
    """Write a logo and/or sequence listing for every non-empty motif.

    Output files are named ``<index>_<number of sequences>.<ext>`` inside
    ``foldername``.

    Args:
        motifs: iterable of motif objects exposing ``data`` and ``seqs``;
            falsy entries are skipped (their index is still consumed).
        foldername: directory the files are written to.
        filetype: string or collection naming the outputs to produce; any
            combination of 'png', 'pdf' and 'txt'.

    Raises:
        ValueError: if ``filetype`` names none of png, pdf or txt.
    """
    # Validate once, up front. The old ``else`` was attached to the 'txt'
    # check only, so e.g. the default 'png' wrote a file and then raised.
    wanted = [ext for ext in ('png', 'pdf', 'txt') if ext in filetype]
    if not wanted:
        raise ValueError(
            'Invalid filetype. Available options: png, pdf or txt. ')

    options = wl.LogoOptions()
    options.color_scheme = wl.std_color_schemes["chemistry"]

    for i, motif in enumerate(motifs):
        if not motif:
            continue

        stem = os.path.join(foldername,
                            str(i) + '_' + str(len(motif.seqs)))

        if 'png' in wanted or 'pdf' in wanted:
            my_format = wl.LogoFormat(motif.data, options)

        if 'png' in wanted:
            with open(stem + ".png", "wb") as out:
                out.write(wl.png_print_formatter(motif.data, my_format))
        if 'pdf' in wanted:
            with open(stem + ".pdf", "wb") as out:
                out.write(wl.pdf_formatter(motif.data, my_format))
        if 'txt' in wanted:
            with open(stem + ".txt", "w") as out:
                out.write(''.join(["%s\n" % str(seq) for seq in motif.seqs]))
    def create_logo(self, seqs=None):
        """Create a sequence logo for the input sequences.

        Args:
            seqs: iterable of ``(header, sequence)`` pairs. Only the
                sequences are used; the headers are discarded.

        Returns:
            The result of the ``wbl`` formatter selected by
            ``self.output_format`` ('png', 'png_print', 'jpeg'; anything
            else falls back to EPS).

        Raises:
            ValueError: if ``seqs`` is missing or empty.
        """
        pairs = list(seqs) if seqs is not None else []
        if not pairs:
            # Previously surfaced as an opaque tuple-unpacking ValueError.
            raise ValueError(
                "create_logo() needs at least one (header, sequence) pair")

        # Separate the headers from the sequences.
        _headers, instances = [list(column) for column in zip(*pairs)]

        # Compare by value: ``is`` on strings tests object identity and only
        # worked by accident of interning.
        sequence_type = self.options.sequence_type
        if sequence_type == 'rna':
            alphabet = Alphabet('ACGU')
        elif sequence_type == 'protein':
            alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
        else:
            alphabet = Alphabet('AGCT')

        motif_corebio = SeqList(alist=instances, alphabet=alphabet)
        data = wbl.LogoData.from_seqs(motif_corebio)
        logo_format = wbl.LogoFormat(data, self.options)

        if self.output_format == 'png':
            return wbl.png_formatter(data, logo_format)
        elif self.output_format == 'png_print':
            return wbl.png_print_formatter(data, logo_format)
        elif self.output_format == 'jpeg':
            return wbl.jpeg_formatter(data, logo_format)
        else:
            return wbl.eps_formatter(data, logo_format)
예제 #5
0
def create_logo(input_seqs_fname, logo_fname, options):
    """Create a logo plot PNG with weblogo from a FASTA file.

    The FASTA file is expected to have been created with write_logo_input().
    It is deleted once its sequences have been loaded.

    Args:
        input_seqs_fname: path of the FASTA file to read (and then remove).
        logo_fname: path the PNG is written to.
        options: weblogo options object passed to ``w.LogoFormat``.
    """
    # Local import: only part of the module is visible here, so do not rely
    # on a file-level ``import os``.
    import os

    # Plain "r": the "U" flag is gone in Python 3.11, and Python 3 text mode
    # already translates newlines.
    with open(input_seqs_fname, "r") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)

    # os.remove handles paths containing whitespace, which the previous
    # whitespace-split ``rm`` command line did not, and needs no external
    # program.
    os.remove(input_seqs_fname)

    logo_format = w.LogoFormat(data, options)
    # PNG output is binary data, so the file is opened in "wb".
    with open(logo_fname, "wb") as f:
        f.write(w.png_print_formatter(data, logo_format))
예제 #6
0
    def write_weblogo(self, filepath):
        """Render this object's value matrix as a weblogo PNG.

        Each entry of ``self.values`` becomes one row of the count array,
        and the characters in ``self.alphabet`` are joined to form the logo
        alphabet.

        Args:
            filepath: path of the PNG file to write.
        """
        counts = np.array(
            tuple(tuple(distribution) for distribution in self.values))
        alph = Alphabet(''.join(self.alphabet))

        logo_data = LogoData.from_counts(alph, counts)
        logo_options = LogoOptions(color_scheme=classic)
        # NOTE(review): other weblogo code sets ``logo_title``; confirm that
        # ``title`` is actually honoured by LogoOptions.
        logo_options.title = "PWM"
        logo_format = LogoFormat(logo_data, logo_options)

        # PNG is binary: open in 'wb' and let ``with`` close the handle even
        # if rendering fails.
        with open(filepath, 'wb') as weblogo_file:
            weblogo_file.write(png_print_formatter(logo_data, logo_format))
예제 #7
0
    # Load the sequences from "<output_base>.fasta" and build the weblogo
    # data from them.
    # NOTE(review): the "U" open-mode flag was removed in Python 3.11 --
    # confirm which interpreter this targets.
    with open(args.output_base + ".fasta", "rU") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)
    # Delete the FASTA file by running `rm`. The command string is split on
    # whitespace, so an output_base containing spaces is split into several
    # arguments.
    subprocess.check_call(("rm " + args.output_base + ".fasta").split())

    # Logo options: probability units, axis labels, no fineprint, up to 500
    # stacks per line.
    options = w.LogoOptions()
    options.unit_name = "probability"
    options.yaxis_label = "Probability"
    options.xaxis_label = "Site Position"
    options.show_fineprint = False
    options.stacks_per_line = 500
    options.tic_length = 10

    # Render the logo and write it to "<output_base>.png".
    # NOTE(review): the file is opened in text mode ('w'); if the formatter
    # returns bytes this needs 'wb' -- confirm against the weblogo version.
    format = w.LogoFormat(data, options)
    with open(args.output_base + ".png", 'w') as f:
        f.write(w.png_print_formatter(data, format))

    # Count identical entries of aa_naive_seqs and write them, most common
    # first, to "<output_base>.fasta" (the same path that was removed above).
    # Each record name is "naive_<rank>_<count / total number of entries>".
    aa_naive_seqs_c = Counter(aa_naive_seqs)
    num_trees = len(aa_naive_seqs)
    aa_naive_seqs_d = OrderedDict(
        ("naive_" + str(i) + "_" + str(float(count) / num_trees), seq)
        for i, (seq, count) in enumerate(aa_naive_seqs_c.most_common(None))
    )
    write_to_fasta(aa_naive_seqs_d, args.output_base + ".fasta")

    # Map each translate(seq) result to a Counter of the naive_seqs entries
    # that produce it.
    # Presumably `groupby` is itertools.groupby, which only groups adjacent
    # items: a key can then occur in several groups, and the `if k in ...`
    # branch merges later groups into the existing Counter.
    aa_dna_naive_seqs_d = {}
    for k, g in groupby(naive_seqs, lambda seq: translate(seq)):
        if k in aa_dna_naive_seqs_d:
            aa_dna_naive_seqs_d[k].update(g)
        else:
            aa_dna_naive_seqs_d[k] = Counter(g)