示例#1
0
文件: meme.py 项目: JohnReid/STEME
    def __call__(self, fasta):
        """Run MEME on *fasta* and return the sites it predicts.

        Makes sure ``self.options.output_dir`` exists, runs MEME through
        ``run_meme`` (everything it returns is kept on ``self``) and then
        parses the ``meme.txt`` file found in the output directory.

        Returns a list with one tuple per motif instance:
        ``(sequence_name, Interval(start, start + motif.length), on_minus)``
        where ``on_minus`` is True when the instance strand is ``'-'``.
        """
        start_time = time.time()

        ensure_dir_exists(self.options.output_dir)

        predictions = []

        # run MEME
        (self.meme_cmd_args, self.stdoutdata, self.starts,
         self.Zs, self.thetas, self.lambdas) = run_meme(fasta, self.options)

        # parse output
        from Bio import Motif
        meme_txt = os.path.join(self.options.output_dir, 'meme.txt')
        # Keep the whole loop inside the ``with``: the parser may read the
        # file lazily, so the handle must stay open until iteration is done,
        # and it is closed deterministically afterwards (even on error).
        with open(meme_txt) as meme_output:
            for motif in Motif.parse(meme_output, "MEME"):
                for instance in motif.instances:
                    # MEME parser seems to count from 1, not 0
                    start = instance.start - 1
                    prediction = (
                        instance.sequence_name,
                        Interval(start, start + motif.length),
                        instance.strand == '-',
                    )
                    predictions.append(prediction)

        logger.info('MEME took %.1f seconds', time.time() - start_time)

        return predictions
示例#2
0
def run_dataset(method_name, suite_name, data_set, fasta, options):
    """Run one method on one data set and return its predictions.

    Sets ``options.output_dir`` to the directory chosen by
    ``output_dir_for_dataset(suite_name, data_set)``, creates it, and logs
    to ``<output_dir>/<method_name>.log`` while the method runs.

    The log file handler is attached to the root logger only for the
    duration of this call. It is removed and closed afterwards so that
    repeated calls in the same process neither leak open files nor write
    one data set's messages into another data set's log file.

    Returns the list produced by passing the method's raw predictions
    through ``rationalise_predictions``.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    options.output_dir = output_dir_for_dataset(suite_name, data_set)
    ensure_dir_exists(options.output_dir)
    handler = logging.FileHandler(
        os.path.join(options.output_dir, '%s.log' % method_name))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    try:
        logger.info('%s is analysing data set: %s', method_name, data_set)
        predictions = method_for_name(method_name).Algorithm(options)(fasta)
        logger.info('%s predicted %d binding sites.',
                    method_name, len(predictions))
        return list(rationalise_predictions(predictions))
    finally:
        # Detach before closing so no record is sent to a closed handler.
        logger.removeHandler(handler)
        handler.close()
示例#3
0
                 W], label='STEME W=%d' % W, ls='-', color=colour)
        P.loglog(fasta_sizes, meme_timings[
                 W], label='MEME W=%d' % W, ls='-.', color=colour)
        P.legend(loc='upper left')
        P.xlabel('\\# bases in data set')
        P.ylabel('seconds')
    P.savefig(os.path.join(output_dir, 'timings.eps'))
    P.savefig(os.path.join(output_dir, 'timings.png'))
    P.close()


#
# do the timings.
#
# For every FASTA file and every motif width W, run the STEME and the MEME
# algorithm with the same options, record how long each took, and refresh the
# saved timings after each FASTA file.
#
# NOTE(review): presumably configures matplotlib for LaTeX output -- confirm
# against pylab_utils.
pylab_utils.set_rcParams_for_latex()
# Timings are keyed by W; each key gets one entry appended per FASTA file.
# NOTE(review): DictOf(list) presumably creates an empty list for a missing
# key (defaultdict-style) -- confirm.
stem_timings = DictOf(list)
meme_timings = DictOf(list)
# One entry per FASTA file, appended after all widths have been timed.
fasta_sizes = []
for fasta in fastas:
    for W in Ws:
        # Fix the motif width: min and max are both set to W.
        options.min_w = options.max_w = W
        # Separate output directory per (width, FASTA file) combination.
        options.output_dir = os.path.join(
            output_dir, 'W=%02d-%s' % (W, stempy.basename_wo_ext(fasta)))
        stempy.ensure_dir_exists(options.output_dir)
        # Both algorithms are built from the same options object.
        stem_algorithm = stem.Algorithm(options)
        meme_algorithm = meme.Algorithm(options)
        # The lambdas are invoked by timeit within this iteration, so they
        # see the current algorithm and fasta values.
        # NOTE(review): timeit here is a helper defined elsewhere; presumably
        # it returns the elapsed time of one call -- confirm.
        stem_timings[W].append(timeit(lambda: stem_algorithm(fasta)))
        meme_timings[W].append(timeit(lambda: meme_algorithm(fasta)))
    # Read from the algorithm left over from the last W of the inner loop;
    # this relies on Ws being non-empty.
    fasta_sizes.append(stem_algorithm.num_bases)
    # Called once per FASTA file, so results are saved incrementally.
    save_timings()
示例#4
0
    # for each number of sites to use
    for num_sites in [
        2,
        5,
        10,
        20,
        50
    ]:
        # don't try if we don't have enough sequences
        if num_sites > num_seqs:
            continue

        options.min_num_sites = options.max_num_sites = num_sites

        # for each width
        for min_w, max_w in [
            #                ( 6,  6),
            #                ( 8,  8),
            #                (12, 12),
            (6, 16),
            #                (16, 16),
        ]:
            options.min_w = min_w
            options.max_w = max_w
            options.output_dir = dir_for_options(options, fasta)
            ensure_dir_exists(options.output_dir)
            logging.info(
                'Comparing MEME to STEM. Output dir=%s', options.output_dir)
            meme_algorithm, stem_algorithm = compare_meme_stem(options, fasta)
示例#5
0
文件: scan.py 项目: JohnReid/STEME
def create_figures(motifs, occs, by_motif, seq_infos, options):
    """Create the scan statistics figures.

    Makes sure ``<options.results_dir>/scan-stats`` exists and then draws
    and saves, in order, the figures ``scan-scores``, ``scan-legend``,
    ``scan-best-Z``, ``scan-collinearity``, ``scan-positions``,
    ``scan-legend-with-all``, ``scan-seq-coverage``, ``scan-sequences``,
    ``scan-lengths`` and ``scan-occs-by-motif``. Each figure is saved with
    ``savefig(name, options)`` and closed before the next one is opened.

    Parameters:
        motifs: List of motif labels; used for legend entries and to pick
            the legend font size (it is concatenated with a list, so it
            must be a list).
        occs: Passed through to ``plot_site_positions``.
        by_motif: Sized collection passed to the per-motif plots; its
            length sets the height of the occurrences figure.
        seq_infos: Sized collection passed to the per-sequence plots.
        options: Object with a ``results_dir`` attribute; also passed to
            ``savefig``.
    """

    from stempy import ensure_dir_exists
    ensure_dir_exists(os.path.join(options.results_dir, 'scan-stats'))

    # Size of figlegend: shrink the font as the number of motifs grows
    if len(motifs) > 30:
        size = 6
    elif len(motifs) > 16:
        size = 8
    elif len(motifs) > 10:
        size = 10
    else:
        size = 12
    figlegendprops = {'size': size}

    # Format cycler for line plots
    format_cycler = create_format_cycler(
        linestyle=['--', '-.', '-', ':'],
        c=("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2",
           "#D55E00", "#CC79A7"))

    # Format cycler for marker plots
    # format_cycler_marker = create_format_cycler(
    #    marker=simple_marker_styles,
    #    c=("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2",
    #       "#D55E00", "#CC79A7"))

    # Scan scores
    pylab.figure(figsize=(6, 4))
    lines = plot_scores_per_motif(motifs, by_motif, format_cycler)
    savefig('scan-scores', options)
    pylab.close()

    # Scan legend
    # loc is passed by keyword: newer matplotlib releases no longer accept
    # it as a third positional argument.
    pylab.figure(figsize=(4.25, 4))
    pylab.figlegend(lines, motifs, loc='center', prop=figlegendprops)
    savefig('scan-legend', options)
    pylab.close()

    # Best Z for each motif/sequence combination
    pylab.figure(figsize=(6, 4))
    best_Z = calculate_motif_best_Z_per_sequence(
        motifs, by_motif, len(seq_infos))
    plot_best_Z(motifs, best_Z)
    savefig('scan-best-Z', options)
    pylab.close()

    # Scan motif cooccurrences (reuses best_Z computed above)
    pylab.figure(figsize=(6, 4))
    # pylab.figlegend(lines, motifs, 'center')
    plot_collinearity(motifs, best_Z)
    savefig('scan-collinearity', options)
    pylab.close()

    # Scan positions
    pylab.figure(figsize=(6, 4))
    lines = plot_site_positions(motifs, occs, by_motif, seq_infos,
                                format_cycler)
    savefig('scan-positions', options)
    pylab.close()

    # Scan legend with all: uses the lines from the positions plot, with an
    # extra leading 'ALL MOTIFS' label
    pylab.figure(figsize=(4.25, 4))
    pylab.figlegend(
        lines, ['ALL MOTIFS'] + motifs, loc='center', prop=figlegendprops)
    savefig('scan-legend-with-all', options)
    pylab.close()

    # Sequence coverage
    pylab.figure(figsize=(6, 4))
    plot_seq_coverage(best_Z, format_cycler)
    savefig('scan-seq-coverage', options)
    pylab.close()

    # Scan sequences
    pylab.figure(figsize=(6, 4))
    lines = plot_seq_distribution(motifs, by_motif, seq_infos, format_cycler)
    savefig('scan-sequences', options)
    pylab.close()

    # Scan legend with markers
    # fig = pylab.figure(figsize=(4.25, 4))
    # pylab.figlegend(lines, motifs, 'center', prop=figlegendprops)
    # savefig('scan-legend-marker', options)
    # pylab.close()

    # Scan lengths
    pylab.figure(figsize=(6, 4))
    plot_seq_lengths(seq_infos)
    savefig('scan-lengths', options)
    pylab.close()

    # Scan occurrences by motif: figure height grows with the number of
    # entries in by_motif (a quarter inch each)
    pylab.figure(figsize=(6, len(by_motif) / 4.))
    pylab.subplots_adjust(left=.3, bottom=.1, right=.96, top=.98)
    plot_occs_by_motif(by_motif)
    savefig('scan-occs-by-motif', options)
    pylab.close()