def __call__(self, fasta):
    """Run MEME on ``fasta`` and return the binding sites it reports.

    Makes sure ``self.options.output_dir`` exists, invokes ``run_meme`` and
    keeps its six results on the instance (``meme_cmd_args``, ``stdoutdata``,
    ``starts``, ``Zs``, ``thetas``, ``lambdas``), then parses the ``meme.txt``
    file found in the output directory.

    :param fasta: Passed unchanged to ``run_meme`` together with
        ``self.options``.
    :returns: A list of ``(sequence_name, Interval, is_negative_strand)``
        tuples, one per motif instance in ``meme.txt``. The interval spans
        ``start`` to ``start + motif.length`` where ``start`` is the parsed
        start minus one.
    """
    start_time = time.time()
    ensure_dir_exists(self.options.output_dir)

    # run MEME
    (self.meme_cmd_args, self.stdoutdata, self.starts, self.Zs,
     self.thetas, self.lambdas) = run_meme(fasta, self.options)

    # parse output
    from Bio import Motif
    predictions = []
    meme_output = os.path.join(self.options.output_dir, 'meme.txt')
    # Keep the whole parse inside the ``with`` block: the handle must stay
    # open while motifs are being read and must be closed afterwards, even
    # if parsing fails part-way through.
    with open(meme_output) as handle:
        for motif in Motif.parse(handle, "MEME"):
            for instance in motif.instances:
                # MEME parser seems to count from 1, not 0
                start = instance.start - 1
                predictions.append((
                    instance.sequence_name,
                    Interval(start, start + motif.length),
                    instance.strand == '-',
                ))

    logger.info('MEME took %.1f seconds', time.time() - start_time)
    return predictions
def run_dataset(method_name, suite_name, data_set, fasta, options):
    """Run the named method on one data set and return its predictions.

    Sets ``options.output_dir`` to the directory chosen by
    ``output_dir_for_dataset``, creates it, and attaches a file handler
    writing ``<method_name>.log`` in that directory to the root logger.
    The method's ``Algorithm`` is then built from ``options`` and called on
    ``fasta``.

    :returns: The predictions after ``rationalise_predictions``, as a list.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    options.output_dir = output_dir_for_dataset(suite_name, data_set)
    ensure_dir_exists(options.output_dir)

    # Per-method log file inside the data set's output directory.
    log_path = os.path.join(options.output_dir, '%s.log' % method_name)
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.INFO)
    log_format = logging.Formatter(
        "%(asctime)s - %(levelname)s - %(message)s")
    file_handler.setFormatter(log_format)
    # NOTE(review): the handler is never removed from the root logger, so it
    # stays attached after this function returns - confirm callers expect
    # that before changing it.
    root_logger.addHandler(file_handler)

    logging.info('%s is analysing data set: %s', method_name, data_set)
    algorithm = method_for_name(method_name).Algorithm(options)
    raw_predictions = algorithm(fasta)
    logging.info(
        '%s predicted %d binding sites.', method_name, len(raw_predictions))

    return list(rationalise_predictions(raw_predictions))
W], label='STEME W=%d' % W, ls='-', color=colour) P.loglog(fasta_sizes, meme_timings[ W], label='MEME W=%d' % W, ls='-.', color=colour) P.legend(loc='upper left') P.xlabel('\\# bases in data set') P.ylabel('seconds') P.savefig(os.path.join(output_dir, 'timings.eps')) P.savefig(os.path.join(output_dir, 'timings.png')) P.close() # # do the timings. # pylab_utils.set_rcParams_for_latex() stem_timings = DictOf(list) meme_timings = DictOf(list) fasta_sizes = [] for fasta in fastas: for W in Ws: options.min_w = options.max_w = W options.output_dir = os.path.join( output_dir, 'W=%02d-%s' % (W, stempy.basename_wo_ext(fasta))) stempy.ensure_dir_exists(options.output_dir) stem_algorithm = stem.Algorithm(options) meme_algorithm = meme.Algorithm(options) stem_timings[W].append(timeit(lambda: stem_algorithm(fasta))) meme_timings[W].append(timeit(lambda: meme_algorithm(fasta))) fasta_sizes.append(stem_algorithm.num_bases) save_timings()
# Try each candidate number of sites.
for num_sites in (2, 5, 10, 20, 50):
    # Skip site counts that exceed the number of sequences.
    if num_sites > num_seqs:
        continue
    options.min_num_sites = num_sites
    options.max_num_sites = num_sites

    # Try each (min_w, max_w) width range; the commented-out entries are
    # alternatives that are currently disabled.
    width_ranges = [
        # (6, 6),
        # (8, 8),
        # (12, 12),
        (6, 16),
        # (16, 16),
    ]
    for min_w, max_w in width_ranges:
        options.min_w, options.max_w = min_w, max_w
        options.output_dir = dir_for_options(options, fasta)
        ensure_dir_exists(options.output_dir)
        logging.info(
            'Comparing MEME to STEM. Output dir=%s', options.output_dir)
        meme_algorithm, stem_algorithm = compare_meme_stem(options, fasta)
def create_figures(motifs, occs, by_motif, seq_infos, options):
    """Draw and save the full set of scan figures.

    Ensures the ``scan-stats`` directory exists under
    ``options.results_dir``, then produces each figure in turn: every
    figure is created, drawn by the matching ``plot_*`` helper, written out
    with ``savefig(name, options)`` and closed before the next one starts.
    """
    from stempy import ensure_dir_exists
    ensure_dir_exists(os.path.join(options.results_dir, 'scan-stats'))

    plot_size = (6, 4)
    legend_size = (4.25, 4)

    def finish(tag):
        "Save the current figure under the given name, then close it."
        savefig(tag, options)
        pylab.close()

    # Legend font size shrinks as the number of motifs grows.
    num_motifs = len(motifs)
    for threshold, font_size in ((30, 6), (16, 8), (10, 10)):
        if num_motifs > threshold:
            break
    else:
        font_size = 12
    legend_font = {'size': font_size}

    # Format cycler for line plots
    line_styles = ['--', '-.', '-', ':']
    line_colours = ("#E69F00", "#56B4E9", "#009E73", "#F0E442",
                    "#0072B2", "#D55E00", "#CC79A7")
    format_cycler = create_format_cycler(linestyle=line_styles, c=line_colours)
    # (A marker-based format cycler and its 'scan-legend-marker' legend
    # figure are currently disabled.)

    # Scan scores
    pylab.figure(figsize=plot_size)
    score_lines = plot_scores_per_motif(motifs, by_motif, format_cycler)
    finish('scan-scores')

    # Legend for the per-motif lines
    pylab.figure(figsize=legend_size)
    pylab.figlegend(score_lines, motifs, 'center', prop=legend_font)
    finish('scan-legend')

    # Best Z for each motif/sequence combination
    pylab.figure(figsize=plot_size)
    best_Z = calculate_motif_best_Z_per_sequence(
        motifs, by_motif, len(seq_infos))
    plot_best_Z(motifs, best_Z)
    finish('scan-best-Z')

    # Motif co-occurrences
    pylab.figure(figsize=plot_size)
    plot_collinearity(motifs, best_Z)
    finish('scan-collinearity')

    # Site positions
    pylab.figure(figsize=plot_size)
    position_lines = plot_site_positions(
        motifs, occs, by_motif, seq_infos, format_cycler)
    finish('scan-positions')

    # Legend including the extra 'ALL MOTIFS' entry
    pylab.figure(figsize=legend_size)
    pylab.figlegend(
        position_lines, ['ALL MOTIFS'] + motifs, 'center', prop=legend_font)
    finish('scan-legend-with-all')

    # Sequence coverage
    pylab.figure(figsize=plot_size)
    plot_seq_coverage(best_Z, format_cycler)
    finish('scan-seq-coverage')

    # Distribution over sequences
    pylab.figure(figsize=plot_size)
    plot_seq_distribution(motifs, by_motif, seq_infos, format_cycler)
    finish('scan-sequences')

    # Sequence lengths
    pylab.figure(figsize=plot_size)
    plot_seq_lengths(seq_infos)
    finish('scan-lengths')

    # Occurrences by motif: figure height scales with the number of motifs.
    pylab.figure(figsize=(6, len(by_motif) / 4.))
    pylab.subplots_adjust(left=.3, bottom=.1, right=.96, top=.98)
    plot_occs_by_motif(by_motif)
    finish('scan-occs-by-motif')