def get_motif_scores(fa, motifs):
    """Yield a rescaled score for every reported motif match per sequence.

    A ``Scanner`` is configured with ``motifs`` and a threshold of 0.0, and
    the sequences are loaded with ``Fasta(fa.seqfn)``. The scan is run with
    ``nreport=1``.

    Scores are passed through ``rescale``:

    * scores below zero use ``orig_range=[motif.min_score, 0]`` and
      ``new_range=[0, 50]``;
    * all other scores use ``orig_range=[0, motif.max_score]`` and
      ``new_range=[50, 100]``.

    Args:
        fa: Object exposing a ``seqfn`` attribute that is handed to ``Fasta``.
        motifs: Indexable collection of motif objects providing ``id``,
            ``min_score`` and ``max_score``.

    Yields:
        Tuples of ``(sequence id, motif id, rescaled score)``.
    """
    scanner = Scanner()
    scanner.set_motifs(motifs)
    scanner.set_threshold(threshold=0.0)

    sequences = Fasta(fa.seqfn)

    for seq_index, per_motif_hits in enumerate(scanner.scan(sequences, nreport=1)):
        seq_name = sequences.ids[seq_index]

        # Scan results are positional: entry k belongs to motifs[k].
        for motif_index, hits in enumerate(per_motif_hits):
            motif = motifs[motif_index]

            # Only the score of each (score, pos, strand) hit is used.
            for score, _pos, _strand in hits:
                if score < 0:
                    source_range = [motif.min_score, 0]
                    target_range = [0, 50]
                else:
                    source_range = [0, motif.max_score]
                    target_range = [50, 100]

                rescaled = rescale(
                    score, orig_range=source_range, new_range=target_range
                )
                yield (seq_name, motif.id, rescaled)
def command_scan(inputfile, pwmfile, nreport=1, cutoff=0.9, bed=False,
                 scan_rc=True, table=False, score_table=False, moods=False,
                 pvalue=None, bgfile=None, genome=None):
    """Scan sequences for motifs and yield the output line by line.

    The output mode is selected in this order of precedence:

    1. ``table``: a header line, then one row of per-motif counts for each
       sequence. Counts come from ``scan_it_moods`` when ``moods`` is set
       and from ``Scanner.count`` otherwise.
    2. ``score_table``: a header line, then one row of per-motif values
       from ``Scanner.best_score`` for each sequence. ``moods`` is not
       consulted in this mode.
    3. default: one line per match, produced by ``format_line``.

    Args:
        inputfile: Input handed to ``as_fasta`` (and to ``scan_it_moods``
            when ``moods`` is set).
        pwmfile: Motif file; read with ``pwmfile_to_motifs`` and also passed
            to ``Scanner.set_motifs``.
        nreport: Forwarded to the scanning call.
        cutoff: Forwarded to the scanning call.
        bed: Forwarded to ``format_line`` in the default mode.
        scan_rc: Forwarded to the scanning call.
        table: Emit a counts table.
        score_table: Emit a score table (only when ``table`` is false).
        moods: Use ``scan_it_moods`` instead of the ``Scanner`` methods for
            the counts table and the default mode.
        pvalue: Forwarded to ``scan_it_moods`` only.
        bgfile: Forwarded to ``scan_it_moods`` only.
        genome: When not ``None``, the index directory is
            ``<MotifConfig().get_index_dir()>/<genome>`` and is passed to
            ``as_fasta``; otherwise ``None`` is passed.

    Yields:
        str: Tab-separated table lines in the table modes; in the default
        mode, whatever ``format_line`` returns for each match.
    """
    motifs = pwmfile_to_motifs(pwmfile)

    index_dir = None
    if genome is not None:
        index_dir = os.path.join(MotifConfig().get_index_dir(), genome)

    # initialize scanner
    s = Scanner()
    s.set_motifs(pwmfile)

    fa = as_fasta(inputfile, index_dir)

    # Each mode below builds only the iterator it actually consumes, so no
    # scan is set up and then discarded, and the MOODS scan is requested
    # exactly once.
    if table:
        # header
        yield "\t{}".format("\t".join([m.id for m in motifs]))

        if moods:
            result_it = scan_it_moods(inputfile, motifs, cutoff, bgfile,
                                      nreport, scan_rc, pvalue, table)
            for seq_id, counts in result_it:
                yield "{}\t{}".format(
                    seq_id,
                    "\t".join([str(x) for x in counts])
                )
        else:
            result_it = s.count(fa, nreport, scan_rc, cutoff)
            # counts table; rows are labelled by position in fa.ids
            for i, counts in enumerate(result_it):
                yield "{}\t{}".format(
                    fa.ids[i],
                    "\t".join([str(x) for x in counts])
                )

    elif score_table:
        result_it = s.best_score(fa, scan_rc)
        # header
        yield "\t{}".format("\t".join([m.id for m in motifs]))
        # score table; rows are labelled by position in fa.ids
        for i, scores in enumerate(result_it):
            yield "{}\t{}".format(
                fa.ids[i],
                "\t".join([str(x) for x in scores])
            )

    elif moods:
        result_it = scan_it_moods(inputfile, motifs, cutoff, bgfile,
                                  nreport, scan_rc, pvalue, table)
        for motif, d in result_it:
            for seq_id, matches in d.items():
                # NOTE: MOODS matches are unpacked as (pos, score, strand),
                # unlike the (score, pos, strand) order of Scanner.scan.
                for pos, score, strand in matches:
                    yield format_line(fa, seq_id, motif,
                                      score, pos, strand, bed=bed)

    else:
        result_it = s.scan(fa, nreport, scan_rc, cutoff)
        for i, result in enumerate(result_it):
            seq_id = fa.ids[i]
            for motif, matches in zip(motifs, result):
                for (score, pos, strand) in matches:
                    yield format_line(fa, seq_id, motif,
                                      score, pos, strand, bed=bed)