Пример #1
0
def get_motif_scores(fa, motifs):
    s = Scanner()
    s.set_motifs(motifs)
    s.set_threshold(threshold=0.0)
    seqs = Fasta(fa.seqfn)
    for i, result in enumerate(s.scan(seqs, nreport=1)):
        intron_id = seqs.ids[i]
        for m, matches in enumerate(result):
            motif = motifs[m]
            for score, pos, strand in matches:
                if score < 0:
                    score_rescaled = rescale(score,
                                             orig_range=[motif.min_score, 0],
                                             new_range=[0, 50])
                else:
                    score_rescaled = rescale(score,
                                             orig_range=[0, motif.max_score],
                                             new_range=[50, 100])
                yield (intron_id, motif.id, score_rescaled)
Пример #2
0
def command_scan(inputfile, pwmfile, nreport=1, cutoff=0.9, bed=False, 
        scan_rc=True, table=False, score_table=False, moods=False, 
        pvalue=None, bgfile=None, genome=None):
    motifs = pwmfile_to_motifs(pwmfile)
    
    index_dir = None
    if genome is not None:
        index_dir = os.path.join(MotifConfig().get_index_dir(), genome) 
    
    # initialize scanner
    s = Scanner()
    s.set_motifs(pwmfile)
    
    fa = as_fasta(inputfile, index_dir)
    
    if moods:
        result_it = scan_it_moods(inputfile, motifs, cutoff, bgfile, nreport, scan_rc, pvalue, table)
    else:
        result_it = s.scan(fa, nreport, scan_rc, cutoff)

    
    if table:
        # header
        yield "\t{}".format("\t".join([m.id for m in motifs]))
        
        if moods:
            result_it = scan_it_moods(inputfile, motifs, cutoff, bgfile,  nreport, scan_rc, pvalue, table)
            for seq_id, counts in result_it:
                yield "{}\t{}".format(seq_id, "\t".join([str(x) for x in counts]))
        else:
            # get iterator
            result_it = s.count(fa, nreport, scan_rc, cutoff)
            # counts table
            for i, counts in enumerate(result_it):
                yield "{}\t{}".format(
                        fa.ids[i], 
                        "\t".join([str(x) for x in counts])
                        )

    elif score_table:
        # get iterator
        result_it = s.best_score(fa, scan_rc)
        # header
        yield "\t{}".format("\t".join([m.id for m in motifs]))
        # score table
        for i,scores in enumerate(result_it):
            yield "{}\t{}".format(
                    fa.ids[i], 
                    "\t".join([str(x) for x in scores])
                    )

    else:
        if moods:
            for motif, d in result_it:
                for seq_id,matches in d.items():
                    for pos,score,strand in matches:
                        yield format_line(fa, seq_id, motif,
                                score, pos, strand, bed=bed)
        else:
            for i, result in enumerate(result_it):
                seq_id = fa.ids[i]
                for motif, matches in zip(motifs, result):
                    for (score, pos, strand) in matches:
                        yield format_line(fa, seq_id, motif, 
                                   score, pos, strand, bed=bed)