Exemplo n.º 1
0
def test_stability_and_speed(fasta, seed, num_sites, score, epsilon, options):
    """Run EM once from a single start and report how the run went.

    A ``stempy.Algorithm`` is built from ``options`` and initialised on
    ``fasta``. A ``stempy.Start`` is created from ``seed``, ``num_sites`` and
    ``score`` (with no model), ``options.epsilon`` is overwritten with
    ``epsilon``, and EM is run from that start inside a ``Timer``.

    Returns:
        A 3-tuple ``(em_duration, cons_after_em, n_lls)`` read from the EM
        result, where ``n_lls`` is ``len(em_result.LLs)``.
    """
    import logging
    import stempy
    from cookbook.timer import Timer

    algorithm = stempy.Algorithm(options)
    algorithm.initialise(fasta)

    start = stempy.Start(seed=seed, num_sites=num_sites, score=score, model=None)

    # NOTE(review): epsilon is assigned after the algorithm was constructed
    # from ``options`` - presumably the algorithm keeps a reference to the
    # same options object; confirm.
    options.epsilon = epsilon

    data_size = algorithm.data.num_W_mers(len(seed))
    logging.debug(
        'Testing seed=%s; num_sites=%d epsilon=%f; data size=%d, fasta=%s',
        seed,
        num_sites,
        epsilon,
        data_size,
        fasta,
    )

    # Visit the index before timing anything so it is already built.
    algorithm.index.visit()

    with Timer(msg='run EM') as timer:
        em_result = algorithm.run_em_from_start(start)

    # Neither of the next two values is returned; they are still evaluated so
    # the timer attribute and the consensus computation keep being exercised.
    _elapsed = timer.duration
    pssm_log_probs = em_result.model.bs.pssm.log_probs.values()
    _consensus_after_em = stempy.consensus_from_pssm(pssm_log_probs)

    return (
        em_result.em_duration,
        em_result.cons_after_em,
        len(em_result.LLs),
    )
Exemplo n.º 2
0
#
# Configure STEME to look for two motifs of width 8 to 10 and to write
# MEME-like output into the test output directory.
#
fasta_file = os.path.join(fasta_dir(), "random-seqs-two-motifs.fasta")
options = stempy.get_default_options()
options.output_dir = os.path.join("output", "test-2-motifs")
options.min_w = 8
options.max_w = 10
options.num_motifs = 2
options.meme_like_output = "two-motif-test-meme.txt"
meme_output = os.path.join(options.output_dir, options.meme_like_output)


#
# Run the algorithm on the FASTA file.
#
algorithm = stempy.Algorithm(options)
algorithm(fasta_file)

#
# Parsing must succeed on output that contains two motifs.
#
predicted_sites = parse_meme_output_for_sites(meme_output)

#
# Derive one consensus per motif found and compare each, in order, with the
# expected sequence. A match on the reverse complement is accepted as well.
#
consensuses = [
    stempy.consensus_from_pssm(motif.model.bs.pssm.log_probs.values())
    for motif in algorithm.motifs
]
for index, expected in enumerate(("AAACTCACTC", "AACCTGTG")):
    found = consensuses[index]
    assert (
        found == expected or stempy.reverse_complement(found) == expected
    ), found