示例#1
0
文件: mosaic.py 项目: JohnReid/biopsy
    logging.basicConfig(level=logging.INFO)

    max_mosaics = 15
    max_order = 5
    me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order)
    for i in range(max_order):
        e=me[i*max_mosaics:(i+1)*max_mosaics]
        plot([x[1] for x in e], [x[2] for x in e])
    xlabel('# mosaics')
    ylabel('LL')
    title('Evaluation of mosaic models of various Markov orders')
    savefig('mosaic-evaluation.png', format='PNG')
    raise

    # load our sequences
    sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671'))

    # build our model
    model_by_states = create_mosaic_model(
      num_mosaics=1,
      p_transition=0.,
      alphabet_size=4,
      order=2,
      dirichlet_prior_strength=10.
    )
    model = hmm.as_model(model_by_states)
    print model.B

    # convert our sequences to the correct order
    sequences_order_n = [model.converter.to_order_n(s) for s in sequences]
示例#2
0
    logging.basicConfig(level=logging.INFO)

    max_mosaics = 15
    max_order = 5
    me = evaluate_mosaics(max_mosaics=max_mosaics, max_order=max_order)
    for i in range(max_order):
        e = me[i * max_mosaics:(i + 1) * max_mosaics]
        plot([x[1] for x in e], [x[2] for x in e])
    xlabel('# mosaics')
    ylabel('LL')
    title('Evaluation of mosaic models of various Markov orders')
    savefig('mosaic-evaluation.png', format='PNG')
    raise

    # load our sequences
    sequences = convert_fasta_sequences(fasta_file_for_fragment('T00671'))

    # build our model
    model_by_states = create_mosaic_model(num_mosaics=1,
                                          p_transition=0.,
                                          alphabet_size=4,
                                          order=2,
                                          dirichlet_prior_strength=10.)
    model = hmm.as_model(model_by_states)
    print model.B

    # convert our sequences to the correct order
    sequences_order_n = [model.converter.to_order_n(s) for s in sequences]

    #from IPython.Debugger import Pdb; Pdb().set_trace()
    def callback(LL):
# Copyright John Reid 2008
#

"""
Code to generate negative test sequences for those fragments in the test harness.
"""

from gapped_pssms.data import fasta_file_for_fragment, test_set_fragments
import sys


def sequences_from_fasta(fasta):
    """Yield each sequence in a FASTA file as a string with flanking Ns removed.

    The file is read with corebio's FASTA ``iterseq`` reader, passing
    ``corebio.seq.dna_alphabet``. Every record is converted with ``str`` and
    any run of ``n``/``N`` characters at either end is stripped; Ns inside
    the sequence are left untouched.

    This is a generator: the file is opened when iteration starts and is
    closed as soon as the generator is exhausted or closed, so the file
    handle is not leaked.

    :param fasta: Path of the FASTA file to read.
    :returns: An iterator over the trimmed sequence strings, in file order.
    """
    import corebio.seq_io.fasta_io

    # The with-block guarantees the handle is released even if parsing fails
    # part-way through the file.
    with open(fasta, "r") as handle:
        for seq in corebio.seq_io.fasta_io.iterseq(handle, corebio.seq.dna_alphabet):
            yield str(seq).strip("nN")


# For every fragment in the test set, run generate_negative_test_sequences.py
# with arguments derived from that fragment's own sequences.
for fragment in test_set_fragments:
    seqs = list(sequences_from_fasta(fasta_file_for_fragment(fragment)))
    # -l receives the length of the longest sequence, -n the number of sequences.
    seq_length = max(len(s) for s in seqs)
    num_seqs = len(seqs)
    # Build the argument vector as a list rather than formatting one string and
    # splitting it on whitespace, so a value containing whitespace cannot be
    # broken into several arguments. The raw string keeps the backslashes of
    # the relative path literal without relying on invalid escape sequences.
    sys.argv = [
        "generate_negative_test_sequences.py",
        "-m", r"..\..\Python\%s-bg-model.pickle" % fragment,
        "-n", "%d" % num_seqs,
        "-l", "%d" % seq_length,
        "-o", "negative-%s.fa" % fragment,
    ]
    # Execute the script in this namespace; presumably it parses the sys.argv
    # set above - confirm against generate_negative_test_sequences.py.
    execfile("generate_negative_test_sequences.py")
示例#4
0
#
# Create the command-line parser. add_algorithm_options is defined elsewhere;
# presumably it registers the algorithm's options (including output_dir and
# tag, which are read below) on the parser - confirm against its definition.
option_parser = OptionParser()
add_algorithm_options(option_parser)
# Log the raw command line before it is parsed.
logging.info('Command line: %s', ' '.join(sys.argv))
options, args = option_parser.parse_args()

# Attach a file handler to the root logger so messages are also written to
# <output_dir>/<tag>.log.
log_filename = os.path.join(options.output_dir, '%s.log' % options.tag)
logging.getLogger('').addHandler(logging.FileHandler(log_filename))
logging.info('Writing log to %s', log_filename)
# Log each option that has a destination attribute, together with its parsed
# value and its help text.
for option in option_parser.option_list:
    if option.dest:
        logging.info('%s: %s (%s)', option.dest,
                     str(getattr(options, option.dest)), option.help)

# Disabled alternative: a single synthetic-data input.
#inputs = [('K10-g0.50-N200-L200-seed4-1', fasta_file_for_synthetic_data('K10-g0.50-N200-L200-seed4-1'))]
# Pair every test-set fragment with the result of fasta_file_for_fragment for
# it (presumably the path of its FASTA file).
inputs = [(fragment, fasta_file_for_fragment(fragment))
          for fragment in test_set_fragments]

# for each input sequence
for seq_tag, fasta_file in inputs:
    # add a file handler to log for this test set
    file_handler = logging.FileHandler(
        os.path.join(options.output_dir, '%s.log' % seq_tag))
    logging.getLogger('').addHandler(file_handler)
    try:
        sequences = convert_fasta_sequences(fasta_file)
        #sequences = [s[:200] for s in sequences[:10]]

        # set up the options for this test set
        options.tag = seq_tag
        options.bg_model_filename = "%s-bg-model.pickle" % seq_tag
示例#5
0
# Parse the options
#
# add_algorithm_options is defined elsewhere; presumably it registers the
# algorithm's options (including output_dir and tag, which are read below) on
# the parser - confirm against its definition.
option_parser = OptionParser()
add_algorithm_options(option_parser)
# Log the raw command line before it is parsed.
logging.info('Command line: %s', ' '.join(sys.argv))
options, args = option_parser.parse_args()

# Attach a file handler to the root logger so messages are also written to
# <output_dir>/<tag>.log.
log_filename = os.path.join(options.output_dir, '%s.log' % options.tag)
logging.getLogger('').addHandler(logging.FileHandler(log_filename))
logging.info('Writing log to %s', log_filename)
# Log each option that has a destination attribute, together with its parsed
# value and its help text.
for option in option_parser.option_list:
    if option.dest:
        logging.info('%s: %s (%s)', option.dest, str(getattr(options, option.dest)), option.help)

# Disabled alternative: a single synthetic-data input.
#inputs = [('K10-g0.50-N200-L200-seed4-1', fasta_file_for_synthetic_data('K10-g0.50-N200-L200-seed4-1'))]
# Pair every test-set fragment with the result of fasta_file_for_fragment for
# it (presumably the path of its FASTA file).
inputs = [(fragment, fasta_file_for_fragment(fragment)) for fragment in test_set_fragments]

# for each input sequence
for seq_tag, fasta_file in inputs:
    # add a file handler to log for this test set
    file_handler = logging.FileHandler(os.path.join(options.output_dir, '%s.log' % seq_tag))
    logging.getLogger('').addHandler(file_handler)
    try:
        sequences = convert_fasta_sequences(fasta_file)
        #sequences = [s[:200] for s in sequences[:10]]

        # set up the options for this test set
        options.tag = seq_tag
        options.bg_model_filename = "%s-bg-model.pickle" % seq_tag

        # Run the algorithm