Exemplo n.º 1
0
def main(args, letter_to_emission, trans, emiss):
    """
    Run Baum-Welch on the sequence in a fasta file and write the reports.

    @param args: options object; the attributes read here are ticks, pbar,
    n, fasta, common_suffix, summary, log_likelihoods, posterior and
    posterior_decoding
    @param letter_to_emission: mapping indexed by each raw observation
    to get the integer emission code
    @param trans: passed to estimation.FiniteModel and
    estimation.baum_welch (presumably the initial transition matrix;
    confirm against the estimation module)
    @param emiss: passed alongside trans (presumably the initial emission
    matrix; confirm against the estimation module)
    @raise ValueError: if both args.ticks and args.pbar are set
    """
    if args.ticks and args.pbar:
        raise ValueError('it is silly to use both ticks and a progress bar')
    # NOTE(review): the bar length is 1 + 1 + n; presumably one step for
    # reading the input, one for finishing, and one per iteration -- confirm
    # against Summary, which receives the bar.
    pbar = progress.Bar(1 + 1 + args.n) if args.pbar else None
    # keep the file open only for as long as it takes to read it
    with open(args.fasta) as fin:
        raw_observations = fasta_to_raw_observations(fin.readlines())
    arr = [letter_to_emission[c] for c in raw_observations]
    observations = np.array(arr, dtype=np.int8)
    # compare against None explicitly so a bar object that happens to
    # evaluate as false is still advanced
    if pbar is not None:
        pbar.increment()
    model = estimation.FiniteModel(trans, emiss, observations)
    summary = Summary(model, pbar, args.ticks)
    estimation.baum_welch(
        model.update, model.get_expectations, trans, emiss, args.n,
        callback=summary.after_baum_welch)
    summary_text = summary.finish()
    # every requested output path gets the same suffix appended
    suffix = args.common_suffix
    if args.summary:
        with open(args.summary + suffix, 'w') as fout:
            # write() instead of the python 2 only "print >> fout" statement;
            # the trailing newline keeps the output identical
            fout.write(str(summary_text) + '\n')
    if args.log_likelihoods:
        with open(args.log_likelihoods + suffix, 'w') as fout:
            # one log likelihood per line
            fout.write(
                '\n'.join('%f' % x for x in summary.log_likelihoods) + '\n')
    if args.posterior:
        hmm.pretty_print_posterior(raw_observations,
                model.get_posterior(), 60, args.posterior + suffix)
    if args.posterior_decoding:
        hmm.pretty_print_posterior_decoding(raw_observations,
                model.get_posterior(), 60, args.posterior_decoding + suffix)
Exemplo n.º 2
0
def parallel_worker(params):
    """
    Run Baum-Welch on one fasta file and write all four report files.

    Everything is packed into a single argument; presumably so the function
    can be handed to a pool-style map call (confirm against the caller).
    @param params: a pair (user_params, hmm_params) where user_params is
    (f_in, d_out, niterations) and hmm_params is
    (letter_to_emission, trans, emiss)
    """
    # unpack the params
    user_params, hmm_params = params
    f_in, d_out, niterations = user_params
    letter_to_emission, trans, emiss = hmm_params
    trans = np.array(trans)
    emiss = np.array(emiss)
    # define output path names;
    # each one is <prefix>.<input file name>.n<niterations>.txt inside d_out
    suffix = '.' + os.path.basename(f_in) + ('.n%d.txt' % niterations)
    summary_name = os.path.join(d_out, 'summary' + suffix)
    log_likelihoods_name = os.path.join(d_out, 'log.likelihood' + suffix)
    posterior_name = os.path.join(d_out, 'soft.decoding' + suffix)
    posterior_decoding_name = os.path.join(d_out, 'hard.decoding' + suffix)
    # read the fasta file, keeping it open only while reading
    with open(f_in) as fin:
        raw_observations = fasta_to_raw_observations(fin.readlines())
    arr = [letter_to_emission[c] for c in raw_observations]
    observations = np.array(arr, dtype=np.int8)
    # do the baum welch stuff
    model = estimation.FiniteModel(trans, emiss, observations)
    # no progress bar and no ticks in the worker
    summary = Summary(model, None, None)
    estimation.baum_welch(
        model.update, model.get_expectations, trans, emiss, niterations,
        callback=summary.after_baum_welch)
    summary_text = summary.finish()
    # write the output files;
    # write() replaces the python 2 only "print >> fout" statement and the
    # trailing newline keeps the output identical
    with open(summary_name, 'wt') as fout:
        fout.write(str(summary_text) + '\n')
    with open(log_likelihoods_name, 'wt') as fout:
        # one log likelihood per line
        fout.write(
            '\n'.join('%f' % x for x in summary.log_likelihoods) + '\n')
    hmm.pretty_print_posterior(raw_observations,
            model.get_posterior(), 60, posterior_name)
    hmm.pretty_print_posterior_decoding(raw_observations,
            model.get_posterior(), 60, posterior_decoding_name)