Пример #1
0
def process(in_filename, out_filename):
    """Translate the sequence stored in one file and write it to another.

    The input is parsed with ``utils.load`` into a header and a sequence.
    The output receives the header on its first line, followed by the
    pieces returned by
    ``utils.prepare_subsequences(utils.translate(sequence), 80)`` joined
    with newlines. No trailing newline is written after the last piece.

    Args:
        in_filename: Path of the file to read.
        out_filename: Path of the file to create or overwrite.
    """
    # Context managers guarantee the handles are closed even if parsing,
    # translation or writing raises.
    with open(in_filename) as in_file:
        header, sequence = utils.load(in_file)

    # Build the full content before touching the output, so a failure here
    # cannot leave behind a truncated file (and reading completes before
    # truncation when both paths name the same file).
    body = '\n'.join(
        utils.prepare_subsequences(utils.translate(sequence), 80)
    )

    with open(out_filename, 'w') as out_file:
        out_file.write(header + '\n')
        out_file.write(body)
Пример #2
0
def create_from_sequence(sequence):
    """Build an ``HMM`` whose parameters are estimated from ``sequence``.

    States are the distinct elements of ``sequence``; observations are the
    distinct values of ``utils.translate`` applied to those states.
    Transition probabilities are the relative frequencies of adjacent
    pairs in ``sequence``. Emissions are deterministic: a state emits the
    observation equal to its own translation with probability 1.0 and
    every other observation with probability 0.0.

    Args:
        sequence: A sliceable sequence of hashable items.

    Returns:
        The ``HMM`` instance with ``emissions`` and ``transitions`` set.
    """
    states = list(set(sequence))

    # Translate each distinct state exactly once and reuse the result for
    # both the observation list and the emission table.
    translated = {g: utils.translate(g) for g in states}
    observations = list(set(translated.values()))

    hmm = HMM(states, observations)

    # Count how often each state is immediately followed by each other one.
    transition_count = {g: {h: 0.0 for h in states} for g in states}
    for current, following in zip(sequence, sequence[1:]):
        transition_count[current][following] += 1.0

    def normalize(value_dict):
        """Scale the values so that they sum to 1 (all-zero rows stay zero)."""
        total = float(sum(value_dict.values()))
        if total == 0:
            # A state with no observed successor keeps an all-zero row
            # instead of triggering a division by zero.
            total = 1
        return {k: v / total for k, v in value_dict.items()}

    transitions = {g: normalize(row) for g, row in transition_count.items()}
    emissions = {
        g: {o: 1.0 if translated[g] == o else 0.0 for o in observations}
        for g in states
    }
    hmm.emissions = emissions
    hmm.transitions = transitions

    return hmm
Пример #3
0
 def check_translation(gene, aminoacid):
     """Assert that ``utils.translate(gene)`` equals ``aminoacid``.

     Raises:
         AssertionError: If the translation differs from ``aminoacid``.
     """
     translated = utils.translate(gene)
     assert translated == aminoacid