def nmm_example():
    """Build a small four-state HMM over the ``ACGU`` alphabet plus its DP.

    ``B`` and ``E`` are ``MuteState`` instances; ``M1`` and ``M2`` are
    ``FrameState`` instances sharing one uniform base table and one codon
    table (``AUG`` at 0.8, ``AUU`` at 0.1).

    Returns:
        A dict with the keys ``"hmm"``, ``"dp"`` and ``"alphabet"``; the DP
        is created with ``E`` as its argument.
    """
    alphabet = BaseAlphabet.create(b"ACGU", b"X")

    quarter = log(0.25)
    base_table = BaseLprob.create(alphabet, (quarter, quarter, quarter, quarter))

    codon_table = CodonLprob.create(alphabet)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codon_table.set_lprob(Codon.create(triplet, alphabet), log(prob))

    begin = MuteState.create(b"B", alphabet)
    match1 = FrameState.create(
        b"M1", base_table, CodonMarg.create(codon_table), 0.02
    )
    match2 = FrameState.create(
        b"M2", base_table, CodonMarg.create(codon_table), 0.01
    )
    end = MuteState.create(b"E", alphabet)

    hmm = HMM.create(alphabet)
    # Only the begin state is registered with an explicit second argument.
    hmm.add_state(begin, log(0.5))
    for state in (match1, match2, end):
        hmm.add_state(state)

    transitions = (
        (begin, match1, 0.8),
        (begin, match2, 0.2),
        (match1, match2, 0.1),
        (match1, end, 0.4),
        (match2, end, 0.3),
    )
    for source, target, prob in transitions:
        hmm.set_transition(source, target, log(prob))

    dp = hmm.create_dp(end)
    return {"hmm": hmm, "dp": dp, "alphabet": alphabet}
def _create_base_table(codonp: CodonLprob):
    """Derive a per-base log-probability table from a codon table.

    For every codon yielded by ``codon_iter`` over the codon alphabet, the
    codon's log-probability minus ``log(3)`` is accumulated (via
    ``lprob_add``) into the entry of each of its three symbols, one
    contribution per codon position.

    Args:
        codonp: Codon log-probabilities. Its alphabet is asserted to have
            exactly four symbols.

    Returns:
        A ``BaseLprob`` over ``codonp.alphabet`` whose four values follow
        the order of the alphabet's symbols.
    """
    alphabet = codonp.alphabet

    accumulated = {}
    for symbol in alphabet.symbols:
        accumulated[symbol] = lprob_zero()

    log_three = log(3)
    for codon in codon_iter(alphabet):
        share = codonp.get_lprob(codon) - log_three
        members = codon.symbols
        # A symbol that occurs at several positions receives one share per
        # occurrence.
        for position in range(3):
            key = members[position]
            accumulated[key] = lprob_add(accumulated[key], share)

    assert len(accumulated) == 4
    ordered = alphabet.symbols
    assert len(ordered) == 4

    values = tuple(accumulated[ordered[index]] for index in range(4))
    return BaseLprob.create(alphabet, values)
def test_hmm():
    """Check the log-likelihood of the Viterbi path for ``AUGAUU``.

    The HMM has mute states ``B``/``E`` and frame states ``M1``/``M2`` over
    the ``ACGU`` alphabet.
    """
    alphabet = BaseAlphabet.create(b"ACGU", b"X")

    uniform = log(0.25)
    base_table = BaseLprob.create(alphabet, (uniform, uniform, uniform, uniform))

    codon_table = CodonLprob.create(alphabet)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codon_table.set_lprob(Codon.create(triplet, alphabet), log(prob))

    begin = MuteState.create(b"B", alphabet)
    match1 = FrameState.create(
        b"M1", base_table, CodonMarg.create(codon_table), 0.02
    )
    match2 = FrameState.create(
        b"M2", base_table, CodonMarg.create(codon_table), 0.01
    )
    end = MuteState.create(b"E", alphabet)

    hmm = HMM.create(alphabet)
    hmm.add_state(begin, log(0.5))
    for state in (match1, match2, end):
        hmm.add_state(state)

    for source, target, prob in (
        (begin, match1, 0.8),
        (begin, match2, 0.2),
        (match1, match2, 0.1),
        (match1, end, 0.4),
        (match2, end, 0.3),
    ):
        hmm.set_transition(source, target, log(prob))

    dp = hmm.create_dp(end)
    task = DPTask.create(dp)
    task.setup(Sequence.create(b"AUGAUU", alphabet))

    viterbi = dp.viterbi(task)
    observed = hmm.loglikelihood(task.sequence, viterbi.path)
    assert_allclose(observed, -7.069201008427531)
def test_frame_state():
    """Exercise ``FrameState.lprob`` and ``FrameState.decode``.

    First a state created with a final argument of 0.0 is checked against
    an un-normalized codon table; then the table is normalized and a state
    created with 0.1 is checked against reference values.
    """
    base = BaseAlphabet.create(b"ACGU", b"X")

    def seq(data):
        # Fresh Sequence over the test alphabet for each query.
        return Sequence.create(data, base)

    quarter = log(0.25)
    basep = BaseLprob.create(base, (quarter, quarter, quarter, quarter))

    codonp = CodonLprob.create(base)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codonp.set_lprob(Codon.create(triplet, base), log(prob))

    # --- state created with 0.0 ---
    state = FrameState.create(b"M1", basep, CodonMarg.create(codonp), 0.0)
    assert lprob_is_zero(state.lprob(seq(b"AUA")))
    assert_allclose(state.lprob(seq(b"AUG")), log(0.8))
    assert_allclose(state.lprob(seq(b"AUU")), log(0.1))
    for data in (b"AU", b"A", b"AUUA", b"AUUAA"):
        assert lprob_is_zero(state.lprob(seq(data)))

    # --- normalized codon table, state created with 0.1 ---
    codonp.normalize()
    state = FrameState.create(b"M1", basep, CodonMarg.create(codonp), 0.1)

    loose = {"rtol": 1e-6}
    lprob_cases = (
        (b"AUA", -6.905597115665666, {}),
        (b"AUG", -0.5347732882047062, loose),
        (b"AUU", -2.5902373304999466, loose),
        (b"AU", -2.9158434238698336, {}),
        (b"A", -5.914503505971854, {}),
        (b"AUUA", -6.881032208841384, {}),
        (b"AUUAA", -12.08828960987379, {}),
    )
    for data, expected, tolerance in lprob_cases:
        assert_allclose(state.lprob(seq(data)), expected, **tolerance)
    assert lprob_is_zero(state.lprob(seq(b"AUUAAA")))

    decode_cases = (
        (b"AUA", -7.128586690537968, b"AUG"),
        (b"AUAG", -4.813151489562624, b"AUG"),
        (b"A", -6.032286541628237, b"AUG"),
        (b"UUU", -8.110186062956258, b"AUU"),
    )
    for data, expected_lprob, expected_codon in decode_cases:
        lprob, codon = state.decode(seq(data))
        assert_allclose(lprob, expected_lprob)
        assert codon.symbols == expected_codon
def test_codon_state():
    """Check ``CodonState`` name and per-codon log-probabilities.

    Codons that were set return their log-probability; ``ACU``, which was
    never set, is expected to yield ``-inf``.
    """
    base = BaseAlphabet.create(b"ACGU", b"X")

    codonp = CodonLprob.create(base)
    assigned = ((b"AUG", log(0.8)), (b"AUU", log(0.1)))
    for triplet, lprob in assigned:
        codonp.set_lprob(Codon.create(triplet, base), lprob)

    state = CodonState.create(b"M1", codonp)
    assert state.name == b"M1"

    expectations = (
        (b"AUG", log(0.8)),
        (b"AUU", log(0.1)),
        (b"ACU", -inf),
    )
    for data, expected in expectations:
        assert_allclose(state.lprob(Sequence.create(data, base)), expected)
def test_codon_marg():
    """Check ``CodonMarg.lprob`` for exact codons and ``X``-containing ones.

    Expected values: ``CAT`` -> log(0.40), ``CAX`` -> log(0.80) and
    ``XXX`` -> log(1.12), the latter two with a relaxed tolerance.
    """
    base = BaseAlphabet.create(b"ACGT", b"X")

    def codon(triplet):
        return Codon.create(triplet, base)

    codonp = CodonLprob.create(base)
    entries = (
        (b"AAA", 0.01),
        (b"AGA", 0.31),
        (b"CAA", 0.40),
        (b"CAT", 0.40),
    )
    for triplet, prob in entries:
        codonp.set_lprob(codon(triplet), log(prob))

    codonm = CodonMarg.create(codonp)

    assert_allclose(codonm.lprob(codon(b"CAT")), log(0.40))
    for triplet, prob in ((b"CAX", 0.80), (b"XXX", 1.12)):
        assert_allclose(codonm.lprob(codon(triplet)), log(prob), rtol=1e-6)
def test_codon_lprob():
    """Check ``CodonLprob`` set/get/normalize behaviour.

    Covers: normalizing a freshly created table raises ``RuntimeError``;
    set/get round-trips; a single-entry table normalizes to log(1.0); an
    unset codon reads as zero probability; and looking up ``AXA`` raises
    ``RuntimeError``.
    """
    base = BaseAlphabet.create(b"ACGT", b"X")
    codonp = CodonLprob.create(base)

    def codon(triplet):
        return Codon.create(triplet, base)

    def lookup(triplet):
        return codonp.get_lprob(codon(triplet))

    with pytest.raises(RuntimeError):
        codonp.normalize()

    small = log(0.01)

    codonp.set_lprob(codon(b"AAA"), small)
    assert_allclose(lookup(b"AAA"), small)

    codonp.normalize()
    assert_allclose(lookup(b"AAA"), log(1.0))

    codonp.set_lprob(codon(b"AAA"), small)
    assert_allclose(lookup(b"AAA"), small)

    assert lprob_is_zero(lookup(b"ACA"))

    with pytest.raises(RuntimeError):
        lookup(b"AXA")
def _create_codon_prob(aminot: AminoLprob, gencode: CodonTable) -> CodonLprob:
    """Spread amino-acid log-probabilities over their codons.

    Each amino acid's log-probability is split evenly (in probability
    space) among the codons ``gencode`` lists for it; amino acids with no
    codons are skipped. The resulting values are then shifted by their
    ``lprob_add`` total before being stored.

    Args:
        aminot: Amino-acid log-probabilities, queried once per symbol of
            ``aminot.alphabet``.
        gencode: Codon table providing the codons of each amino acid and
            the base alphabet of the result.

    Returns:
        A ``CodonLprob`` over ``gencode.base_alphabet`` with one entry set
        per collected codon.
    """
    codonp = CodonLprob.create(gencode.base_alphabet)

    amino_symbols = aminot.alphabet.symbols
    collected = []
    total = lprob_zero()

    for start in range(len(amino_symbols)):
        # Slice (rather than index) to keep a length-one symbol sequence.
        amino = amino_symbols[start : start + 1]
        amino_lprob = aminot.lprob(amino)
        codons = gencode.codons(amino)
        ncodons = len(codons)
        if ncodons == 0:
            continue

        share = amino_lprob - log(ncodons)
        for codon in codons:
            collected.append((codon, share))
            total = lprob_add(total, share)

    for codon, share in collected:
        codonp.set_lprob(codon, share - total)
    return codonp